pyqt4 seg故障顺序应用程序启动停止
我正在尝试使用 pyqt 阅读网页。我需要使用不同的 URL 多次调用一个方法。我目前使用的代码类似于: http ://blog.sitescraper.net/2010/06/scraping-javascript-webpages-in-python.html#comment-form
但是,当我尝试时,我遇到了段错误。欢迎任何建议。
import sys
from time import clock
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
class Render(QWebPage):
def __init__(self):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.networkAccessManager().finished.connect(self.handleEnd)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
def loadURL(self, url):
self.mainFrame().load(QUrl(url))
self.app.exec_()
def savePageImage (self, width, height, Imagefile):
pageSize = self.mainFrame().contentsSize();
if width == 0:
pageWidth = pageSize.width()
else:
pageWidth = width
if height == 0:
pageHeight = pageSize.height()
else:
pageHeight = height
self.setViewportSize(QSize(pageWidth, pageHeight))
Img = QImage(self.viewportSize(), QImage.Format_ARGB32)
painter = QPainter(Img)
self.mainFrame().render(painter)
painter.end()
Img.save(Imagefile)
def _loadFinished(self, result):
print "load finish"
self.frame = self.mainFrame()
self.returnVal = result
self.app.quit()
def handleEnd (self, reply):
# get first http code and disconnect
# could add filter to listen relevant responses
self.httpcode = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
self.networkAccessManager().finished.disconnect(self.handleEnd)
jsrurl = 'http://www.w3resource.com/javascript/document-alert-confirm/four.html'
badurl='something.or.other'
badhttp = 'http://eclecticself.com/test2.html'
testurl = 'http://www.nydailynews.com/entertainment/index.html'
testurl2 = 'http://www.palmbeachpost.com/'
testurl3 = 'http://www.nydailynews.com/news/politics/2011/08/03/2011-08-03_pat_buchanan_downplays_controversy_after_calling_president_obama_your_boy_to_rev.html'
url = testurl
start = clock()
r = Render()
r.loadURL(url)
html = r.frame.toHtml()
elapsed = clock() - start
print elapsed
if (r.returnVal == True):
if (r.httpcode.toInt()[0] != 404):
#print html.toUtf8()
start = clock()
r.savePageImage(1024, 0, "pageSnapshot.png")
elapsed = clock() - start
print elapsed
else:
print 'page not found'
else:
print 'badurl'
s = Render()
s.loadURL(jsrurl)
html = s.frame.toHtml()
elapsed = clock() - start
print elapsed
if (s.returnVal == True):
if (s.httpcode.toInt()[0] != 404):
print html.toUtf8()
start = clock()
s.savePageImage(1024, 0, "pageSnapshot.png")
elapsed = clock() - start
print elapsed
else:
print 'page not found'
else:
print 'badurl'
I'm trying to read webpages using pyqt. I need to call a method multiple times with different URLs. I am currently using code similar to: http://blog.sitescraper.net/2010/06/scraping-javascript-webpages-in-python.html#comment-form
However when I try I get seg faults. Any suggestions welcome.
import sys
from time import clock
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
class Render(QWebPage):
def __init__(self):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.networkAccessManager().finished.connect(self.handleEnd)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
def loadURL(self, url):
self.mainFrame().load(QUrl(url))
self.app.exec_()
def savePageImage (self, width, height, Imagefile):
pageSize = self.mainFrame().contentsSize();
if width == 0:
pageWidth = pageSize.width()
else:
pageWidth = width
if height == 0:
pageHeight = pageSize.height()
else:
pageHeight = height
self.setViewportSize(QSize(pageWidth, pageHeight))
Img = QImage(self.viewportSize(), QImage.Format_ARGB32)
painter = QPainter(Img)
self.mainFrame().render(painter)
painter.end()
Img.save(Imagefile)
def _loadFinished(self, result):
print "load finish"
self.frame = self.mainFrame()
self.returnVal = result
self.app.quit()
def handleEnd (self, reply):
# get first http code and disconnect
# could add filter to listen relevant responses
self.httpcode = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
self.networkAccessManager().finished.disconnect(self.handleEnd)
jsrurl = 'http://www.w3resource.com/javascript/document-alert-confirm/four.html'
badurl='something.or.other'
badhttp = 'http://eclecticself.com/test2.html'
testurl = 'http://www.nydailynews.com/entertainment/index.html'
testurl2 = 'http://www.palmbeachpost.com/'
testurl3 = 'http://www.nydailynews.com/news/politics/2011/08/03/2011-08-03_pat_buchanan_downplays_controversy_after_calling_president_obama_your_boy_to_rev.html'
url = testurl
start = clock()
r = Render()
r.loadURL(url)
html = r.frame.toHtml()
elapsed = clock() - start
print elapsed
if (r.returnVal == True):
if (r.httpcode.toInt()[0] != 404):
#print html.toUtf8()
start = clock()
r.savePageImage(1024, 0, "pageSnapshot.png")
elapsed = clock() - start
print elapsed
else:
print 'page not found'
else:
print 'badurl'
s = Render()
s.loadURL(jsrurl)
html = s.frame.toHtml()
elapsed = clock() - start
print elapsed
if (s.returnVal == True):
if (s.httpcode.toInt()[0] != 404):
print html.toUtf8()
start = clock()
s.savePageImage(1024, 0, "pageSnapshot.png")
elapsed = clock() - start
print elapsed
else:
print 'page not found'
else:
print 'badurl'
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
PyQt 经常忘记保留对对象的引用。解决方法:
尝试使用 PySide 而不是 PyQt,这很简单,因为 API 几乎完全是与 PyQt 相同。我会先尝试 PySide,它可能会立即解决您的问题,或者至少使其可预测和可重现。
尝试保留对您正在使用的所有 Qt 对象的引用,并在使用完这些对象后删除这些引用。您还可以尝试显式关闭它们或导航到“about:blank”,然后再转到下一个网页。
通常会有帮助。如果没有,那么您需要按照 utdemir 上面的建议缩小范围。调试通常没有帮助,因为此类问题通常也与时序相关。没有输出缓冲区的日志记录通常可以帮助您更接近问题的根源。
我与你同在,这样的问题很难追查!
PyQt is often forgetting to keep references to objects. Workarounds:
Try to use PySide instead of PyQt, it is easy, since the API is almost completely the same as PyQt. I would try PySide first, it might solve your problem immediately or at least make it predictable and reproducible.
Try to keep references to all the Qt objects you are using and remove those references when you're done with the objects. You can also try to explicitly close them or navigate to "about:blank" before going to the next Web page.
It usually helps. If not, then you need to narrow it down as utdemir suggested it above. Debugging usually not help, since such issues are often timing related as well. Logging without an output buffer usually helps you get closer to the source of the problem.
I'm with you in soul, such issues are hard to track down!