pyqt4 seg故障顺序应用程序启动停止

发布于 2024-11-28 00:35:44 字数 3113 浏览 1 评论 0原文

我正在尝试使用 pyqt 阅读网页。我需要使用不同的 URL 多次调用一个方法。我目前使用的代码类似于: http ://blog.sitescraper.net/2010/06/scraping-javascript-webpages-in-python.html#comment-form

但是,当我尝试时,我遇到了段错误。欢迎任何建议。

import sys

from time import clock
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *

class Render(QWebPage):
  def __init__(self):
    self.app = QApplication(sys.argv)
    QWebPage.__init__(self)

    self.networkAccessManager().finished.connect(self.handleEnd)
    self.loadFinished.connect(self._loadFinished)

    self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
    self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

  def loadURL(self, url):
    self.mainFrame().load(QUrl(url))
    self.app.exec_()

  def savePageImage (self, width, height, Imagefile):
    pageSize = self.mainFrame().contentsSize();
    if width == 0:
        pageWidth = pageSize.width()
    else:
        pageWidth = width
    if height == 0:
        pageHeight = pageSize.height()
    else:
        pageHeight = height

    self.setViewportSize(QSize(pageWidth, pageHeight))
    Img = QImage(self.viewportSize(), QImage.Format_ARGB32)
    painter = QPainter(Img)
    self.mainFrame().render(painter)
    painter.end()
    Img.save(Imagefile)


  def _loadFinished(self, result):
    print "load finish"
    self.frame = self.mainFrame()
    self.returnVal = result 
    self.app.quit()

  def handleEnd (self, reply):
    # get first http code and disconnect
    # could add filter to listen relevant responses
    self.httpcode = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
    self.networkAccessManager().finished.disconnect(self.handleEnd)


jsrurl = 'http://www.w3resource.com/javascript/document-alert-confirm/four.html'
badurl='something.or.other'
badhttp = 'http://eclecticself.com/test2.html'
testurl = 'http://www.nydailynews.com/entertainment/index.html'
testurl2 = 'http://www.palmbeachpost.com/'
testurl3 = 'http://www.nydailynews.com/news/politics/2011/08/03/2011-08-03_pat_buchanan_downplays_controversy_after_calling_president_obama_your_boy_to_rev.html'
url = testurl



start = clock()
r = Render()
r.loadURL(url)
html = r.frame.toHtml()
elapsed = clock() - start
print elapsed

if (r.returnVal == True):
    if (r.httpcode.toInt()[0] != 404):
        #print html.toUtf8()
        start = clock()
        r.savePageImage(1024, 0, "pageSnapshot.png")
        elapsed = clock() - start
        print elapsed
    else:
        print 'page not found'
else:
    print 'badurl'

s = Render()
s.loadURL(jsrurl)
html = s.frame.toHtml()
elapsed = clock() - start
print elapsed
if (s.returnVal == True):
    if (s.httpcode.toInt()[0] != 404):
        print html.toUtf8()
        start = clock()
        s.savePageImage(1024, 0, "pageSnapshot.png")
        elapsed = clock() - start
        print elapsed
    else:
        print 'page not found'
else:
    print 'badurl'

I'm trying to read webpages using pyqt. I need to call a method multiple times with different URLs. I am currently using code similar to: http://blog.sitescraper.net/2010/06/scraping-javascript-webpages-in-python.html#comment-form

However when I try I get seg faults. Any suggestions welcome.

import sys

from time import clock
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *

class Render(QWebPage):
  def __init__(self):
    self.app = QApplication(sys.argv)
    QWebPage.__init__(self)

    self.networkAccessManager().finished.connect(self.handleEnd)
    self.loadFinished.connect(self._loadFinished)

    self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
    self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

  def loadURL(self, url):
    self.mainFrame().load(QUrl(url))
    self.app.exec_()

  def savePageImage (self, width, height, Imagefile):
    pageSize = self.mainFrame().contentsSize();
    if width == 0:
        pageWidth = pageSize.width()
    else:
        pageWidth = width
    if height == 0:
        pageHeight = pageSize.height()
    else:
        pageHeight = height

    self.setViewportSize(QSize(pageWidth, pageHeight))
    Img = QImage(self.viewportSize(), QImage.Format_ARGB32)
    painter = QPainter(Img)
    self.mainFrame().render(painter)
    painter.end()
    Img.save(Imagefile)


  def _loadFinished(self, result):
    print "load finish"
    self.frame = self.mainFrame()
    self.returnVal = result 
    self.app.quit()

  def handleEnd (self, reply):
    # get first http code and disconnect
    # could add filter to listen relevant responses
    self.httpcode = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
    self.networkAccessManager().finished.disconnect(self.handleEnd)


jsrurl = 'http://www.w3resource.com/javascript/document-alert-confirm/four.html'
badurl='something.or.other'
badhttp = 'http://eclecticself.com/test2.html'
testurl = 'http://www.nydailynews.com/entertainment/index.html'
testurl2 = 'http://www.palmbeachpost.com/'
testurl3 = 'http://www.nydailynews.com/news/politics/2011/08/03/2011-08-03_pat_buchanan_downplays_controversy_after_calling_president_obama_your_boy_to_rev.html'
url = testurl



start = clock()
r = Render()
r.loadURL(url)
html = r.frame.toHtml()
elapsed = clock() - start
print elapsed

if (r.returnVal == True):
    if (r.httpcode.toInt()[0] != 404):
        #print html.toUtf8()
        start = clock()
        r.savePageImage(1024, 0, "pageSnapshot.png")
        elapsed = clock() - start
        print elapsed
    else:
        print 'page not found'
else:
    print 'badurl'

s = Render()
s.loadURL(jsrurl)
html = s.frame.toHtml()
elapsed = clock() - start
print elapsed
if (s.returnVal == True):
    if (s.httpcode.toInt()[0] != 404):
        print html.toUtf8()
        start = clock()
        s.savePageImage(1024, 0, "pageSnapshot.png")
        elapsed = clock() - start
        print elapsed
    else:
        print 'page not found'
else:
    print 'badurl'

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

浅唱々樱花落 2024-12-05 00:35:44

PyQt 经常忘记保留对对象的引用。解决方法:

  • 尝试使用 PySide 而不是 PyQt,这很简单,因为 API 几乎完全是与 PyQt 相同。我会先尝试 PySide,它可能会立即解决您的问题,或者至少使其可预测和可重现。

  • 尝试保留对您正在使用的所有 Qt 对象的引用,并在使用完这些对象后删除这些引用。您还可以尝试显式关闭它们或导航到“about:blank”,然后再转到下一个网页。

通常会有帮助。如果没有,那么您需要按照 utdemir 上面的建议缩小范围。调试通常没有帮助,因为此类问题通常也与时序相关。没有输出缓冲区的日志记录通常可以帮助您更接近问题的根源。

我与你同在,这样的问题很难追查!

PyQt is often forgetting to keep references to objects. Workarounds:

  • Try to use PySide instead of PyQt, it is easy, since the API is almost completely the same as PyQt. I would try PySide first, it might solve your problem immediately or at least make it predictable and reproducible.

  • Try to keep references to all the Qt objects you are using and remove those references when you're done with the objects. You can also try to explicitly close them or navigate to "about:blank" before going to the next Web page.

It usually helps. If not, then you need to narrow it down as utdemir suggested it above. Debugging usually not help, since such issues are often timing related as well. Logging without an output buffer usually helps you get closer to the source of the problem.

I'm with you in soul, such issues are hard to track down!

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文