PDA

View Full Version : Webpage snapshot program is inconsistent



adamkauk
10th November 2009, 20:04
Hi,

I'm trying to debug a webpage snapshot program. It is supposed to produce a BMP image of whatever webpage URL it is given. It works for most sites, but for yahoo.com it only produces a gray picture. We figured out that this is because the web frame never actually receives the full HTML for yahoo.com; it only gets the JavaScript at the beginning of the page. We found this out by calling the web frame's toHtml() function.

We're pretty sure it's not a useragent problem, because we used wget with our useragent and got the whole source of yahoo.com.

The program is written in PyQt. The code follows below; this is how you run it:

export DISPLAY=:0
python sitesnap.py -s 1 -v 1024x768 -o v.bmp http://yahoo.com

--------------------------------------------------------------------------

#!/usr/bin/python

from PyQt4 import QtCore, QtGui, QtWebKit
from optparse import OptionParser

class Thumbnailer(QtCore.QObject):
    """Load a web page off-screen and save a scaled snapshot of it to a file.

    Construction starts the page load immediately. When the page emits
    ``loadFinished(bool)``, :meth:`render` paints the main frame into an
    image, scales it, saves it to ``filename`` and stops the Qt event loop.
    A running ``QApplication`` event loop is therefore required.
    """

    def __init__(self, url, filename, viewport = QtCore.QSize(800, 600), scale = .25):
        """Start loading ``url``.

        url      -- address of the page to load (passed to ``QUrl``).
        filename -- path the image is written to; the format is chosen by Qt
                    from the file extension.
        viewport -- ``QSize`` used as the initial viewport of the page.
        scale    -- factor applied to the rendered size for the saved image.
        """
        QtCore.QObject.__init__(self)
        # Work on a private copy: QSize is mutable, and the default argument
        # is a single object shared by every call that omits ``viewport``.
        self.viewport = QtCore.QSize(viewport)
        self.scale = scale
        self.filename = filename
        self.page = QtWebKit.QWebPage()

        self.connect(self.page, QtCore.SIGNAL("loadFinished(bool)"), self.render)
        self.page.mainFrame().load(QtCore.QUrl(url))

    def render(self, ok):
        """Slot for ``loadFinished(bool)``: render the page, save it, and quit.

        ok -- load status reported by the page. A failed load is reported as
              a warning, but whatever content is available is still rendered.

        The event loop is left with exit code 0 when the image was saved and
        with exit code 1 when saving failed.
        """
        if not ok:
            QtCore.qWarning("Thumbnailer: page load reported failure; rendering what was received")

        page = self.page
        # Local copy so repeated loadFinished signals do not keep growing the
        # configured viewport height.
        size = QtCore.QSize(self.viewport)
        page.setViewportSize(size)

        # If the page is taller than the viewport, enlarge the viewport so the
        # whole page is rendered instead of only the visible part.
        scrl = page.mainFrame().scrollBarMaximum(QtCore.Qt.Vertical)
        if scrl > 0:
            size.setHeight(size.height() + scrl + 10)
            page.setViewportSize(size)

        image = QtGui.QImage(page.viewportSize(), QtGui.QImage.Format_ARGB32)
        # A freshly constructed QImage holds uninitialised pixel data; give it
        # a defined white background before painting onto it.
        image.fill(QtGui.qRgb(255, 255, 255))

        painter = QtGui.QPainter(image)
        page.mainFrame().render(painter)
        painter.end()

        thumbnail = image.scaled(size * self.scale, QtCore.Qt.KeepAspectRatio, QtCore.Qt.SmoothTransformation)
        if thumbnail.save(self.filename):
            QtCore.QCoreApplication.quit()
        else:
            QtCore.qWarning("Thumbnailer: could not write the output image")
            QtCore.QCoreApplication.exit(1)

# Command-line entry point: parse the options, build the Qt application and
# run the event loop until the Thumbnailer has written the image.
if __name__ == "__main__":
    parser = OptionParser(usage = "usage: %prog [options] url")
    parser.add_option("-o", "--outfile", dest = "filename", help = "write image to filename")
    parser.add_option("-v", "--viewport", dest = "viewport", help = "size of viewport", metavar = "WxH", default = "800x600")
    parser.add_option("-s", "--scale", dest = "scale", help = "viewport is scaled by factor scale for final image", default = ".25")
    (options, args) = parser.parse_args()

    # parser.error() prints the usage text and exits with status 2, so bad
    # input yields a readable message instead of a traceback.
    if not args:
        parser.error("a url is required")
    if not options.filename:
        parser.error("an output file is required (-o FILE)")

    try:
        # Unpacking also rejects values without exactly two "x"-separated parts.
        width, height = [int(part) for part in options.viewport.split("x")]
    except ValueError:
        parser.error("viewport must have the form WxH, e.g. 1024x768")
    if width <= 0 or height <= 0:
        parser.error("viewport dimensions must be positive")

    try:
        scale = float(options.scale)
    except ValueError:
        parser.error("scale must be a number")
    if scale <= 0:
        parser.error("scale must be positive")

    app = QtGui.QApplication([])
    viewport = QtCore.QSize(width, height)
    # Keep a reference so the Thumbnailer is not garbage-collected while the
    # event loop is running.
    thumb = Thumbnailer(args[0], viewport = viewport, scale = scale, filename = options.filename)
    # Propagate the event loop's exit code as the process exit status.
    raise SystemExit(app.exec_())