#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Does scraping for all currently-known versions of Chrome""" import pywintypes import types from drivers import keyboard from drivers import mouse from drivers import windowing # TODO: this has moved, use some logic to find it. For now, # expects a subst k:. DEFAULT_PATH = r"k:\chrome.exe" def InvokeBrowser(path): """Invoke the Chrome browser. Args: path: full path to browser Returns: A tuple of (main window, process handle, address bar, render pane) """ # Reuse an existing instance of the browser if we can find one. This # may not work correctly, especially if the window is behind other windows. # TODO(jhaas): make this work with Vista wnds = windowing.FindChildWindows(0, "Chrome_XPFrame") if len(wnds): wnd = wnds[0] proc = None else: # Invoke Chrome (proc, wnd) = windowing.InvokeAndWait(path) # Get windows we'll need address_bar = windowing.FindChildWindow(wnd, "Chrome_AutocompleteEdit") render_pane = GetChromeRenderPane(wnd) return (wnd, proc, address_bar, render_pane) def Scrape(urls, outdir, size, pos, timeout, kwargs): """Invoke a browser, send it to a series of URLs, and save its output. Args: urls: list of URLs to scrape outdir: directory to place output size: size of browser window to use pos: position of browser window timeout: amount of time to wait for page to load kwargs: miscellaneous keyword args Returns: None if success, else an error string """ if "path" in kwargs and kwargs["path"]: path = kwargs["path"] else: path = DEFAULT_PATH (wnd, proc, address_bar, render_pane) = InvokeBrowser(path) # Resize and reposition the frame windowing.MoveAndSizeWindow(wnd, pos, size, render_pane) # Visit each URL we're given if type(urls) in types.StringTypes: urls = [urls] timedout = False for url in urls: # Double-click in the address bar, type the name, and press Enter mouse.ClickInWindow(address_bar) keyboard.TypeString(url, 0.1) keyboard.TypeString("\n") # Wait for the page to finish loading load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout) timedout = load_time < 0 if timedout: break # Scrape the page image = windowing.ScrapeWindow(render_pane) # Save to disk if "filename" in kwargs: if callable(kwargs["filename"]): filename = kwargs["filename"](url) else: filename = kwargs["filename"] else: filename = windowing.URLtoFilename(url, outdir, ".bmp") image.save(filename) if proc: windowing.SetForegroundWindow(wnd) # Send Alt-F4, then wait for process to end keyboard.TypeString(r"{\4}", use_modifiers=True) if not windowing.WaitForProcessExit(proc, timeout): windowing.EndProcess(proc) return "crashed" if timedout: return "timeout" return None def Time(urls, size, timeout, kwargs): """Measure how long it takes to load each of a series of URLs Args: urls: list of URLs to time size: size of browser window to use timeout: amount of time to wait for page to load kwargs: miscellaneous keyword args Returns: A list of tuples (url, time). "time" can be "crashed" or "timeout" """ if "path" in kwargs and kwargs["path"]: path = kwargs["path"] else: path = DEFAULT_PATH proc = None # Visit each URL we're given if type(urls) in types.StringTypes: urls = [urls] ret = [] for url in urls: try: # Invoke the browser if necessary if not proc: (wnd, proc, address_bar, render_pane) = InvokeBrowser(path) # Resize and reposition the frame windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane) # Double-click in the address bar, type the name, and press Enter mouse.ClickInWindow(address_bar) keyboard.TypeString(url, 0.1) keyboard.TypeString("\n") # Wait for the page to finish loading load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout) timedout = load_time < 0 if timedout: load_time = "timeout" # Send an alt-F4 to make the browser close; if this times out, # we've probably got a crash windowing.SetForegroundWindow(wnd) keyboard.TypeString(r"{\4}", use_modifiers=True) if not windowing.WaitForProcessExit(proc, timeout): windowing.EndProcess(proc) load_time = "crashed" proc = None except pywintypes.error: proc = None load_time = "crashed" ret.append( (url, load_time) ) if proc: windowing.SetForegroundWindow(wnd) keyboard.TypeString(r"{\4}", use_modifiers=True) if not windowing.WaitForProcessExit(proc, timeout): windowing.EndProcess(proc) return ret def main(): # We're being invoked rather than imported, so run some tests path = r"c:\sitecompare\scrapes\chrome\0.1.97.0" windowing.PreparePath(path) # Scrape three sites and save the results Scrape([ "http://www.microsoft.com", "http://www.google.com", "http://www.sun.com"], path, (1024, 768), (0, 0)) return 0 if __name__ == "__main__": sys.exit(main())