60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Command for scraping images from a URL or list of URLs.
|
|
|
|
Prerequisites:
|
|
1. The command_line package from tools/site_compare
|
|
2. Either the IE BHO or Firefox extension (or both)
|
|
|
|
Installation:
|
|
1. Build the IE BHO, or call regsvr32 on a prebuilt binary
|
|
2. Add a file called "measurepageloadtimeextension@google.com" to
|
|
the default Firefox profile directory under extensions, containing
|
|
the path to the Firefox extension root
|
|
|
|
Invoke with the command line arguments declared in CreateCommand below
(plus those added by browser_iterate.SetupIterationCommandLine).
|
|
"""
|
|
|
|
import command_line
|
|
|
|
from drivers import windowing
|
|
from utils import browser_iterate
|
|
|
|
def CreateCommand(cmdline):
  """Registers the "scrape" command and its arguments for parsing.

  Args:
    cmdline: command line object exposing AddCommand; the new command is
      added to it with ExecuteScrape as its handler.
  """
  scrape_command = cmdline.AddCommand(
    ["scrape"],
    "Scrapes an image from a URL or series of URLs.",
    None,
    ExecuteScrape)

  # browser_iterate configures the command first (presumably adding its
  # shared iteration options); the scrape-specific arguments follow.
  browser_iterate.SetupIterationCommandLine(scrape_command)

  scrape_command.AddArgument(
    ["-log", "--logfile"],
    "File to write text output",
    type="string")
  scrape_command.AddArgument(
    ["-out", "--outdir"],
    "Directory to store scrapes",
    type="string",
    required=True)
|
|
|
|
|
|
def ExecuteScrape(command):
  """Executes the Scrape command.

  Opens the optional log file, hands a per-page callback to
  browser_iterate.Iterate, and always closes the log file afterwards.

  Args:
    command: parsed command; indexed with "--logfile" (optional path of a
      text log) and "--outdir" (directory for the saved scrapes), and
      passed through unchanged to browser_iterate.Iterate.
  """
  # Look the option up once; a falsy value means "no log file requested".
  log_path = command["--logfile"]
  if log_path:
    log_file = open(log_path, "w")
  else:
    log_file = None

  def ScrapeResult(url, proc, wnd, result):
    """Capture and save the scrape.

    Args:
      url: URL that was loaded; used to derive the output filename.
      proc: unused here.
      wnd: window handed to windowing.ScrapeWindow.
      result: text written verbatim to the log file, if one is open.
    """
    if log_file:
      log_file.write(result)

    # Scrape the page
    image = windowing.ScrapeWindow(wnd)
    filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp")
    image.save(filename)

  try:
    browser_iterate.Iterate(command, ScrapeResult)
  finally:
    # Close the log file even if iteration or a scrape raised, so the
    # handle is not leaked and buffered output reaches disk.
    if log_file:
      log_file.close()
|