#!/usr/bin/env python3
from __future__ import print_function
import sys
import mechanize
# Base URI of the site hosting the example form: the first command-line
# argument when one is given, otherwise the default host.  Testing for
# "> 1" (rather than "== 1") also covers an empty sys.argv, which would
# otherwise raise IndexError on sys.argv[1].
if len(sys.argv) > 1:
    uri = sys.argv[1]
else:
    uri = "http://wwwsearch.sourceforge.net/"
# Open the example page (its path is joined onto `uri`) and collect the
# forms that the browser reports for it.
br = mechanize.Browser()
br.open(mechanize.urljoin(uri, "mechanize/example.html"))
forms = list(br.forms())
# Alternative, left disabled: parse the forms from a local copy of the
# page instead of fetching it.
# f = open("example.html")
# forms = mechanize.ParseFile(f, "http://example.com/example.html",
#                             backwards_compat=False)
# f.close()
# The rest of the script works with the first form on the page.
form = forms[0]
print(form) # very useful!
# A 'control' is a graphical HTML form widget: a text entry box, a
# dropdown 'select' list, a checkbox, etc.
# Indexing the form by control name retrieves and sets control values.
original_text = form["comments"] # a string, NOT a Control instance
form["comments"] = "Blah."
# Controls that represent lists (checkbox, select and radio lists) are
# ListControl instances. Their values are sequences of list item names.
# They come in two flavours: single- and multiple-selection:
form["favorite_cheese"] = ["brie"] # single
form["cheeses"] = ["parmesan", "leicester", "cheddar"] # multi
# equivalent, but more flexible:
form.set_value(["parmesan", "leicester", "cheddar"], name="cheeses")
# Add files to FILE controls with .add_file(). Only call this multiple
# times if the server is expecting multiple files.
# NOTE(review): the file objects opened below are never closed by this
# script -- presumably they must stay open until the form has been
# submitted; confirm, and close them afterwards if so.
# add a file, default value for MIME type, no filename sent to server
form.add_file(open("data.dat", 'rb'))
# add a second file, explicitly giving MIME type, and telling the server
# what the filename is
form.add_file(open("data.txt", 'rb'), "text/plain", "data.txt")
# All Controls may be disabled (equivalent of greyed-out in browser)...
control = form.find_control("comments")
print(control.disabled)
# ...or readonly
print(control.readonly)
# The readonly and disabled attributes can also be assigned to:
control.disabled = False
# convenience method, used here to make all controls writable (unless
# they're disabled):
form.set_all_readonly(False)
# A couple of notes about list controls and HTML:
# 1. List controls correspond to either a single SELECT element, or
# multiple INPUT elements. Items correspond to either OPTION or INPUT
# elements. For example, this is a SELECT control, named "control1":
#
#    <select name="control1">
#     <option>foo</option>
#     <option value="1">bar</option>
#    </select>
#
# and this is a CHECKBOX control, named "control2":
#
#    <input type="checkbox" name="control2" value="foo" id="cbe1">
#    <input type="checkbox" name="control2" value="bar" id="cbe2">
#
# You know the latter is a single control because all the name attributes
# are the same.
# 2. Item names are the strings that go to make up the value that should
# be returned to the server. These strings come from various different
# pieces of text in the HTML. The HTML standard and the mechanize
# docstrings explain in detail, but playing around with an HTML file,
# ParseFile() and 'print(form)' is very useful to understand this!
# You can get the Control instances from inside the form...
control = form.find_control("cheeses", type="select")
print(control.name, control.value, control.type)
control.value = ["mascarpone", "curd"]
# ...and the Item instances from inside the Control
item = control.get("curd")
print(item.name, item.selected, item.id, item.attrs)
item.selected = False
# Controls may be referred to by label:
# find the control whose label contains the *substring* "select a cheese"
# (e.g., a label "Please select a cheese" would match).
control = form.find_control(label="select a cheese")
# You can explicitly say that you're referring to a ListControl:
# set value of "cheeses" ListControl
form.set_value(["gouda"], name="cheeses", kind="list")
# equivalent:
form.find_control(name="cheeses", kind="list").value = ["gouda"]
# the first example is also almost equivalent to the following (but
# insists that the control be a ListControl -- so it will skip any
# non-list controls that come before the control we want)
form["cheeses"] = ["gouda"]
# The kind argument can also take values "multilist", "singlelist", "text",
# "clickable" and "file":
# find first control that will accept text, and scribble in it
form.set_value("rhubarb rhubarb", kind="text", nr=0)
# find, and set the value of, the first single-selection list control
form.set_value(["spam"], kind="singlelist", nr=0)
# You can find controls with a general predicate function:
def control_has_caerphilly(control):
    """Return True if *control* has an item named "caerphilly".

    Written to be passed as the ``predicate`` argument of
    ``form.find_control()``.

    Args:
        control: an object exposing an ``items`` iterable whose elements
            have a ``name`` attribute.

    Returns:
        True when at least one item's name equals "caerphilly", otherwise
        False (an explicit bool rather than an implicit None).
    """
    return any(item.name == "caerphilly" for item in control.items)
# Look up the first list control for which the predicate returns true.
form.find_control(kind="list", predicate=control_has_caerphilly)
# HTMLForm.controls is a list of all controls in the form
for control in form.controls:
    # Stop the whole script as soon as any control holds this value.
    if control.value == "inquisition":
        sys.exit()
# Control.items is a list of all Item instances in the control
for item in form.find_control("cheeses").items:
    print(item.name)
# To remove an item from a list control, delete it from .items:
cheeses = form.find_control("cheeses")
curd = cheeses.get("curd")
del cheeses.items[cheeses.items.index(curd)]
# To add an item to a list control, instantiate an Item with its control
# and attributes.
# Note that you are responsible for getting the attributes correct here,
# and these are not quite identical to the original HTML, due to
# defaulting rules and a few special attributes (e.g. Items that represent
# OPTIONs have a special "contents" key in their .attrs dict). In future
# there will be an explicitly supported way of using the parsing logic to
# add items and controls from HTML strings without knowing these details.
mechanize.Item(cheeses, {"contents": "mascarpone",
"value": "mascarpone"})
# You can specify list items by label using set/get_value_by_label() and
# the label argument of the .get() method. Sometimes labels are easier to
# maintain than names, sometimes the other way around.
form.set_value_by_label(["Mozzarella", "Caerphilly"], "cheeses")
# Which items are present, selected, and successful?
# is the "parmesan" item of the "cheeses" control successful (selected
# and not disabled)?
print("parmesan" in form["cheeses"])
# is the "parmesan" item of the "cheeses" control selected?
print("parmesan" in [
item.name for item in form.find_control("cheeses").items if item.selected])
# does the "cheeses" control have a "caerphilly" item?
print("caerphilly" in [
item.name for item in form.find_control("cheeses").items])
# Sometimes one wants to set or clear individual items in a list, rather
# than setting the whole .value:
# select the item named "gorgonzola" in the first control named "cheeses"
form.find_control("cheeses").get("gorgonzola").selected = True
# You can be more specific:
# deselect "edam" in third CHECKBOX control
form.find_control(type="checkbox", nr=2).get("edam").selected = False
# deselect item labelled "Mozzarella" in control with id "chz"
form.find_control(id="chz").get(label="Mozzarella").selected = False
# Often, a single checkbox (a CHECKBOX control with a single item) is
# present. In that case, the name of the single item isn't of much
# interest, so it's a good idea to check and uncheck the box without
# using the item name:
form.find_control("smelly").items[0].selected = True # check
form.find_control("smelly").items[0].selected = False # uncheck
# Items may be disabled (selecting or de-selecting a disabled item is
# not allowed):
control = form.find_control("cheeses")
print(control.get("emmenthal").disabled)
control.get("emmenthal").disabled = True
# enable all items in control
control.set_all_items_disabled(False)
# Submit the form and print the response: final URL, headers, then body.
request2 = form.click()  # mechanize.Request object
try:
    response2 = mechanize.urlopen(request2)
except mechanize.HTTPError as http_error:
    # The HTTP error is itself used as the response below.  Python 3
    # unbinds the "as" name when the handler exits, so it must be
    # re-assigned here; binding it directly as "response2" left that
    # name undefined for the code that follows.
    response2 = http_error
try:
    print(response2.geturl())
    # headers
    for name, value in response2.info().items():
        # Compare case-insensitively so the Date header is skipped
        # whether the library reports it as "date" or "Date".
        if name.lower() != "date":
            print("%s: %s" % (name.title(), value))
    print(response2.read())  # body
finally:
    # Close the response even if printing it fails part-way through.
    response2.close()