Object
The Mechanize library is used for automating interaction with a website. It can follow links and submit forms. Form fields can be populated and submitted. A history of URLs is maintained and can be queried.
# Example: submit the query "Hello" through the form named "f" on
# google.com and print the body of the resulting page.
require 'rubygems'
require 'mechanize'
require 'logger'

# Send the agent's log output to mech.log.
agent = Mechanize.new { |a| a.log = Logger.new("mech.log") }
agent.user_agent_alias = 'Mac Safari'

page = agent.get("http://www.google.com/")

# Fill in the "q" field of the form named "f", then submit the form.
search_form = page.form_with(:name => "f")
search_form.field_with(:name => "q").value = "Hello"
search_results = agent.submit(search_form)

puts search_results.body
User Agent aliases
The version of Mechanize you are using.
Controls how this agent deals with redirects. If it is set to true or :all, all 3xx redirects are automatically followed. This is the default behavior. If it is :permanent, only 301 (Moved Permanently) redirects are followed. If it is a false value, no redirects are followed.
# File lib/mechanize.rb, line 120
# Copies this class's html_parser and log onto +child+ wherever the child
# does not already have a truthy value, then defers to super.
# NOTE(review): presumably defined on the singleton class so that Ruby
# invokes it when the class is subclassed -- confirm against the full file.
def inherited(child)
  child.html_parser = html_parser unless child.html_parser
  child.log = log unless child.log
  super
end
# File lib/mechanize.rb, line 127 def initialize # attr_accessors @cookie_jar = CookieJar.new @log = nil @open_timeout = nil @read_timeout = nil @user_agent = AGENT_ALIASES['Mechanize'] @watch_for_set = nil @history_added = nil @ca_file = nil # OpenSSL server certificate file # callback for OpenSSL errors while verifying the server certificate # chain, can be used for debugging or to ignore errors by always # returning _true_ @verify_callback = nil @cert = nil # OpenSSL Certificate @key = nil # OpenSSL Private Key @pass = nil # OpenSSL Password @redirect_ok = true @gzip_enabled = true # attr_readers @history = Mechanize::History.new @pluggable_parser = PluggableParser.new # Auth variables @user = nil # Auth User @password = nil # Auth Password @digest = nil # DigestAuth Digest @auth_hash = {} # Keep track of urls for sending auth @request_headers= {} # A hash of request headers to be used @conditional_requests = true @follow_meta_refresh = false @redirection_limit = 20 # Connection Cache & Keep alive @keep_alive_time = 300 @keep_alive = true @scheme_handlers = Hash.new { |h,k| h[k] = lambda { |link, page| raise UnsupportedSchemeError.new(k) } } @scheme_handlers['http'] = lambda { |link, page| link } @scheme_handlers['https'] = @scheme_handlers['http'] @scheme_handlers['relative'] = @scheme_handlers['http'] @scheme_handlers['file'] = @scheme_handlers['http'] @pre_connect_hook = Chain::PreConnectHook.new @post_connect_hook = Chain::PostConnectHook.new set_http @html_parser = self.class.html_parser yield self if block_given? end
Sets the user and password to be used for authentication.
# File lib/mechanize.rb, line 225
# Stores +user+ and +password+ on the agent for later authentication.
# Returns +password+.
def auth(user, password)
  @user, @password = user, password
  password
end
Equivalent to the browser back button. Returns the most recent page visited.
# File lib/mechanize.rb, line 347
# Pops the newest entry off the history and returns it.
def back
  popped = @history.pop
  popped
end
If the parameter is a string or regular expression, finds the link whose text matches it (or, if there is no such link, the submit button whose value matches it) and clicks it. Otherwise, clicks the Mechanize::Page::Link object passed in. Returns the page fetched.
# File lib/mechanize.rb, line 324
# Clicks +link+ and returns the page fetched.
#
# link:: a String or Regexp matched first against link text and then
#        against submit-button values on the current page, or a
#        link-like object that responds to +href+ or supports
#        <tt>['href']</tt> / <tt>['src']</tt>.
#
# Returns nil when a String/Regexp matches neither a link nor a submit
# button.
def click(link)
  case link
  when String, Regexp
    real_link = page.link_with(:text => link)
    if real_link
      click real_link
    else
      # Remember the matching button of the first form that has one, so
      # it can be submitted together with that form.
      button = nil
      form = page.forms.find do |f|
        button = f.button_with(:value => link)
        button.is_a? Form::Submit
      end
      submit form, button if form
    end
  else
    # Only ask for the owning page when the object has one. The previous
    # blanket `rescue` also hid unrelated errors raised by link.page.
    referer = link.respond_to?(:page) ? link.page : nil
    href = link.respond_to?(:href) ? link.href :
      (link['href'] || link['src'])
    get(:url => href, :referer => (referer || current_page()))
  end
end
Returns the current page loaded by Mechanize
# File lib/mechanize.rb, line 435
# Returns the last entry of the history without removing it.
def current_page
  newest = @history.last
  newest
end
Sends a DELETE request to url with query_params and the given options:
delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
# File lib/mechanize.rb, line 292
# Issues the request through #head with the verb overridden to :delete,
# records the resulting page in the history and returns it.
def delete(url, query_params = {}, options = {})
  delete_options = options.merge(:verb => :delete)
  head(url, query_params, delete_options).tap do |response|
    add_to_history(response)
  end
end
Fetches the URL passed in and returns a page.
# File lib/mechanize.rb, line 232
# Fetches a URL, adds the page to the history, yields it to the optional
# block and returns it.
#
# options::    either the URL itself, or a Hash with :url (required) and
#              optional :params, :referer, :headers and :verb keys.
# parameters:: request parameters. For backward compatibility a
#              non-enumerable value here is treated as the referer.
# referer::    a page, URI or URL string. When omitted, an absolute
#              http(s) URL gets a blank page as referer; anything else
#              uses the current page (or a blank page if there is none).
#
# Raises ArgumentError when a Hash is given without :url.
def get(options, parameters = [], referer = nil)
  verb = :get

  if options.is_a?(Hash)
    url = options[:url]
    raise ArgumentError, "url must be specified" unless url
    parameters = options[:params] || []
    referer = options[:referer]
    headers = options[:headers]
    verb = options[:verb] || verb
  else
    url = options
    unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
      referer = parameters
      parameters = []
    end
  end

  unless referer
    # This must be a regexp literal (%r{}). A plain %{} string on the
    # right-hand side of =~ raises TypeError.
    if url.to_s =~ %r{\Ahttps?://}
      referer = Page.new(nil, {'content-type'=>'text/html'})
    else
      referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
    end
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # Mechanize::Page#base
  unless referer.is_a?(Mechanize::File)
    referer = referer.is_a?(String) ?
      Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
      Page.new(referer, {'content-type' => 'text/html'})
  end

  # fetch the page
  page = fetch_page(
    :uri => url,
    :referer => referer,
    :headers => headers || {},
    :verb => verb,
    :params => parameters
  )
  add_to_history(page)
  yield page if block_given?
  page
end
Fetch a file and return the contents of the file.
# File lib/mechanize.rb, line 317
# Fetches +url+ with #get and returns the body of the result.
def get_file(url)
  fetched = get(url)
  fetched.body
end
Sends a HEAD request to url with query_params and the given options:
head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
# File lib/mechanize.rb, line 303
# Builds the request options (caller-supplied +options+ override the
# defaults), fetches the page, yields it to the optional block and
# returns it. This method itself does not add the page to the history.
def head(url, query_params = {}, options = {})
  defaults = {
    :uri     => url,
    :headers => {},
    :params  => query_params,
    :verb    => :head
  }

  # fetch the page
  page = fetch_page(defaults.merge(options))
  yield page if block_given?
  page
end
# File lib/mechanize.rb, line 189
# Assigns the logger through the agent's class (self.class.log=), not
# through an instance variable of this agent.
def log=(l)
  self.class.log = l
end
# File lib/mechanize.rb, line 188
# Returns the max_size of the history.
def max_history
  @history.max_size
end
# File lib/mechanize.rb, line 187
# Sets the max_size of the history to +length+.
def max_history=(length)
  @history.max_size = length
end
Posts to the given URL with the request entity. The request entity is specified by either a string, or a list of key-value pairs represented by a hash or an array of arrays.
Examples:
# Hash of form fields
agent.post('http://example.com/', "foo" => "bar")

# Array of key-value pairs
agent.post('http://example.com/', [ ["foo", "bar"] ])

# Raw string entity with an explicit Content-Type header
agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
# File lib/mechanize.rb, line 361
# Posts +query+ to +url+.
#
# A String +query+ is sent as-is via request_with_entity. Anything else
# is iterated as key/value pairs and turned into an in-memory form:
# IO values become file uploads (which switches the form to
# multipart/form-data); every other value becomes an ordinary field.
def post(url, query={}, headers={})
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Create a fake form
  node = {
    'method'  => 'POST',
    'enctype' => 'application/x-www-form-urlencoded'
  }
  # Form.new is handed a plain Hash in place of a parsed node, so give
  # it a #search that finds nothing.
  def node.search(*args); []; end

  form = Form.new(node)

  query.each do |k, v|
    unless v.is_a?(IO)
      form.fields << Form::Field.new({'name' => k.to_s},v)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
    upload.file_data = v.read
    form.file_uploads << upload
  end

  post_form(url, form, headers)
end
# File lib/mechanize.rb, line 196
# Returns the hooks held by the post-connect hook object.
def post_connect_hooks
  registered = @post_connect_hook.hooks
  registered
end
# File lib/mechanize.rb, line 192
# Returns the hooks held by the pre-connect hook object.
def pre_connect_hooks
  registered = @pre_connect_hook.hooks
  registered
end
Sends a PUT request to url with entity as the request body and the given options:
put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
# File lib/mechanize.rb, line 283
# Delegates to request_with_entity with the verb fixed to :put and
# returns its result.
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end
# File lib/mechanize.rb, line 409
# Sends +entity+ as the body of a +verb+ request to +url+, adds the
# resulting page to the history and returns it.
#
# options:: may override :uri, :referer and :headers. :verb and :params
#           are always set by this method. Caller headers are merged
#           over the default Content-Type and Content-Length.
#
# The referer defaults to the current page, or to a blank page when
# there is none.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri => url,
    :referer => cur_page,
    :headers => {},
  }.update(options)

  # Content-Length is a byte count. String#size counts characters, which
  # is too small for multibyte text, so prefer bytesize when available.
  byte_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => byte_length.to_s,
  }.update(options[:headers] || {}) # tolerate an explicit :headers => nil

  options.update({
    :verb => verb,
    :params => [entity],
    :headers => headers,
  })

  page = fetch_page(options)
  add_to_history(page)
  page
end
Sets the proxy address, port, user, and password. addr should be a host name, with no "http://" prefix.
# File lib/mechanize.rb, line 202
# Builds an http:// URI from +addr+ and +port+, attaches +user+ and
# +pass+ when given, and passes the URI to set_http. Always returns nil.
def set_proxy(addr, port, user = nil, pass = nil)
  proxy_uri = URI.parse("http://#{addr}")
  proxy_uri.port = port
  # The user is assigned before the password, as in the original order.
  proxy_uri.user = user if user
  proxy_uri.password = pass if pass
  set_http(proxy_uri)
  nil
end
Submit a form with an optional button. Without a button:
page = agent.get('http://example.com')
agent.submit(page.forms.first)
With a button:
agent.submit(page.forms.first, page.forms.first.buttons.first)
# File lib/mechanize.rb, line 393
# Submits +form+, first adding +button+ to the query when one is given.
#
# POST forms are handed to post_form. GET forms are fetched with #get,
# using the form action with any trailing "?..." part removed, the form's
# built query as parameters and the form's page as referer.
#
# Raises RuntimeError for any other form method.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  http_method = form.method.upcase
  if http_method == 'POST'
    post_form(form.action, form, headers)
  elsif http_method == 'GET'
    bare_action = form.action.gsub(/\?[^\?]*$/, '')
    get(
      :url     => bare_action,
      :params  => form.build_query,
      :headers => headers,
      :referer => form.page
    )
  else
    raise "unsupported method: #{form.method.upcase}"
  end
end
Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.
# File lib/mechanize.rb, line 454
# Yields the agent to the block and returns the block's value. The
# history is replaced with a copy taken before the block ran, whether
# the block returns normally or raises.
def transact
  snapshot = @history.dup
  begin
    yield(self)
  ensure
    @history = snapshot
  end
end
Set the user agent for the Mechanize object. See AGENT_ALIASES
# File lib/mechanize.rb, line 215
# Looks +al+ up in AGENT_ALIASES and assigns the result as the user
# agent. Raises RuntimeError when the lookup yields nil or false.
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end
Generated with the Darkfish Rdoc Generator 2.