class Mechanize::Page

Synopsis

This class encapsulates an HTML page. If Mechanize finds a content type of 'text/html' or 'application/xhtml+xml', this class will be instantiated and returned.

Example

require 'rubygems'
require 'mechanize'

# Fetch a URL with a fresh agent; the class of the returned object is shown
# in the trailing comment.
agent = Mechanize.new
agent.get('http://google.com/').class  #=> Mechanize::Page

Attributes

mech[RW]

Public Class Methods

new(uri=nil, response=nil, body=nil, code=nil, mech=nil) click to toggle source
Calls superclass method Mechanize::File.new
# File lib/mechanize/page.rb, line 26
# Builds a page from a response whose body is an HTML document.
#
# uri      - passed through to the superclass constructor.
# response - header collection; must answer #[] and either #each_header or
#            #each (yielding name/value pairs).
# body     - raw document text, or nil.
# code     - passed through to the superclass constructor.
# mech     - stored in @mech unless the superclass already set it.
#
# Raises Mechanize::ContentTypeError when the 'content-type' header does not
# begin with text/html or application/xhtml+xml (compared case-insensitively).
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  @encoding = nil

  # Pick up a charset announced in any response header.
  method = response.respond_to?(:each_header) ? :each_header : :each
  response.send(method) do |_header, v|
    next unless v =~ /charset/
    # The parameter value may be quoted (charset="utf-8"); capture only the
    # bare token so the quote characters never end up in the encoding name.
    encoding = v[/charset=["']?([^;"' ]+)/, 1]
    @encoding = encoding unless encoding == 'none'
  end

  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later
  body.force_encoding('ASCII-8BIT') if body && body.respond_to?(:force_encoding)

  @encoding ||= Util.detect_charset(body)

  super(uri, response, body, code)
  @mech ||= mech

  # A <meta ... charset ...> tag in the document clears whatever encoding was
  # found above (presumably so the parser honours the document's own
  # declaration -- confirm against the parser's behaviour).
  @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/

  # Anchor BOTH alternatives at the start of the header value (the previous
  # pattern anchored only text/html) and ignore case, since media type names
  # are case-insensitive.
  unless response['content-type'] =~ %r{\A(?:text/html|application/xhtml\+xml)}i
    raise Mechanize::ContentTypeError.new(response['content-type'])
  end

  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end

Public Instance Methods

base_with(criteria)() click to toggle source

Find a single base tag matching criteria. Example:

page.base_with(:href => /foo/).click
# File lib/mechanize/page.rb, line 152
    
bases() click to toggle source

Return a list of all base tags

# File lib/mechanize/page.rb, line 255
# Memoized list of Base objects, one for every node returned by
# search('base').
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end
bases_with(criteria)() click to toggle source

Find all base tags matching criteria. Example:

page.bases_with(:href => /foo/).each do |base|
  puts base.href
end
# File lib/mechanize/page.rb, line 161
    
content_type() click to toggle source

Get the content type

# File lib/mechanize/page.rb, line 102
# Value stored under 'content-type' in the response.
def content_type
  header_value = response['content-type']
  header_value
end
encoding() click to toggle source
# File lib/mechanize/page.rb, line 82
# Encoding reported by the parsed document, or nil when the parser result
# does not respond to #encoding.
def encoding
  document = parser
  return nil unless document.respond_to?(:encoding)

  document.encoding
end
encoding=(encoding) click to toggle source
# File lib/mechanize/page.rb, line 68
# Sets the encoding handed to the parser.
#
# When a parser has already been built and reports a different encoding
# (compared case-insensitively), it is discarded so that the next call to
# #parser rebuilds it. A parser that does not respond to #encoding is left
# untouched instead of raising NoMethodError, mirroring the respond_to?
# guard used by the #encoding reader.
#
# Returns the encoding that was passed in.
def encoding=(encoding)
  @encoding = encoding

  if @parser && @parser.respond_to?(:encoding)
    parser_encoding = @parser.encoding
    if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
      # lazy reinitialize the parser with the new encoding
      @parser = nil
    end
  end

  encoding
end
form_with(criteria)() click to toggle source

Find a single form matching criteria. Example:

page.form_with(:action => '/post/login.php') do |f|
  ...
end
# File lib/mechanize/page.rb, line 120
    
forms() click to toggle source

Return a list of all form tags

# File lib/mechanize/page.rb, line 229
# Memoized list of Form objects built from the nodes returned by
# search('form'). A form whose action is unset gets @uri.to_s as its action.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    Form.new(form_node, @mech, self).tap do |built|
      built.action = @uri.to_s unless built.action
    end
  end
end
forms_with(criteria)() click to toggle source

Find all forms matching criteria. Example:

page.forms_with(:action => '/post/login.php').each do |f|
  ...
end
# File lib/mechanize/page.rb, line 129
    
frame_with(criteria)() click to toggle source

Find a single frame tag matching criteria. Example:

page.frame_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 168
    
frames() click to toggle source

Return a list of all frame tags

# File lib/mechanize/page.rb, line 262
# Memoized list of Frame objects, one for every node returned by
# search('frame').
def frames
  return @frames if @frames

  frame_nodes = search('frame')
  @frames = frame_nodes.map { |frame_node| Frame.new(frame_node, @mech, self) }
end
frames_with(criteria)() click to toggle source

Find all frame tags matching criteria. Example:

page.frames_with(:src => /foo/).each do |frame|
  p frame.src
end
# File lib/mechanize/page.rb, line 177
    
iframe_with(criteria)() click to toggle source

Find a single iframe tag matching criteria. Example:

page.iframe_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 184
    
iframes() click to toggle source

Return a list of all iframe tags

# File lib/mechanize/page.rb, line 269
# Memoized list of Frame objects (the same wrapper class used by #frames),
# one for every node returned by search('iframe').
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end
iframes_with(criteria)() click to toggle source

Find all iframe tags matching criteria. Example:

page.iframes_with(:src => /foo/).each do |iframe|
  p iframe.src
end
# File lib/mechanize/page.rb, line 193
    
image_urls() click to toggle source
# File lib/mechanize/page.rb, line 281
# Memoized list of the distinct url values of the page's images, in
# first-seen order.
def image_urls
  return @image_urls if @image_urls

  every_url = images.map { |image| image.url }
  @image_urls = every_url.uniq
end
images() click to toggle source

Return a list of all img tags

# File lib/mechanize/page.rb, line 276
# Memoized list of Image objects, one for every node returned by
# search('img').
def images
  return @images if @images

  image_nodes = search('img')
  @images = image_nodes.map { |image_node| Image.new(image_node, self) }
end
labels() click to toggle source

Return a list of all label tags

# File lib/mechanize/page.rb, line 287
# Memoized list of Label objects, one for every node returned by
# search('label').
def labels
  return @labels if @labels

  @labels = search('label').map do |label_node|
    Label.new(label_node, self)
  end
end
labels_hash() click to toggle source
# File lib/mechanize/page.rb, line 292
# Memoized Hash mapping each label's 'for' attribute to the label itself.
# Labels whose #for is falsy are left out; when several labels share a key,
# the last one wins.
def labels_hash
  return @labels_hash if @labels_hash

  targeted = labels.select { |label| label.for }
  pairs = targeted.map { |label| [label.node['for'], label] }
  @labels_hash = Hash[pairs]
end
meta() click to toggle source

Return a list of the meta refresh tags found in the document head

# File lib/mechanize/page.rb, line 239
# Memoized list built from the nodes returned by search('head > meta').
#
# Only nodes that carry both 'http-equiv' and 'content' attributes, and whose
# 'http-equiv' equals 'refresh' ignoring case, are considered. For those,
# Meta.parse is given the content and the page uri; inside its block the
# node's 'delay' and 'href' attributes are set and a Meta is built. Whatever
# Meta.parse returns is collected, and nil entries are dropped.
def meta
  return @meta if @meta

  collected = search('head > meta').map do |node|
    equiv = node['http-equiv']
    content = node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    Meta.parse(content, uri) do |delay, href|
      node['delay'] = delay
      node['href'] = href
      Meta.new(node, @mech, self)
    end
  end

  @meta = collected.compact
end
parser() click to toggle source
# File lib/mechanize/page.rb, line 86
# Lazily parses the page and caches the resulting document in @parser.
#
# Nothing is parsed (and the current, unset @parser is returned) unless both
# body and response are present. When the agent's html_parser is
# Nokogiri::HTML, html_body is parsed with a nil second argument and
# @encoding as the third; any other parser receives html_body alone.
def parser
  return @parser if @parser
  return @parser unless body && response

  engine = mech.html_parser
  @parser =
    if engine == Nokogiri::HTML
      engine.parse(html_body, nil, @encoding)
    else
      engine.parse(html_body)
    end
end
Also aliased as: root
root()
Alias for: parser
title() click to toggle source
# File lib/mechanize/page.rb, line 52
# Title of the parsed document, or nil when there is no parser result or the
# title is missing/empty.
#
# Uses the document's own #title when it responds to it; otherwise falls back
# to the inner text of search('title'). A non-nil result is cached in @title.
def title
  return @title if @title

  document = parser
  text = nil
  if document
    text = document.respond_to?(:title) ? document.title : document.search('title').inner_text
    text = nil unless text && !text.empty?
  end

  @title = text
end

Private Instance Methods

html_body() click to toggle source
# File lib/mechanize/page.rb, line 305
# Text handed to the HTML parser: '' when there is no body, a minimal
# '<html></html>' document when the body has zero length, and the body itself
# otherwise.
def html_body
  content = body
  return '' unless content
  return '<html></html>' unless content.length > 0

  content
end