Each MultiXml parser is expected to parse an XML document into a Hash. The conversion rules are:
Each document starts out as an empty Hash.
Reading an element creates an entry in the parent Hash that has a key of the element name and a value of a Hash with the element's attributes as key-value pairs. Children are added as described by this rule.
Text and CDATA are stored in the parent element Hash with a key of '__content__' and a value of the text itself.
If a key already exists in the Hash then the value associated with the key is converted to an Array with the old and new value in it.
Other elements such as the xml prolog, doctype, and comments are ignored.
Returns the default parser, chosen based on what you currently have loaded and installed. It first checks whether any parsers are already loaded; if none are loaded, it checks which ones are installed.
# File lib/multi_xml.rb, line 76
#
# Picks the default parser.
#
# A parser whose library is already loaded wins, checked in the order
# Ox, LibXML, Nokogiri. Otherwise each library listed in REQUIREMENT_MAP is
# required in turn, and the parser paired with the first library that loads
# is returned.
#
# Returns the parser identifier.
# Raises RuntimeError when none of the libraries in REQUIREMENT_MAP can be
# loaded.
def default_parser
  return :ox if defined?(::Ox)
  return :libxml if defined?(::LibXML)
  return :nokogiri if defined?(::Nokogiri)

  REQUIREMENT_MAP.each do |(library, parser)|
    begin
      require library
      return parser
    rescue LoadError
      # This library is not installed; try the next candidate.
      next
    end
  end

  # Falling off the end of the loop used to return REQUIREMENT_MAP itself
  # (the value of #each), which is not a usable parser specification.
  # Fail loudly with a message that names the actual problem instead.
  candidates = REQUIREMENT_MAP.map { |(library, _parser)| library }.join(', ')
  raise "No XML parser detected. Tried to load: #{candidates}."
end
Parse an XML string or IO into Ruby.
Options
:symbolize_keys
If true, will use symbols instead of strings for the keys.
:disallowed_types
Types to disallow from being typecast. Defaults to `['yaml', 'symbol']`. Use `[]` to allow all types.
# File lib/multi_xml.rb, line 117
#
# Parses an XML String or IO-like object into a Hash.
#
# xml     - The XML source. nil is treated as an empty document. Anything
#           that does not respond to #read is wrapped in a StringIO.
# options - Hash of options (default: {}):
#           :symbolize_keys   - when truthy, the result is passed through
#                               symbolize_keys.
#           :disallowed_types - forwarded to typecast_xml_value.
#
# Returns the resulting Hash ({} for empty or blank input).
# Raises ParseError when the active parser raises its parse_error class.
# DisallowedTypeError is re-raised unchanged.
def parse(xml, options={})
  xml ||= ''

  # Strip non-destructively: strip! would modify the caller's String and
  # raises when that String is frozen.
  xml = xml.strip if xml.respond_to?(:strip)

  begin
    xml = StringIO.new(xml) unless xml.respond_to?(:read)

    # Peek at the first character so an empty document short-circuits to {}
    # without invoking the parser, then push it back for the real parse.
    char = xml.getc
    return {} if char.nil?
    xml.ungetc(char)

    hash = typecast_xml_value(undasherize_keys(parser.parse(xml)), options[:disallowed_types]) || {}
  rescue DisallowedTypeError
    raise
  rescue parser.parse_error => error
    # Normalize parser-specific failures to ParseError, keeping the backtrace.
    raise ParseError, error.to_s, error.backtrace
  end

  hash = symbolize_keys(hash) if options[:symbolize_keys]
  hash
end
Get the current parser class.
# File lib/multi_xml.rb, line 66
#
# Returns the parser currently in use. If none has been assigned yet, the
# result of default_parser is first installed through parser=.
def parser
  self.parser = self.default_parser unless defined?(@@parser)
  @@parser
end
Set the XML parser utilizing a symbol, string, or class. Supported by default are:
:libxml
:nokogiri
:ox
:rexml
# File lib/multi_xml.rb, line 98
#
# Sets the parser in use.
#
# new_parser - A String or Symbol naming a parser, or a Class/Module that is
#              used as the parser directly. For a name, the file
#              "multi_xml/parsers/<downcased name>" is required and the
#              constant is looked up in MultiXml::Parsers after capitalizing
#              each underscore-separated part of the name.
#
# Raises RuntimeError for any other kind of argument.
def parser=(new_parser)
  if new_parser.is_a?(String) || new_parser.is_a?(Symbol)
    parser_name = new_parser.to_s
    require "multi_xml/parsers/#{parser_name.downcase}"
    constant_name = parser_name.split('_').map { |part| part.capitalize }.join('')
    @@parser = MultiXml::Parsers.const_get(constant_name)
  elsif new_parser.is_a?(Module)
    # Class is a Module, so this covers both classes and plain modules.
    @@parser = new_parser
  else
    raise "Did not recognize your parser specification. Please specify either a symbol or a class."
  end
end
# File lib/multi_xml.rb, line 164
#
# Builds a file-like StringIO from Base64-encoded content.
#
# file   - Base64-encoded String holding the file body.
# entity - Hash whose 'name' and 'content_type' entries are copied onto the
#          result as original_filename and content_type.
#
# Returns the StringIO, extended with FileLike.
def parse_file(file, entity)
  decoded = Base64.decode64(file)
  StringIO.new(decoded).tap do |io|
    io.extend(FileLike)
    io.original_filename = entity['name']
    io.content_type = entity['content_type']
  end
end
# File lib/multi_xml.rb, line 172
#
# Returns a copy of the given structure in which every String key of every
# Hash has been converted to a Symbol.
#
# hash - Usually a Hash. Nested Hashes are converted recursively, and Arrays
#        are walked so that Hashes inside them (e.g. repeated XML elements)
#        are converted too. Any other value is returned unchanged.
#
# Keys that are not Strings are left as they are.
def symbolize_keys(hash)
  case hash
  when Hash
    hash.inject({}) do |result, (key, value)|
      new_key = key.is_a?(String) ? key.to_sym : key
      result[new_key] = symbolize_keys(value)
      result
    end
  when Array
    # Repeated elements are stored as Arrays of Hashes; without descending
    # into them their keys would stay Strings.
    hash.map { |value| symbolize_keys(value) }
  else
    hash
  end
end
# File lib/multi_xml.rb, line 205
#
# Recursively converts a parsed XML value (Hash, Array or String) into its
# typecast Ruby form.
#
# value            - The Hash, Array or String to convert.
# disallowed_types - Collection of 'type' attribute values that must not be
#                    typecast; falls back to DISALLOWED_XML_TYPES when nil.
#
# Returns the converted value (see the per-branch comments below).
# Raises DisallowedTypeError when a Hash carries a non-Hash 'type' that is
# listed in disallowed_types.
# Raises RuntimeError ("can't typecast ...") for any other kind of value.
#
# NOTE: the input is modified in place in two spots: the 'type' key may be
# deleted from a Hash, and Arrays are rewritten with map!.
def typecast_xml_value(value, disallowed_types=nil)
  disallowed_types ||= DISALLOWED_XML_TYPES

  case value
  when Hash
    # A 'type' that is itself a Hash is a child element named "type", not a
    # type attribute, so it is exempt from the disallowed check.
    if value.include?('type') && !value['type'].is_a?(Hash) && disallowed_types.include?(value['type'])
      raise DisallowedTypeError, value['type']
    end

    if value['type'] == 'array'

      # this commented-out suggestion helps to avoid the multiple attribute
      # problem, but it breaks when there is only one item in the array.
      #
      # from: https://github.com/jnunemaker/httparty/issues/102
      #
      # _, entries = value.detect { |k, v| k != 'type' && v.is_a?(Array) }

      # This attempt fails to consider the order that the detect method
      # retrieves the entries.
      #_, entries = value.detect {|key, _| key != 'type'}

      # This approach ignores attribute entries that are not convertable
      # to an Array which allows attributes to be ignored.
      _, entries = value.detect {|k, v| k != 'type' && (v.is_a?(Array) || v.is_a?(Hash)) }

      # NOTE(review): detect above only yields Array or Hash values, so the
      # String check here and the final else below look unreachable.
      if entries.nil? || (entries.is_a?(String) && entries.strip.empty?)
        []
      else
        case entries
        when Array
          entries.map {|entry| typecast_xml_value(entry, disallowed_types)}
        when Hash
          # A single child element still yields a one-element Array.
          [typecast_xml_value(entries, disallowed_types)]
        else
          raise "can't typecast #{entries.class.name}: #{entries.inspect}"
        end
      end
    elsif value.has_key?(CONTENT_ROOT)
      content = value[CONTENT_ROOT]
      if block = PARSING[value['type']]
        if block.arity == 1
          # One-argument converters only see the text content.
          value.delete('type') if PARSING[value['type']]
          if value.keys.size > 1
            # Other attributes remain: keep the Hash, replacing its content
            # with the converted value.
            value[CONTENT_ROOT] = block.call(content)
            value
          else
            block.call(content)
          end
        else
          # Converters with any other arity also receive the whole Hash.
          block.call(content, value)
        end
      else
        # No converter for this type: return the bare content unless other
        # keys are present alongside it.
        value.keys.size > 1 ? value : content
      end
    elsif value['type'] == 'string' && value['nil'] != 'true'
      ''
    # blank or nil parsed values are represented by nil
    elsif value.empty? || value['nil'] == 'true'
      nil
    # If the type is the only element which makes it then
    # this still makes the value nil, except if type is
    # a XML node(where type['value'] is a Hash)
    elsif value['type'] && value.size == 1 && !value['type'].is_a?(Hash)
      nil
    else
      xml_value = value.inject({}) do |hash, (k, v)|
        hash[k] = typecast_xml_value(v, disallowed_types)
        hash
      end

      # Turn {:files => {:file => #<StringIO>} into {:files => #<StringIO>} so it is compatible with
      # how multipart uploaded files from HTML appear
      xml_value['file'].is_a?(StringIO) ? xml_value['file'] : xml_value
    end
  when Array
    # Elements are converted in place; an Array of length 0 or 1 is
    # unwrapped to its first element (nil when empty).
    value.map!{|i| typecast_xml_value(i, disallowed_types)}
    value.length > 1 ? value : value.first
  when String
    value
  else
    raise "can't typecast #{value.class.name}: #{value.inspect}"
  end
end
# File lib/multi_xml.rb, line 191
#
# Returns a copy of params in which every Hash key has been converted to a
# String with dashes replaced by underscores.
#
# params - A Hash, an Array, or any other value. Hashes and Arrays are
#          processed recursively; anything else is returned unchanged.
def undasherize_keys(params)
  if params.is_a?(Hash)
    converted = {}
    params.each do |key, value|
      converted[key.to_s.tr('-', '_')] = undasherize_keys(value)
    end
    converted
  elsif params.is_a?(Array)
    params.map { |item| undasherize_keys(item) }
  else
    params
  end
end