1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """xml parsing routines
19
20 Flumotion deals with two basic kinds of XML: config and registry. They
21 correspond to data and schema, more or less. This file defines some base
22 parsing routines shared between both kinds of XML.
23 """
24
25 import os
26 from xml.dom import minidom, Node
27 from xml.parsers import expat
28
29 from flumotion.common import log, python
30
31 __version__ = "$Rev$"
32
33
35 """
36 Object designed to wrap, or "box", any value. Useful mostly in the
37 context of the table-driven XML parser, so that a handler that wants
38 to set a scalar value can do so, getting around the limitations of
39 Python's lexical scoping.
40 """
41
44
47
50
51
53 """
54 Error during parsing of XML.
55
56 args[0]: str
57 """
58
59
61 """
62 XML parser base class.
63
64 I add some helper functions for specialized XML parsers, mostly the
65 parseFromTable method.
66
67 I am here so that the config parser and the registry parser can
68 share code.
69 """
70
71 parserError = ParserError
72
74 """
75 Return the root of the XML tree for the string or filename
76 passed as an argument. Raises fxml.ParserError if the XML could
77 not be parsed.
78
79 @param file: An open file object, or the name of a file. Note
80 that if you pass a file object, this function will leave the
81 file open.
82 @type file: File object; can be a duck file like StringIO.
83 Alternately, the path of a file on disk.
84 """
85 self.debug('Parsing XML from %r', file)
86 try:
87 return minidom.parse(file)
88 except expat.ExpatError, e:
89 raise self.parserError('Error parsing XML from %r: %s' % (
90 file, log.getExceptionMessage(e)))
91
93 """
94 Checks that a given XML node has all of the required attributes,
95 and no unknown attributes. Raises fxml.ParserError if unknown
96 or missing attributes are detected. An empty attribute (e.g.
97 'foo=""') is treated as a missing attribute.
98
99 @param node: An XML DOM node.
100 @type node: L{xml.dom.Node}
101 @param required: Set of required attributes, or None.
102 @type required: Sequence (list, tuple, ...) of strings.
103 @param optional: Set of optional attributes, or None.
104 @type optional: Sequence (list, tuple, ...) of strings.
105 """
106 attrs = python.set([k for k in node.attributes.keys()
107 if node.getAttribute(k)])
108 required = python.set(required or ())
109 optional = python.set(optional or ())
110 for x in attrs - required.union(optional):
111 raise self.parserError("Unknown attribute in <%s>: %s"
112 % (node.nodeName, x))
113 for x in required - attrs:
114 raise self.parserError("Missing attribute in <%s>: %s"
115 % (node.nodeName, x))
116
def parseAttributes(self, node, required=None, optional=None,
                    type=str):
    """
    Validate the attributes of an XML node and return their values.

    The node is first validated with checkAttributes (which raises
    the parser error on unknown or missing attributes); the requested
    attribute values are then collected in order.

    @param node:     An XML DOM node.
    @type  node:     L{xml.dom.Node}
    @param required: Names of required attributes, or None.
    @type  required: Sequence (list, tuple, ...) of strings.
    @param optional: Names of optional attributes, or None.
    @type  optional: Sequence (list, tuple, ...) of strings.
    @param type:     Function used to convert each attribute value.
                     The DOM hands back text values; in most cases
                     you want `str' objects, so this defaults to
                     `str'.
    @type  type:     Function of type object -> object.

    @returns: One entry per requested attribute: first the required
              attributes in the order given, then the optional ones.
              An attribute that is absent or empty (e.g. 'foo=""')
              yields None; any other value is passed through type.
    @rtype:   list, as long as the combined length of the required
              and optional attributes.
    """
    self.checkAttributes(node, required, optional)

    # Normalise both arguments to tuples before concatenating, so that
    # callers may freely mix lists, tuples and None (list + tuple would
    # otherwise raise TypeError).
    names = tuple(required or ()) + tuple(optional or ())

    out = []
    for name in names:
        # getAttribute returns an empty string for an absent attribute,
        # so a single truth test covers both "missing" and "empty".
        value = node.getAttribute(name)
        if value:
            out.append(type(value))
        else:
            out.append(None)
    return out
156
158 """
159 A data-driven verifying XML parser. Raises fxml.ParserError if
160 an unexpected child node is encountered.
161
162 @param parent: An XML node whose child nodes you are interested
163 in parsing.
164 @type parent: L{xml.dom.Node}
165 @param parsers: A parse table defining how to parse the child
166 nodes. The keys are the possible child nodes, and the value is a
167 two-tuple of how to parse them consisting of a parser and a
168 value handler. The parser is a one-argument function that will
169 be called with the child node as an argument, and the handler is
170 a one-argument function that will be called with the result of
171 calling the parser.
172 @type parsers: dict of string -> (function, function)
173 """
174 for child in parent.childNodes:
175 if (child.nodeType == Node.TEXT_NODE or
176 child.nodeType == Node.COMMENT_NODE):
177 continue
178 if child.nodeName == 'xi:include':
179 p = Parser()
180 path = child.attributes['href'].value
181 children = p.getRoot(resolve_relative_path(path)).childNodes[0]
182 p.parseFromTable(children, parsers)
183 continue
184 else:
185 try:
186 parser, handler = parsers[child.nodeName]
187 except KeyError:
188 raise self.parserError("unexpected node in <%s>: %s"
189 % (parent.nodeName, child))
190 handler(parser(child))
191
def parseTextNode(self, node, type=str):
    """Parse a text-containing XML node.

    The node is expected to contain only text children. Recognized
    node types are L{xml.dom.Node.TEXT_NODE} and
    L{xml.dom.Node.CDATA_SECTION_NODE}; comment children are skipped.

    @param node: the node to parse
    @type  node: L{xml.dom.Node}
    @param type: a function to call on the resulting text
    @type  type: function of type unicode -> object

    @returns: The result of calling type on the concatenated text of
              all text and CDATA children. By default, type is
              L{str}.
    @raises:  self.parserError if the node has a child that is not
              text, CDATA or a comment, or if calling type on the
              text fails.
    """
    chunks = []
    for child in node.childNodes:
        if child.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            chunks.append(child.data)
        elif child.nodeType == Node.COMMENT_NODE:
            continue
        else:
            raise self.parserError('unexpected non-text content of '
                                   '%r: %r' % (node, child))
    try:
        return type(''.join(chunks))
    except Exception as e:
        # Interpolate the message here: passing the format string and
        # its arguments separately would leave the '%s' placeholders
        # unexpanded in the exception's args[0].
        raise self.parserError('failed to parse %s as %s: %s' % (
            node, type, log.getExceptionMessage(e)))
222
223
225
226
227
228 root = os.path.join(os.path.dirname(__file__), '..', '..')
229 return os.path.join(root, path)
230