File: Synopsis/Parsers/Python/SXRGenerator.py
  1#
  2# Copyright (C) 2008 Stefan Seefeld
  3# All rights reserved.
  4# Licensed to the public under the terms of the GNU LGPL (>= 2),
  5# see the file COPYING for details.
  6#
  7
import keyword
import parser
import symbol
import sys
import token
import tokenize
 13
 14HAVE_ENCODING_DECL = hasattr(symbol, "encoding_decl") # python 2.3
 15HAVE_IMPORT_NAME = hasattr(symbol, "import_name") # python 2.4
 16HAVE_DECORATOR = hasattr(symbol,"decorator") # python 2.4
 17
 18def num_tokens(ptree):
 19    """Count the number of leaf tokens in the given ptree."""
 20
 21    if type(ptree) == str: return 1
 22    else: return sum([num_tokens(n) for n in ptree[1:]])
 23
 24
 25class LexerDebugger:
 26
 27    def __init__(self, lexer):
 28
 29        self.lexer = lexer
 30
 31    def next(self):
 32
 33        n = self.lexer.next()
 34        print 'next is "%s" (%s)'%(n[1], n[0])
 35        return n
 36
 37header="""<sxr filename="%(filename)s">
 38<line>"""
 39
 40trailer="""</line>
 41</sxr>
 42"""
 43
 44def escape(text):
 45
 46    for p in [('&', '&amp;'), ('"', '&quot;'), ('<', '&lt;'), ('>', '&gt;'),]:
 47        text = text.replace(*p)
 48    return text
 49
 50
 51class SXRGenerator:
 52    """"""
 53
 54    def __init__(self):
 55        """"""
 56
 57        self.handlers = {}
 58        self.handlers[token.ENDMARKER] = self.handle_end_marker
 59        self.handlers[token.NEWLINE] = self.handle_newline
 60        self.handlers[token.INDENT] = self.handle_indent
 61        self.handlers[token.DEDENT] = self.handle_dedent
 62        self.handlers[token.STRING] = self.handle_string
 63        self.handlers[symbol.funcdef]= self.handle_function
 64        self.handlers[symbol.parameters] = self.handle_parameters
 65        self.handlers[symbol.classdef] = self.handle_class
 66        self.handlers[token.NAME] = self.handle_name
 67        self.handlers[symbol.expr_stmt] = self.handle_expr_stmt
 68        #self.handlers[token.OP] = self.handle_op
 69        self.handlers[symbol.power] = self.handle_power
 70        if HAVE_ENCODING_DECL:
 71            self.handlers[symbol.encoding_decl] = self.handle_encoding_decl
 72        if HAVE_IMPORT_NAME:
 73            self.handlers[symbol.import_as_names] = self.handle_import_as_names
 74            self.handlers[symbol.dotted_as_names] = self.handle_dotted_as_names
 75            self.handlers[symbol.import_from] = self.handle_import_from
 76            self.handlers[symbol.import_name] = self.handle_import_name
 77        else:
 78            self.handlers[symbol.import_stmt] = self.handle_import
 79        if HAVE_DECORATOR:
 80            self.handlers[symbol.decorator] = self.handle_decorator
 81
 82        self.col = 0
 83        self.lineno = 1
 84        self.parameters = []
 85        self.scopes = []
 86
 87    def process_file(self, scope, sourcefile, sxr):
 88
 89        self.scopes = list(scope)
 90        input = open(sourcefile.abs_name, 'r+')
 91        src = input.readlines()
 92        self.lines = len(`len(src) + 1`)
 93        ptree = parser.ast2tuple(parser.suite(''.join(src)))
 94        input.seek(0)
 95        self.lexer = tokenize.generate_tokens(input.readline)
 96        #self.lexer = LexerDebugger(tokenize.generate_tokens(input.readline))
 97        self.sxr = open(sxr, 'w+')
 98        lineno_template = '%%%ds' % self.lines
 99        lineno = lineno_template % self.lineno
100        self.sxr.write(header % {'filename': sourcefile.name})
101        try:
102            self.handle(ptree)
103        except StopIteration:
104            raise
105        self.sxr.write(trailer)
106        self.sxr.close()
107        self.scopes.pop()
108
109    def handle(self, ptree):
110
111        if type(ptree) == tuple:
112            kind = ptree[0]
113            value = ptree[1:]
114            handler = self.handlers.get(kind, self.default_handler)
115            handler(value)
116        else:
117            raise Exception("Process error: Type is not a tuple %s" % str(ptree))
118
119
120    def default_handler(self, ptree):
121
122        for node in ptree:
123            if type(node) == tuple: self.handle(node)
124            elif type(node) == str: self.handle_token(node)
125            else: raise Exception("Invalid ptree node")
126
127
128    def next_token(self):
129        """Return the next visible token.
130        Process tokens that are not part of the parse tree silently."""
131
132        t = self.lexer.next()
133        while t[0] in [tokenize.NL, tokenize.COMMENT]:
134            if t[0] is tokenize.NL:
135                self.print_newline()
136            elif t[0] is tokenize.COMMENT:
137                self.print_token(t)
138                if t[1][-1] == '\n': self.print_newline()
139            t = self.lexer.next()
140        return t
141
142
143    def handle_token(self, item = None):
144
145        t = self.next_token()
146        if item is not None and t[1] != item:
147            raise 'Internal error in line %d: expected "%s", got "%s" (%d)'%(self.lineno, item, t[1], t[0])
148        else:
149            self.print_token(t)
150
151
152    def handle_name_as_xref(self, xref, name, from_ = None, type = None):
153
154        kind, value, (srow, scol), (erow, ecol), line = self.next_token()
155        if (kind, value) != (token.NAME, name):
156            raise 'Internal error in line %d: expected name "%s", got "%s" (%d)'%(name, self.lineno, item, t[1], t[0])
157
158        if self.col != scol:
159            self.sxr.write(' ' * (scol - self.col))
160        attrs = []
161        if from_: attrs.append('from="%s"'%from_)
162        if type: attrs.append('type="%s"'%type)
163        a = '<a href="%s" %s>%s</a>'%('.'.join(xref), ' '.join(attrs), value)
164        self.sxr.write(a)
165        self.col = ecol
166
167
168    def handle_tokens(self, ptree):
169
170        tokens = num_tokens(ptree)
171        for i in xrange(tokens):
172            self.handle_token()
173
174
175    def handle_end_marker(self, nodes): pass
176    def handle_newline(self, nodes):
177
178        self.handle_token()
179
180
181    def handle_indent(self, indent):
182
183        self.handle_token()
184
185
186    def handle_dedent(self, dedent):
187
188        self.handle_token()
189
190
191    def handle_string(self, content):
192
193        self.handle_token()
194
195
196    def handle_function(self, nodes):
197
198        if HAVE_DECORATOR:
199            if nodes[0][0] == symbol.decorators:
200                offset = 1
201                # FIXME
202                self.handle(nodes[0])
203            else:
204                offset = 0
205        else:
206            offset = 0
207
208        def_token = nodes[0 + offset]
209        self.handle_token(def_token[1])
210        name = nodes[1 + offset][1]
211        qname = tuple(self.scopes + [name])
212        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
213        # Handle the parameters.
214        self.handle(nodes[2 + offset])
215
216        colon_token = nodes[3 + offset]
217        self.handle_token(colon_token[1])
218        body = nodes[4 + offset]
219        # Theoretically, we'd have to push the function scope here.
220        # Practically, however, we don't inject xrefs (yet) into function bodies.
221        self.handle_tokens(body)
222
223        # Don't traverse the function body, since the ASG doesn't handle
224        # local declarations anyways.
225
226
227    def handle_parameters(self, nodes):
228
229        self.handle_token(nodes[0][1])
230        if nodes[1][0] == symbol.varargslist:
231            args = list(nodes[1][1:])
232            while args:
233                if args[0][0] == token.COMMA:
234                    self.handle_token(args[0][1])
235                    pass
236                elif args[0][0] == symbol.fpdef:
237                    self.handle_tokens(args[0])
238                elif args[0][0] == token.EQUAL:
239                    self.handle_token(args[0][1])
240                    del args[0]
241                    self.handle_tokens(args[0])
242                elif args[0][0] == token.DOUBLESTAR:
243                    self.handle_token(args[0][1])
244                    del args[0]
245                    self.handle_token(args[0][1])
246                elif args[0][0] == token.STAR:
247                    self.handle_token(args[0][1])
248                    del args[0]
249                    self.handle_token(args[0][1])
250                else:
251                    print "Unknown symbol:",args[0]
252                del args[0]
253        self.handle_token(nodes[-1][1])
254
255
256    def handle_class(self, nodes):
257
258        class_token = nodes[0]
259        self.handle_token(class_token[1])
260        name = nodes[1][1]
261        qname = tuple(self.scopes + [name])
262        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
263        base_clause = nodes[2][0] == token.LPAR and nodes[3] or None
264        self.handle_tokens(nodes[2])
265        bases = []
266        if base_clause:
267            self.handle_tokens(base_clause)
268            self.handle_token(')')
269            self.handle_token(':')
270
271            body = nodes[6]
272        else:
273            body = nodes[3]
274        self.scopes.append(name)
275        self.handle(body)
276        self.scopes.pop()
277
278
279    def handle_name(self, content):
280
281        self.handle_token(content[0])
282
283
284    def handle_expr_stmt(self, nodes):
285
286        for n in nodes: self.handle_tokens(n)
287
288
289    def handle_dotted_name(self, dname, rest):
290
291        self.handle_token(dname[0])
292        for name in dname[1:]:
293            self.handle_token('.')
294            self.handle_token(name)
295        map(self.handle, rest)
296
297
298    def handle_op(self, nodes): pass
299
300
301    def handle_power(self, content):
302
303        def get_dotted_name(content):
304            if content[0][0] != symbol.atom or content[0][1][0] != token.NAME:
305                return None
306            dotted_name = [content[0][1][1]]
307            i = 1
308            for param in content[1:]:
309                if param[0] != symbol.trailer: break
310                if param[1][0] != token.DOT: break
311                if param[2][0] != token.NAME: break
312                dotted_name.append(param[2][1])
313                i += 1
314            if i < len(content): return dotted_name, content[i:]
315            else: return dotted_name, []
316
317        name = get_dotted_name(content)
318        if name: self.handle_dotted_name(*name)
319        else: map(self.handle, content)
320
321
322    def handle_encoding_decl(self, nodes):
323
324        # For some reason the encoding is the last tuple item
325        for n in nodes[:-1]: self.handle(n)
326
327    def handle_import_as_names(self, nodes):
328
329        for n in nodes: self.handle(n)
330
331
332    def handle_dotted_as_names(self, nodes):
333
334        for n in nodes: self.handle(n)
335
336
337    def handle_import_from(self, nodes):
338
339        self.handle_token('from')
340        self.handle(nodes[1])
341        self.handle_token('import')
342        self.handle(nodes[3])
343
344
345    def handle_import_name(self, nodes):
346
347        self.handle_token('import')
348        self.handle_dotted_as_names(nodes[1][1:])
349
350
351    def handle_import(self, nodes):
352
353        #self.handle_token('import')
354        for n in nodes: self.handle(n)
355
356
357    def handle_decorator(self, nodes): pass
358
359
360    def print_token(self, t):
361
362        kind, value, (srow, scol), (erow, ecol), line = t
363        if kind == token.NEWLINE:
364            self.print_newline()
365        else:
366            if self.col != scol:
367                self.sxr.write(' ' * (scol - self.col))
368            if keyword.iskeyword(value):
369                format = '<span class="py-keyword">%s</span>'
370            elif kind == token.STRING:
371                format = '<span class="py-string">%s</span>'
372                chunks = value.split('\n')
373                for c in chunks[:-1]:
374                    self.sxr.write(format % escape(c))
375                    self.print_newline()
376                value = chunks[-1]
377
378            elif kind == tokenize.COMMENT:
379                format = '<span class="py-comment">%s</span>'
380                if value[-1] == '\n': value = value[:-1]
381            else:
382                format = '%s'
383
384            self.sxr.write(format % escape(value))
385            self.col = ecol
386
387
388    def print_newline(self):
389
390        self.col = 0
391        self.lineno += 1
392        self.sxr.write('</line>\n')
393        self.sxr.write('<line>')
394
395
396