File: Synopsis/Parsers/Python/ASGTranslator.py
  1#
  2# Copyright (C) 2008 Stefan Seefeld
  3# All rights reserved.
  4# Licensed to the public under the terms of the GNU LGPL (>= 2),
  5# see the file COPYING for details.
  6#
  7
  8from Synopsis import ASG
  9from Synopsis.QualifiedName import QualifiedPythonName as QName
 10from Synopsis.SourceFile import *
 11from Synopsis.DocString import DocString
 12import sys, os.path
 13import compiler, tokenize, token
 14from compiler.consts import OP_ASSIGN
 15from compiler.visitor import ASTVisitor
 16
 17class TokenParser:
 18
 19    def __init__(self, text):
 20        self.text = text + '\n\n'
 21        self.lines = self.text.splitlines(1)
 22        self.generator = tokenize.generate_tokens(iter(self.lines).next)
 23        self.next()
 24
 25    def __iter__(self):
 26        return self
 27
 28    def next(self):
 29        self.token = self.generator.next()
 30        self.type, self.string, self.start, self.end, self.line = self.token
 31        return self.token
 32
 33    def goto_line(self, lineno):
 34        while self.start[0] < lineno:
 35            self.next()
 36        return token
 37
 38    def rhs(self, lineno):
 39        """
 40        Return a whitespace-normalized expression string from the right-hand
 41        side of an assignment at line `lineno`.
 42        """
 43        self.goto_line(lineno)
 44        while self.string != '=':
 45            self.next()
 46        self.stack = None
 47        while self.type != token.NEWLINE and self.string != ';':
 48            if self.string == '=' and not self.stack:
 49                self.tokens = []
 50                self.stack = []
 51                self._type = None
 52                self._string = None
 53                self._backquote = 0
 54            else:
 55                self.note_token()
 56            self.next()
 57        self.next()
 58        text = ''.join(self.tokens)
 59        return text.strip()
 60
 61    closers = {')': '(', ']': '[', '}': '{'}
 62    openers = {'(': 1, '[': 1, '{': 1}
 63    del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1}
 64    no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1}
 65
 66    def note_token(self):
 67        if self.type == tokenize.NL:
 68            return
 69        del_ws = self.del_ws_prefix.has_key(self.string)
 70        append_ws = not self.no_ws_suffix.has_key(self.string)
 71        if self.openers.has_key(self.string):
 72            self.stack.append(self.string)
 73            if (self._type == token.NAME
 74                or self.closers.has_key(self._string)):
 75                del_ws = 1
 76        elif self.closers.has_key(self.string):
 77            assert self.stack[-1] == self.closers[self.string]
 78            self.stack.pop()
 79        elif self.string == '`':
 80            if self._backquote:
 81                del_ws = 1
 82                assert self.stack[-1] == '`'
 83                self.stack.pop()
 84            else:
 85                append_ws = 0
 86                self.stack.append('`')
 87            self._backquote = not self._backquote
 88        if del_ws and self.tokens and self.tokens[-1] == ' ':
 89            del self.tokens[-1]
 90        self.tokens.append(self.string)
 91        self._type = self.type
 92        self._string = self.string
 93        if append_ws:
 94            self.tokens.append(' ')
 95
 96    def function_parameters(self, lineno):
 97        """
 98        Return a dictionary mapping parameters to defaults
 99        (whitespace-normalized strings).
100        """
101        self.goto_line(lineno)
102        while self.string != 'def':
103            self.next()
104        while self.string != '(':
105            self.next()
106        name = None
107        default = False
108        parameter_tuple = False
109        self.tokens = []
110        parameters = {}
111        self.stack = [self.string]
112        self.next()
113        while 1:
114            if len(self.stack) == 1:
115                if parameter_tuple:
116                    name = ''.join(self.tokens).strip()
117                    self.tokens = []
118                    parameter_tuple = False
119                if self.string in (')', ','):
120                    if name:
121                        if self.tokens:
122                            default_text = ''.join(self.tokens).strip()
123                        else:
124                            default_text = ''
125                        parameters[name] = default_text
126                        self.tokens = []
127                        name = None
128                        default = False
129                    if self.string == ')':
130                        break
131                elif self.type == token.NAME:
132                    if name and default:
133                        self.note_token()
134                    else:
135                        assert name is None, (
136                            'token=%r name=%r parameters=%r stack=%r'
137                            % (self.token, name, parameters, self.stack))
138                        name = self.string
139                elif self.string == '=':
140                    assert name is not None, 'token=%r' % (self.token,)
141                    assert default is False, 'token=%r' % (self.token,)
142                    assert self.tokens == [], 'token=%r' % (self.token,)
143                    default = True
144                    self._type = None
145                    self._string = None
146                    self._backquote = 0
147                elif name:
148                    self.note_token()
149                elif self.string == '(':
150                    parameter_tuple = True
151                    self._type = None
152                    self._string = None
153                    self._backquote = 0
154                    self.note_token()
155                else:                   # ignore these tokens:
156                    assert (self.string in ('*', '**', '\n')
157                            or self.type == tokenize.COMMENT), (
158                        'token=%r' % (self.token,))
159            else:
160                self.note_token()
161            self.next()
162        return parameters
163
class ASGTranslator(ASTVisitor):
    """Translate the Python AST into a Synopsis ASG."""

    def __init__(self, package, types, docformat):
        """Create an ASGTranslator.

        package: enclosing package the generated modules are to be part of.
        types: mapping from qualified names to type ids; declarations found
               during translation are registered in it.
        docformat: format passed to every DocString that is created."""

        ASTVisitor.__init__(self)
        # Stack of enclosing declarations; the innermost one is last.
        self.scope = package and [package] or []
        self.file = None
        self.types = types
        # Variables collected while visiting the current assignment.
        self.attributes = []
        self.any_type = ASG.BuiltinTypeId('Python',QName('',))
        self.docformat = docformat
        # Declaration that a directly following string literal documents.
        self.documentable = None
        # Name produced by the most recent visitName call.
        self.name = QName()
        # List of (module, name) pairs; name is None for 'import module'.
        self.imports = []
        # True while the targets of a tuple assignment are being visited.
        self.in_ass_tuple = False

    def process_file(self, file):
        """Parse the source of `file` and translate it.

        The generated module is appended to `file.declarations`."""

        self.file = file
        source_file = open(self.file.abs_name)
        try:
            source = source_file.read()
        finally:
            # Do not rely on garbage collection to release the handle.
            source_file.close()
        self.token_parser = TokenParser(source)
        ast = compiler.parse(source)
        compiler.walk(ast, self, walker=self)

    def scope_name(self):
        """Return the name of the innermost scope, or () if there is none."""
        return len(self.scope) and self.scope[-1].name or ()

    def default(self, node, *args):
        """Handle node types without a visit method.

        Such nodes are not descended into; they only end the chance of a
        following string literal being taken as documentation."""
        self.documentable = None

    def default_visit(self, node, *args):
        """Visit the children of `node`, passing `args` along."""
        ASTVisitor.default(self, node, *args)

    def visitDiscard(self, node):
        """Look at an expression statement only if it may be a docstring."""
        if self.documentable:
            self.visit(node.expr)

    def visitConst(self, node):
        """Attach a string constant as documentation to the pending declaration."""
        if self.documentable:
            if type(node.value) in (str, unicode):
                self.documentable.annotations['doc'] = DocString(node.value, self.docformat)
            else:
                self.documentable = None

    def visitStmt(self, node):
        self.default_visit(node)

    def visitAssign(self, node):
        """Collect the variables defined by an assignment.

        If exactly one variable results, it becomes documentable by a
        directly following string literal."""

        save_attributes = self.attributes
        self.attributes = []
        self.in_ass_tuple = False
        for child in node.nodes:
            self.dispatch(child)
        if self.attributes:
            if type(self.scope[-1]) == ASG.Operation:
                # Inject the attributes into the class.
                self.scope[-2].declarations.extend(self.attributes)
            else:
                self.scope[-1].declarations.extend(self.attributes)
        if len(self.attributes) == 1:
            self.documentable = self.attributes[0]
        else:
            self.documentable = None
        self.attributes = save_attributes

    def visitModule(self, node):
        """Create the module (or package, for '__init__' files) declaration."""

        name = os.path.basename(os.path.splitext(self.file.name)[0])
        if name == '__init__':
            # A package is named after the directory holding '__init__'.
            name = os.path.basename(os.path.dirname(self.file.name))
            qname = QName(self.scope_name() + (name,))
            module = ASG.Module(self.file, node.lineno, 'package', qname)
        else:
            qname = QName(self.scope_name() + (name,))
            module = ASG.Module(self.file, node.lineno, 'module', qname)
        self.types[qname] = ASG.DeclaredTypeId('Python', qname, module)

        self.scope.append(module)
        self.documentable = module
        self.visit(node.node)
        self.scope.pop()
        self.file.declarations.append(module)

    def visitImport(self, node):
        """Record 'import module' statements."""

        self.imports.extend([(n[0], None) for n in node.names])
        self.documentable = None

    def visitFrom(self, node):
        """Record 'from module import name' statements."""

        self.imports.extend([(node.modname, n[0]) for n in node.names])
        self.documentable = None

    def visitAssName(self, node):
        """Turn an assigned name into a variable declaration.

        Names assigned inside functions and methods are ignored.  An
        assignment to '__docformat__' that is not part of a tuple
        assignment, made while the scope stack holds exactly one entry,
        additionally sets the docformat used from then on."""

        if not self.in_ass_tuple:
            meta_tags = ['__docformat__']
            if len(self.scope) == 1 and node.name in meta_tags:
                # NOTE(review): eval() executes text taken from the file
                # being parsed.  If sources may be untrusted, consider a
                # literal-only evaluator instead.
                expression_text = eval(self.token_parser.rhs(node.lineno))
                self.file.annotations[node.name] = expression_text
                self.docformat = expression_text

        qname = QName(self.scope_name() + (node.name,))
        if type(self.scope[-1]) in (ASG.Function, ASG.Operation):
            return
        elif type(self.scope[-1]) == ASG.Class:
            attribute = ASG.Variable(self.file, node.lineno, 'class attribute',
                                     qname, self.any_type, False)
        else:
            attribute = ASG.Variable(self.file, node.lineno, 'attribute',
                                     qname, self.any_type, False)
        if node.name.startswith('__'):
            attribute.accessibility = ASG.PRIVATE
        elif node.name.startswith('_'):
            attribute.accessibility = ASG.PROTECTED
        self.attributes.append(attribute)
        self.types[qname] = ASG.DeclaredTypeId('Python', attribute.name, attribute)

    def visitAssTuple(self, node):
        """Visit the targets of a tuple assignment."""

        # Restore rather than clear the flag, so that the targets following
        # a nested tuple are still known to be inside the outer tuple.
        enclosing = self.in_ass_tuple
        self.in_ass_tuple = True
        for a in node.nodes:
            self.visit(a)
        self.in_ass_tuple = enclosing

    def visitAssAttr(self, node):
        """Record 'self.<name> = ...' assignments made inside a method."""
        self.default_visit(node, node.attrname)
        if type(self.scope[-1]) == ASG.Operation:
            # We only parse constructors, so look out for
            # self attributes defined here.
            # FIXME: There is no reason the 'self' argument actually has to be spelled 'self'.
            if self.name[0] == 'self':
                # FIXME: qualifying variables is ambiguous, since we don't distinguish
                #        class attributes and object attributes.
                qname = self.scope[-2].name + self.name[1:]
                self.attributes.append(ASG.Variable(self.file, node.lineno,
                                                    'attribute', qname, self.any_type, False))

    def visitName(self, node, suffix=None):
        """Store the visited name, extended by `suffix`, in `self.name`.

        `suffix` is the dotted attribute path collected by the enclosing
        attribute nodes, e.g. 'b.C' while visiting `a` in `a.b.C`."""

        if suffix:
            # Every attribute becomes a component of its own.
            self.name = QName((node.name,) + tuple(suffix.split('.')))
        else:
            self.name = QName((node.name,))

    def visitFunction(self, node):
        """Create a function or method declaration.

        Function bodies are only visited for '__init__'."""

        if isinstance(self.scope[-1], ASG.Function):
            # Skip local functions.
            return
        qname = QName(self.scope_name() + (node.name,))
        if type(self.scope[-1]) == ASG.Class:
            function = ASG.Operation(self.file, node.lineno, 'method',
                                     [], self.any_type, [], qname, node.name)
        else:
            function = ASG.Function(self.file, node.lineno, 'function',
                                    [], self.any_type, [], qname, node.name)

        # The following attributes are special in that even though they are private they
        # match publicly accessible operations, so we exclude them from being
        # marked as private.
        special_attributes = ('__init__', '__str__', '__repr__', '__iter__', '__getitem__')

        if node.name.startswith('__'):
            if node.name not in special_attributes:
                function.accessibility = ASG.PRIVATE
        elif node.name.startswith('_'):
            function.accessibility = ASG.PROTECTED

        function.annotations['doc'] = DocString(node.doc or '', self.docformat)
        # Given that functions in Python are first-class citizens, should they be
        # treated like (named) types ?
        self.types[qname] = ASG.DeclaredTypeId('Python', function.name, function)

        self.scope.append(function)
        self.documentable = function
        function.parameters = self.parse_parameter_list(node)
        if node.name == '__init__':
            # Only parse constructors, to find member variables
            self.visit(node.code)
        self.scope.pop()
        self.scope[-1].declarations.append(function)

    def parse_parameter_list(self, node):
        """Return the list of ASG.Parameter objects for function `node`.

        Default values are taken as text from the token stream.  The '*'
        and '**' parameters come last, in that order."""
        parameters = []
        special = []
        # The names of the '*' and '**' parameters are the last entries of
        # node.argnames; strip them off, '**' first.
        argnames = list(node.argnames)
        if node.kwargs:
            special.append(ASG.Parameter('**', self.any_type, '', argnames[-1]))
            argnames.pop()
        if node.varargs:
            special.append(ASG.Parameter('*', self.any_type, '', argnames[-1]))
            argnames.pop()
        values = self.token_parser.function_parameters(node.lineno)
        for argname in argnames:
            if type(argname) is tuple:
                for a in argname:
                    # FIXME: It is generally impossible to match tuple parameters
                    # to defaults individually, we ignore default values for now.
                    # (We may try to match them, and only leave out those resulting
                    # from tuple-returning call expressions. But that's for another day.)
                    parameters.append(ASG.Parameter('', self.any_type, '', a))
            else:
                parameters.append(ASG.Parameter('', self.any_type, '', argname,
                                                values[argname]))
        # `special` was filled '**' first; emit '*' before '**'.
        special.reverse()
        parameters.extend(special)
        return parameters

    def visitClass(self, node):
        """Create a class declaration and visit the class body."""

        if isinstance(self.scope[-1], ASG.Function):
            # Skip local classes.
            return
        bases = []
        for base in node.bases:
            # Visiting the base expression leaves its name in self.name.
            self.visit(base)
            # FIXME: This logic is broken !
            #        It assumes that names are either local or fully qualified.
            if len(self.name) == 1 and self.scope:
                # Name is unqualified. Qualify it.
                base = QName(list(self.scope[-1].name) + list(self.name))
            else:
                base = self.name
            if self.types.has_key(base):
                base = self.types[base]
            else:
                base = ASG.UnknownTypeId('Python', base)
            bases.append(base)
        qname = QName(self.scope_name() + (node.name,))
        class_ = ASG.Class(self.file, node.lineno, 'class', qname)
        class_.parents = [ASG.Inheritance('', b, '') for b in bases]
        class_.annotations['doc'] = DocString(node.doc or '', self.docformat)
        self.types[qname] = ASG.DeclaredTypeId('Python', class_.name, class_)
        self.scope.append(class_)
        self.documentable = class_
        self.visit(node.code)
        self.scope.pop()
        self.scope[-1].declarations.append(class_)

    def visitGetattr(self, node, suffix=None):
        """Prepend this attribute to `suffix` and pass it on to the children."""
        if suffix:
            name = node.attrname + '.' + suffix
        else:
            name = node.attrname
        self.default_visit(node, name)
421
422
423