cpython/Parser/asdl.py

"""An implementation of the Zephyr Abstract Syntax Definition Language.

See http://asdl.sourceforge.net/ and
http://www.cs.princeton.edu/research/techreps/TR-554-97

Only supports top level module decl, not view.  I'm guessing that view
is intended to support the browser and I'm not interested in the
browser.

Changes for Python: Add support for module versions
"""

import os
import sys
import traceback

import spark

def output(string):
    sys.stdout.write(string + "\n")


class Token(object):
    # spark seems to dispatch in the parser based on a token's
    # type attribute
    def __init__(self, type, lineno):
        self.type = type
        self.lineno = lineno

    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

class Id(Token):
    def __init__(self, value, lineno):
        self.type = 'Id'
        self.value = value
        self.lineno = lineno

    def __str__(self):
        return self.value

class String(Token):
    def __init__(self, value, lineno):
        self.type = 'String'
        self.value = value
        self.lineno = lineno

class ASDLSyntaxError(Exception):

    def __init__(self, lineno, token=None, msg=None):
        self.lineno = lineno
        self.token = token
        self.msg = msg

    def __str__(self):
        if self.msg is None:
            return "Error at '%s', line %d" % (self.token, self.lineno)
        else:
            return "%s, line %d" % (self.msg, self.lineno)

class ASDLScanner(spark.GenericScanner, object):

    def tokenize(self, input):
        self.rv = []
        self.lineno = 1
        super(ASDLScanner, self).tokenize(input)
        return self.rv

    def t_id(self, s):
        r"[\w\.]+"
        # XXX doesn't distinguish upper vs. lower, which is
        # significant for ASDL.
        self.rv.append(Id(s, self.lineno))

    def t_string(self, s):
        r'"[^"]*"'
        self.rv.append(String(s, self.lineno))

    def t_xxx(self, s): # not sure what this production means
        r"<="
        self.rv.append(Token(s, self.lineno))

    def t_punctuation(self, s):
        r"[\{\}\*\=\|\(\)\,\?\:]"
        self.rv.append(Token(s, self.lineno))

    def t_comment(self, s):
        r"\-\-[^\n]*"
        pass

    def t_newline(self, s):
        r"\n"
        self.lineno += 1

    def t_whitespace(self, s):
        r"[ \t]+"
        pass

    def t_default(self, s):
        r" . +"
        raise ValueError("unmatched input: %r" % s)

class ASDLParser(spark.GenericParser, object):
    def __init__(self):
        super(ASDLParser, self).__init__("module")

    def typestring(self, tok):
        return tok.type

    def error(self, tok):
        raise ASDLSyntaxError(tok.lineno, tok)

    def p_module_0(self, info):
        " module ::= Id Id version { } "
        module, name, version, _0, _1 = info
        if module.value != "module":
            raise ASDLSyntaxError(module.lineno,
                                  msg="expected 'module', found %s" % module)
        return Module(name, None, version)

    def p_module(self, info):
        " module ::= Id Id version { definitions } "
        module, name, version, _0, definitions, _1 = info
        if module.value != "module":
            raise ASDLSyntaxError(module.lineno,
                                  msg="expected 'module', found %s" % module)
        return Module(name, definitions, version)

    def p_version(self, info):
        "version ::= Id String"
        version, V = info
        if version.value != "version":
            raise ASDLSyntaxError(version.lineno,
                                  msg="expected 'version', found %" % version)
        return V

    def p_definition_0(self, definition):
        " definitions ::= definition "
        return definition[0]

    def p_definition_1(self, definitions):
        " definitions ::= definition definitions "
        return definitions[0] + definitions[1]

    def p_definition(self, info):
        " definition ::= Id = type "
        id, _, type = info
        return [Type(id, type)]

    def p_type_0(self, product):
        " type ::= product "
        return product[0]

    def p_type_1(self, sum):
        " type ::= sum "
        return Sum(sum[0])

    def p_type_2(self, info):
        " type ::= sum Id ( fields ) "
        sum, id, _0, attributes, _1 = info
        if id.value != "attributes":
            raise ASDLSyntaxError(id.lineno,
                                  msg="expected attributes, found %s" % id)
        if attributes:
            attributes.reverse()
        return Sum(sum, attributes)

    def p_product(self, info):
        " product ::= ( fields ) "
        _0, fields, _1 = info
        # XXX can't I just construct things in the right order?
        fields.reverse()
        return Product(fields)

    def p_sum_0(self, constructor):
        " sum ::= constructor "
        return [constructor[0]]

    def p_sum_1(self, info):
        " sum ::= constructor | sum "
        constructor, _, sum = info
        return [constructor] + sum

    def p_sum_2(self, info):
        " sum ::= constructor | sum "
        constructor, _, sum = info
        return [constructor] + sum

    def p_constructor_0(self, id):
        " constructor ::= Id "
        return Constructor(id[0])

    def p_constructor_1(self, info):
        " constructor ::= Id ( fields ) "
        id, _0, fields, _1 = info
        # XXX can't I just construct things in the right order?
        fields.reverse()
        return Constructor(id, fields)

    def p_fields_0(self, field):
        " fields ::= field "
        return [field[0]]

    def p_fields_1(self, info):
        " fields ::= field , fields "
        field, _, fields = info
        return fields + [field]

    def p_field_0(self, type_):
        " field ::= Id "
        return Field(type_[0])

    def p_field_1(self, info):
        " field ::= Id Id "
        type, name = info
        return Field(type, name)

    def p_field_2(self, info):
        " field ::= Id * Id "
        type, _, name = info
        return Field(type, name, seq=True)

    def p_field_3(self, info):
        " field ::= Id ? Id "
        type, _, name = info
        return Field(type, name, opt=True)

    def p_field_4(self, type_):
        " field ::= Id * "
        return Field(type_[0], seq=True)

    def p_field_5(self, type_):
        " field ::= Id ? "
        return Field(type[0], opt=True)

builtin_types = ("identifier", "string", "int", "bool", "object")

# below is a collection of classes to capture the AST of an AST :-)
# not sure if any of the methods are useful yet, but I'm adding them
# piecemeal as they seem helpful

class AST(object):
    pass # a marker class

class Module(AST):
    def __init__(self, name, dfns, version):
        self.name = name
        self.dfns = dfns
        self.version = version
        self.types = {} # maps type name to value (from dfns)
        for type in dfns:
            self.types[type.name.value] = type.value

    def __repr__(self):
        return "Module(%s, %s)" % (self.name, self.dfns)

class Type(AST):
    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __repr__(self):
        return "Type(%s, %s)" % (self.name, self.value)

class Constructor(AST):
    def __init__(self, name, fields=None):
        self.name = name
        self.fields = fields or []

    def __repr__(self):
        return "Constructor(%s, %s)" % (self.name, self.fields)

class Field(AST):
    def __init__(self, type, name=None, seq=False, opt=False):
        self.type = type
        self.name = name
        self.seq = seq
        self.opt = opt

    def __repr__(self):
        if self.seq:
            extra = ", seq=True"
        elif self.opt:
            extra = ", opt=True"
        else:
            extra = ""
        if self.name is None:
            return "Field(%s%s)" % (self.type, extra)
        else:
            return "Field(%s, %s%s)" % (self.type, self.name, extra)

class Sum(AST):
    def __init__(self, types, attributes=None):
        self.types = types
        self.attributes = attributes or []

    def __repr__(self):
        if self.attributes is None:
            return "Sum(%s)" % self.types
        else:
            return "Sum(%s, %s)" % (self.types, self.attributes)

class Product(AST):
    def __init__(self, fields):
        self.fields = fields

    def __repr__(self):
        return "Product(%s)" % self.fields

class VisitorBase(object):

    def __init__(self, skip=False):
        self.cache = {}
        self.skip = skip

    def visit(self, object, *args):
        meth = self._dispatch(object)
        if meth is None:
            return
        try:
            meth(object, *args)
        except Exception:
            output("Error visiting" + repr(object))
            output(str(sys.exc_info()[1]))
            traceback.print_exc()
            # XXX hack
            if hasattr(self, 'file'):
                self.file.flush()
            os._exit(1)

    def _dispatch(self, object):
        assert isinstance(object, AST), repr(object)
        klass = object.__class__
        meth = self.cache.get(klass)
        if meth is None:
            methname = "visit" + klass.__name__
            if self.skip:
                meth = getattr(self, methname, None)
            else:
                meth = getattr(self, methname)
            self.cache[klass] = meth
        return meth

class Check(VisitorBase):

    def __init__(self):
        super(Check, self).__init__(skip=True)
        self.cons = {}
        self.errors = 0
        self.types = {}

    def visitModule(self, mod):
        for dfn in mod.dfns:
            self.visit(dfn)

    def visitType(self, type):
        self.visit(type.value, str(type.name))

    def visitSum(self, sum, name):
        for t in sum.types:
            self.visit(t, name)

    def visitConstructor(self, cons, name):
        key = str(cons.name)
        conflict = self.cons.get(key)
        if conflict is None:
            self.cons[key] = name
        else:
            output("Redefinition of constructor %s" % key)
            output("Defined in %s and %s" % (conflict, name))
            self.errors += 1
        for f in cons.fields:
            self.visit(f, key)

    def visitField(self, field, name):
        key = str(field.type)
        l = self.types.setdefault(key, [])
        l.append(name)

    def visitProduct(self, prod, name):
        for f in prod.fields:
            self.visit(f, name)

def check(mod):
    v = Check()
    v.visit(mod)

    for t in v.types:
        if t not in mod.types and not t in builtin_types:
            v.errors += 1
            uses = ", ".join(v.types[t])
            output("Undefined type %s, used in %s" % (t, uses))

    return not v.errors

def parse(file):
    scanner = ASDLScanner()
    parser = ASDLParser()

    buf = open(file).read()
    tokens = scanner.tokenize(buf)
    try:
        return parser.parse(tokens)
    except ASDLSyntaxError:
        err = sys.exc_info()[1]
        output(str(err))
        lines = buf.split("\n")
        output(lines[err.lineno - 1]) # lines starts at 0, files at 1

if __name__ == "__main__":
    import glob
    import sys

    if len(sys.argv) > 1:
        files = sys.argv[1:]
    else:
        testdir = "tests"
        files = glob.glob(testdir + "/*.asdl")

    for file in files:
        output(file)
        mod = parse(file)
        if not mod:
            break
        output("module", mod.name)
        output(len(mod.dfns), "definitions")
        if not check(mod):
            output("Check failed")
        else:
            for dfn in mod.dfns:
                output(dfn.type)