mirror of
https://github.com/python/cpython.git
synced 2024-12-01 11:15:56 +01:00
3e0055f8c6
This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests.
394 lines
10 KiB
Python
394 lines
10 KiB
Python
"""An implementation of the Zephyr Abstract Syntax Definition Language.
|
|
|
|
See http://asdl.sourceforge.net/ and
|
|
http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html.
|
|
|
|
Only supports top level module decl, not view. I'm guessing that view
|
|
is intended to support the browser and I'm not interested in the
|
|
browser.
|
|
"""
|
|
|
|
#__metaclass__ = type
|
|
|
|
import os
|
|
import traceback
|
|
|
|
import spark
|
|
|
|
class Token:
|
|
# spark seems to dispatch in the parser based on a token's
|
|
# type attribute
|
|
def __init__(self, type, lineno):
|
|
self.type = type
|
|
self.lineno = lineno
|
|
|
|
def __str__(self):
|
|
return self.type
|
|
|
|
def __repr__(self):
|
|
return str(self)
|
|
|
|
class Id(Token):
|
|
def __init__(self, value, lineno):
|
|
self.type = 'Id'
|
|
self.value = value
|
|
self.lineno = lineno
|
|
|
|
def __str__(self):
|
|
return self.value
|
|
|
|
class ASDLSyntaxError:
|
|
|
|
def __init__(self, lineno, token=None, msg=None):
|
|
self.lineno = lineno
|
|
self.token = token
|
|
self.msg = msg
|
|
|
|
def __str__(self):
|
|
if self.msg is None:
|
|
return "Error at '%s', line %d" % (self.token, self.lineno)
|
|
else:
|
|
return "%s, line %d" % (self.msg, self.lineno)
|
|
|
|
class ASDLScanner(spark.GenericScanner, object):
|
|
|
|
def tokenize(self, input):
|
|
self.rv = []
|
|
self.lineno = 1
|
|
super(ASDLScanner, self).tokenize(input)
|
|
return self.rv
|
|
|
|
def t_id(self, s):
|
|
r"[\w\.]+"
|
|
# XXX doesn't distinguish upper vs. lower, which is
|
|
# significant for ASDL.
|
|
self.rv.append(Id(s, self.lineno))
|
|
|
|
def t_xxx(self, s): # not sure what this production means
|
|
r"<="
|
|
self.rv.append(Token(s, self.lineno))
|
|
|
|
def t_punctuation(self, s):
|
|
r"[\{\}\*\=\|\(\)\,\?\:]"
|
|
self.rv.append(Token(s, self.lineno))
|
|
|
|
def t_comment(self, s):
|
|
r"\-\-[^\n]*"
|
|
pass
|
|
|
|
def t_newline(self, s):
|
|
r"\n"
|
|
self.lineno += 1
|
|
|
|
def t_whitespace(self, s):
|
|
r"[ \t]+"
|
|
pass
|
|
|
|
def t_default(self, s):
|
|
r" . +"
|
|
raise ValueError, "unmatched input: %s" % `s`
|
|
|
|
class ASDLParser(spark.GenericParser, object):
|
|
def __init__(self):
|
|
super(ASDLParser, self).__init__("module")
|
|
|
|
def typestring(self, tok):
|
|
return tok.type
|
|
|
|
def error(self, tok):
|
|
raise ASDLSyntaxError(tok.lineno, tok)
|
|
|
|
def p_module_0(self, (module, name, _0, _1)):
|
|
" module ::= Id Id { } "
|
|
if module.value != "module":
|
|
raise ASDLSyntaxError(module.lineno,
|
|
msg="expected 'module', found %s" % module)
|
|
return Module(name, None)
|
|
|
|
def p_module(self, (module, name, _0, definitions, _1)):
|
|
" module ::= Id Id { definitions } "
|
|
if module.value != "module":
|
|
raise ASDLSyntaxError(module.lineno,
|
|
msg="expected 'module', found %s" % module)
|
|
return Module(name, definitions)
|
|
|
|
def p_definition_0(self, (definition,)):
|
|
" definitions ::= definition "
|
|
return definition
|
|
|
|
def p_definition_1(self, (definitions, definition)):
|
|
" definitions ::= definition definitions "
|
|
return definitions + definition
|
|
|
|
def p_definition(self, (id, _, type)):
|
|
" definition ::= Id = type "
|
|
return [Type(id, type)]
|
|
|
|
def p_type_0(self, (product,)):
|
|
" type ::= product "
|
|
return product
|
|
|
|
def p_type_1(self, (sum,)):
|
|
" type ::= sum "
|
|
return Sum(sum)
|
|
|
|
def p_type_2(self, (sum, id, _0, attributes, _1)):
|
|
" type ::= sum Id ( fields ) "
|
|
if id.value != "attributes":
|
|
raise ASDLSyntaxError(id.lineno,
|
|
msg="expected attributes, found %s" % id)
|
|
return Sum(sum, attributes)
|
|
|
|
def p_product(self, (_0, fields, _1)):
|
|
" product ::= ( fields ) "
|
|
# XXX can't I just construct things in the right order?
|
|
fields.reverse()
|
|
return Product(fields)
|
|
|
|
def p_sum_0(self, (constructor,)):
|
|
" sum ::= constructor """
|
|
return [constructor]
|
|
|
|
def p_sum_1(self, (constructor, _, sum)):
|
|
" sum ::= constructor | sum "
|
|
return [constructor] + sum
|
|
|
|
def p_sum_2(self, (constructor, _, sum)):
|
|
" sum ::= constructor | sum "
|
|
return [constructor] + sum
|
|
|
|
def p_constructor_0(self, (id,)):
|
|
" constructor ::= Id "
|
|
return Constructor(id)
|
|
|
|
def p_constructor_1(self, (id, _0, fields, _1)):
|
|
" constructor ::= Id ( fields ) "
|
|
# XXX can't I just construct things in the right order?
|
|
fields.reverse()
|
|
return Constructor(id, fields)
|
|
|
|
def p_fields_0(self, (field,)):
|
|
" fields ::= field "
|
|
return [field]
|
|
|
|
def p_fields_1(self, (field, _, fields)):
|
|
" fields ::= field , fields "
|
|
return fields + [field]
|
|
|
|
def p_field_0(self, (type,)):
|
|
" field ::= Id "
|
|
return Field(type)
|
|
|
|
def p_field_1(self, (type, name)):
|
|
" field ::= Id Id "
|
|
return Field(type, name)
|
|
|
|
def p_field_2(self, (type, _, name)):
|
|
" field ::= Id * Id "
|
|
return Field(type, name, seq=1)
|
|
|
|
def p_field_3(self, (type, _, name)):
|
|
" field ::= Id ? Id "
|
|
return Field(type, name, opt=1)
|
|
|
|
def p_field_4(self, (type, _)):
|
|
" field ::= Id * "
|
|
return Field(type, seq=1)
|
|
|
|
def p_field_5(self, (type, _)):
|
|
" field ::= Id ? "
|
|
return Field(type, opt=1)
|
|
|
|
builtin_types = ("identifier", "string", "int", "bool", "object")
|
|
|
|
# below is a collection of classes to capture the AST of an AST :-)
|
|
# not sure if any of the methods are useful yet, but I'm adding them
|
|
# piecemeal as they seem helpful
|
|
|
|
class AST:
|
|
pass # a marker class
|
|
|
|
class Module(AST):
|
|
def __init__(self, name, dfns):
|
|
self.name = name
|
|
self.dfns = dfns
|
|
self.types = {} # maps type name to value (from dfns)
|
|
for type in dfns:
|
|
self.types[type.name.value] = type.value
|
|
|
|
def __repr__(self):
|
|
return "Module(%s, %s)" % (self.name, self.dfns)
|
|
|
|
class Type(AST):
|
|
def __init__(self, name, value):
|
|
self.name = name
|
|
self.value = value
|
|
|
|
def __repr__(self):
|
|
return "Type(%s, %s)" % (self.name, self.value)
|
|
|
|
class Constructor(AST):
|
|
def __init__(self, name, fields=None):
|
|
self.name = name
|
|
self.fields = fields or []
|
|
|
|
def __repr__(self):
|
|
return "Constructor(%s, %s)" % (self.name, self.fields)
|
|
|
|
class Field(AST):
|
|
def __init__(self, type, name=None, seq=0, opt=0):
|
|
self.type = type
|
|
self.name = name
|
|
self.seq = seq
|
|
self.opt = opt
|
|
|
|
def __repr__(self):
|
|
if self.seq:
|
|
extra = ", seq=1"
|
|
elif self.opt:
|
|
extra = ", opt=1"
|
|
else:
|
|
extra = ""
|
|
if self.name is None:
|
|
return "Field(%s%s)" % (self.type, extra)
|
|
else:
|
|
return "Field(%s, %s%s)" % (self.type, self.name, extra)
|
|
|
|
class Sum(AST):
|
|
def __init__(self, types, attributes=None):
|
|
self.types = types
|
|
self.attributes = attributes or []
|
|
|
|
def __repr__(self):
|
|
if self.attributes is None:
|
|
return "Sum(%s)" % self.types
|
|
else:
|
|
return "Sum(%s, %s)" % (self.types, self.attributes)
|
|
|
|
class Product(AST):
|
|
def __init__(self, fields):
|
|
self.fields = fields
|
|
|
|
def __repr__(self):
|
|
return "Product(%s)" % self.fields
|
|
|
|
class VisitorBase(object):
|
|
|
|
def __init__(self, skip=0):
|
|
self.cache = {}
|
|
self.skip = skip
|
|
|
|
def visit(self, object, *args):
|
|
meth = self._dispatch(object)
|
|
if meth is None:
|
|
return
|
|
try:
|
|
meth(object, *args)
|
|
except Exception, err:
|
|
print "Error visiting", repr(object)
|
|
print err
|
|
traceback.print_exc()
|
|
# XXX hack
|
|
if hasattr(self, 'file'):
|
|
self.file.flush()
|
|
os._exit(1)
|
|
|
|
def _dispatch(self, object):
|
|
assert isinstance(object, AST), repr(object)
|
|
klass = object.__class__
|
|
meth = self.cache.get(klass)
|
|
if meth is None:
|
|
methname = "visit" + klass.__name__
|
|
if self.skip:
|
|
meth = getattr(self, methname, None)
|
|
else:
|
|
meth = getattr(self, methname)
|
|
self.cache[klass] = meth
|
|
return meth
|
|
|
|
class Check(VisitorBase):
|
|
|
|
def __init__(self):
|
|
super(Check, self).__init__(skip=1)
|
|
self.cons = {}
|
|
self.errors = 0
|
|
self.types = {}
|
|
|
|
def visitModule(self, mod):
|
|
for dfn in mod.dfns:
|
|
self.visit(dfn)
|
|
|
|
def visitType(self, type):
|
|
self.visit(type.value, str(type.name))
|
|
|
|
def visitSum(self, sum, name):
|
|
for t in sum.types:
|
|
self.visit(t, name)
|
|
|
|
def visitConstructor(self, cons, name):
|
|
key = str(cons.name)
|
|
conflict = self.cons.get(key)
|
|
if conflict is None:
|
|
self.cons[key] = name
|
|
else:
|
|
print "Redefinition of constructor %s" % key
|
|
print "Defined in %s and %s" % (conflict, name)
|
|
self.errors += 1
|
|
for f in cons.fields:
|
|
self.visit(f, key)
|
|
|
|
def visitField(self, field, name):
|
|
key = str(field.type)
|
|
l = self.types.setdefault(key, [])
|
|
l.append(name)
|
|
|
|
def visitProduct(self, prod, name):
|
|
for f in prod.fields:
|
|
self.visit(f, name)
|
|
|
|
def check(mod):
|
|
v = Check()
|
|
v.visit(mod)
|
|
|
|
for t in v.types:
|
|
if not mod.types.has_key(t) and not t in builtin_types:
|
|
v.errors += 1
|
|
uses = ", ".join(v.types[t])
|
|
print "Undefined type %s, used in %s" % (t, uses)
|
|
|
|
return not v.errors
|
|
|
|
def parse(file):
|
|
scanner = ASDLScanner()
|
|
parser = ASDLParser()
|
|
|
|
buf = open(file).read()
|
|
tokens = scanner.tokenize(buf)
|
|
try:
|
|
return parser.parse(tokens)
|
|
except ASDLSyntaxError, err:
|
|
print err
|
|
lines = buf.split("\n")
|
|
print lines[err.lineno - 1] # lines starts at 0, files at 1
|
|
|
|
if __name__ == "__main__":
|
|
import glob
|
|
import sys
|
|
|
|
if len(sys.argv) > 1:
|
|
files = sys.argv[1:]
|
|
else:
|
|
testdir = "tests"
|
|
files = glob.glob(testdir + "/*.asdl")
|
|
|
|
for file in files:
|
|
print file
|
|
mod = parse(file)
|
|
print "module", mod.name
|
|
print len(mod.dfns), "definitions"
|
|
if not check(mod):
|
|
print "Check failed"
|
|
else:
|
|
for dfn in mod.dfns:
|
|
print dfn.type
|