phasm/phasm/parser.py
Johan B.W. de Vries a838035e1a Cleanup
- Removed unused _str types
- Tested with untyped function arguments, seems to work a bit
2022-11-27 12:33:17 +01:00

731 lines
28 KiB
Python

"""
Parses the source code from the plain text into a syntax tree
"""
from typing import Any, Dict, NoReturn, Union
import ast
from .type3 import types as type3types
from .exceptions import StaticError
from .ourlang import (
WEBASSEMBLY_BUILTIN_FLOAT_OPS,
Module, ModuleDataBlock,
Function,
Expression,
BinaryOp,
ConstantPrimitive, ConstantTuple,
FunctionCall, AccessStructMember, Subscript,
StructDefinition, StructConstructor,
# TupleConstructor,
UnaryOp, VariableReference,
Fold,
Statement,
StatementIf, StatementPass, StatementReturn,
FunctionParam,
ModuleConstantDef,
)
def phasm_parse(source: str) -> Module:
    """
    Parse Phasm source text and return the resulting Phasm Module

    The text is first handed to Python's own `ast` parser (with an empty
    filename); the resulting Python tree is then converted by OurVisitor.
    """
    python_tree = ast.parse(source, '')
    return OurVisitor().visit_Module(python_tree)
# Names that can be referenced from inside a function body, keyed by name.
# Right now only function parameters end up in here.
# TODO: Also local variable and module constants?
OurLocals = Dict[str, Union[FunctionParam]]
class OurVisitor:
"""
Class to visit a Python syntax tree and create an ourlang syntax tree
We're (ab)using the Python AST parser to give us a leg up
At some point, we may deviate from Python syntax. If nothing else,
we probably won't keep up with the Python syntax changes.
"""
# pylint: disable=C0103,C0116,C0301,R0201,R0912
def __init__(self) -> None:
pass
def visit_Module(self, node: ast.Module) -> Module:
    """
    Converts a Python ast.Module into a Phasm Module

    The top level statements are walked twice:

    1. The first pass registers every module constant, every struct (and
       the constructor function that goes with it) and every function
       signature. Defining a constant, struct or function under a name
       that is already taken raises a StaticError.
    2. The second pass parses the function bodies, which can then refer
       to everything registered in the first pass.
    """
    module = Module()

    _not_implemented(not node.type_ignores, 'Module.type_ignores')

    # First pass: constants, structs and function signatures
    for toplevel in node.body:
        definition = self.pre_visit_Module_stmt(module, toplevel)

        if isinstance(definition, ModuleConstantDef):
            const_name = definition.name
            if const_name in module.constant_defs:
                earlier_lineno = module.constant_defs[const_name].lineno
                raise StaticError(
                    f'{const_name} already defined on line {earlier_lineno}'
                )

            module.constant_defs[const_name] = definition

        if isinstance(definition, StructDefinition):
            struct_type3 = definition.struct_type3
            struct_name = struct_type3.name
            if struct_name in module.struct_definitions:
                earlier_lineno = module.struct_definitions[struct_name].lineno
                raise StaticError(
                    f'{struct_name} already defined on line {earlier_lineno}'
                )

            module.struct_definitions[struct_name] = definition

            # Every struct gets a constructor that is callable like a function
            constructor = StructConstructor(struct_type3)
            module.functions[constructor.name] = constructor

        if isinstance(definition, Function):
            func_name = definition.name
            if func_name in module.functions:
                earlier_lineno = module.functions[func_name].lineno
                raise StaticError(
                    f'{func_name} already defined on line {earlier_lineno}'
                )

            module.functions[func_name] = definition

    # Second pass: the function bodies
    for toplevel in node.body:
        self.visit_Module_stmt(module, toplevel)

    return module
def pre_visit_Module_stmt(self, module: Module, node: ast.stmt) -> Union[Function, StructDefinition, ModuleConstantDef]:
    """
    First pass over a single top level statement

    Function definitions, class definitions and annotated assignments are
    passed on to their own pre_visit method; every other kind of
    statement raises NotImplementedError.
    """
    if not isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AnnAssign)):
        raise NotImplementedError(f'{node} on Module')

    if isinstance(node, ast.FunctionDef):
        return self.pre_visit_Module_FunctionDef(module, node)

    if isinstance(node, ast.ClassDef):
        return self.pre_visit_Module_ClassDef(module, node)

    # Only the annotated assignment is left at this point
    return self.pre_visit_Module_AnnAssign(module, node)
def pre_visit_Module_FunctionDef(self, module: Module, node: ast.FunctionDef) -> Function:
    """
    Builds the Function for a definition, without parsing its body

    Only plain arguments are supported. An argument without annotation
    is recorded with None as its type. The only accepted decorators are
    the bare names `exported` and `imported`. A return annotation is
    mandatory; leaving it out raises NotImplementedError.
    """
    function = Function(node.name, node.lineno)

    _not_implemented(not node.args.posonlyargs, 'FunctionDef.args.posonlyargs')

    for arg in node.args.args:
        arg_type3 = self.visit_type(module, arg.annotation) if arg.annotation else None
        function.posonlyargs.append(FunctionParam(arg.arg, arg_type3))

    # None of the fancier ways to declare arguments are supported
    for unsupported, label in (
        (node.args.vararg, 'FunctionDef.args.vararg'),
        (node.args.kwonlyargs, 'FunctionDef.args.kwonlyargs'),
        (node.args.kw_defaults, 'FunctionDef.args.kw_defaults'),
        (node.args.kwarg, 'FunctionDef.args.kwarg'),
        (node.args.defaults, 'FunctionDef.args.defaults'),
    ):
        _not_implemented(not unsupported, label)

    # The body is not touched here; it is parsed in the second pass

    for decorator in node.decorator_list:
        if not isinstance(decorator, ast.Name):
            _raise_static_error(decorator, 'Function decorators must be string')

        if not isinstance(decorator.ctx, ast.Load):
            _raise_static_error(decorator, 'Must be load context')

        _not_implemented(decorator.id in ('exported', 'imported'), 'Custom decorators')

        if decorator.id == 'exported':
            function.exported = True
        else:
            function.imported = True

    if node.returns is None:  # Note: `-> None` would be a ast.Constant
        # Mostly works already, needs to fix Function.returns_type3 and have it updated
        raise NotImplementedError('Function without an explicit return type')

    function.returns_type3 = self.visit_type(module, node.returns)

    _not_implemented(not node.type_comment, 'FunctionDef.type_comment')

    return function
def pre_visit_Module_ClassDef(self, module: Module, node: ast.ClassDef) -> StructDefinition:
    """
    Builds a StructDefinition from a class definition

    The class may not have bases, keywords or decorators, and its body
    may only consist of annotated names without a value, e.g. `x: u8`.
    Every such line becomes a struct member; the annotation is resolved
    with visit_type.

    Raises NotImplementedError for anything else in the class body, and a
    StaticError when a member name is used twice.
    """
    _not_implemented(not node.bases, 'ClassDef.bases')
    _not_implemented(not node.keywords, 'ClassDef.keywords')
    _not_implemented(not node.decorator_list, 'ClassDef.decorator_list')

    members: Dict[str, type3types.Type3] = {}

    for stmt in node.body:
        if not isinstance(stmt, ast.AnnAssign):
            raise NotImplementedError(f'Class with {stmt} nodes')

        if not isinstance(stmt.target, ast.Name):
            # e.g. `a.b: u8` or `a[0]: u8`; this has nothing to do with
            # default values, so it gets its own message
            raise NotImplementedError('Class with non-name member targets')

        if stmt.value is not None:
            raise NotImplementedError('Class with default values')

        if stmt.simple != 1:
            raise NotImplementedError('Class with non-simple arguments')

        if stmt.target.id in members:
            _raise_static_error(stmt, 'Struct members must have unique names')

        members[stmt.target.id] = self.visit_type(module, stmt.annotation)

    return StructDefinition(type3types.StructType3(node.name, members), node.lineno)
def pre_visit_Module_AnnAssign(self, module: Module, node: ast.AnnAssign) -> ModuleConstantDef:
    """
    Builds a module constant from an annotated assignment

    Two kinds of values are supported:
    - a single constant, e.g. `X: u8 = 1`
    - a tuple consisting only of constants; its values are additionally
      stored in a new data block that is appended to module.data.blocks

    Raises StaticError when the target is not a plain name or when a
    tuple contains a non-constant element, and NotImplementedError for
    any other kind of value.
    """
    if not isinstance(node.target, ast.Name):
        _raise_static_error(node, 'Must be name')

    if not isinstance(node.target.ctx, ast.Store):
        # The target of an assignment is written to, so the parser marks
        # it with Store context; the message used to say "load"
        _raise_static_error(node, 'Must be store context')

    if isinstance(node.value, ast.Constant):
        type3 = self.visit_type(module, node.annotation)
        return ModuleConstantDef(
            node.target.id,
            node.lineno,
            type3,
            self.visit_Module_Constant(module, node.value),
            None,
        )

    if isinstance(node.value, ast.Tuple):
        tuple_data = [
            self.visit_Module_Constant(module, arg_node)
            for arg_node in node.value.elts
            if isinstance(arg_node, ast.Constant)
        ]
        # Non-constant elements were filtered out above, so a length
        # mismatch means at least one element was not a constant
        if len(node.value.elts) != len(tuple_data):
            _raise_static_error(node, 'Tuple arguments must be constants')

        # Allocate the data
        data_block = ModuleDataBlock(tuple_data)
        module.data.blocks.append(data_block)

        # Then return the constant as a pointer
        return ModuleConstantDef(
            node.target.id,
            node.lineno,
            self.visit_type(module, node.annotation),
            ConstantTuple(tuple_data),
            data_block,
        )

    raise NotImplementedError('TODO: Broken after new typing system')
# if isinstance(exp_type, TypeTuple):
# if not isinstance(node.value, ast.Tuple):
# _raise_static_error(node, 'Must be tuple')
#
# if len(exp_type.members) != len(node.value.elts):
# _raise_static_error(node, 'Invalid number of tuple values')
#
# tuple_data = [
# self.visit_Module_Constant(module, arg_node)
# for arg_node, mem in zip(node.value.elts, exp_type.members)
# if isinstance(arg_node, ast.Constant)
# ]
# if len(exp_type.members) != len(tuple_data):
# _raise_static_error(node, 'Tuple arguments must be constants')
#
# # Allocate the data
# data_block = ModuleDataBlock(tuple_data)
# module.data.blocks.append(data_block)
#
# # Then return the constant as a pointer
# return ModuleConstantDef(
# node.target.id,
# node.lineno,
# exp_type,
# ConstantTuple(tuple_data),
# data_block,
# )
#
# if isinstance(exp_type, TypeStaticArray):
# if not isinstance(node.value, ast.Tuple):
# _raise_static_error(node, 'Must be static array')
#
# if len(exp_type.members) != len(node.value.elts):
# _raise_static_error(node, 'Invalid number of static array values')
#
# static_array_data = [
# self.visit_Module_Constant(module, arg_node)
# for arg_node in node.value.elts
# if isinstance(arg_node, ast.Constant)
# ]
# if len(exp_type.members) != len(static_array_data):
# _raise_static_error(node, 'Static array arguments must be constants')
#
# # Allocate the data
# data_block = ModuleDataBlock(static_array_data)
# module.data.blocks.append(data_block)
#
# # Then return the constant as a pointer
# return ModuleConstantDef(
# node.target.id,
# node.lineno,
# ConstantStaticArray(static_array_data),
# data_block,
# )
#
# raise NotImplementedError(f'{node} on Module AnnAssign')
def visit_Module_stmt(self, module: Module, node: ast.stmt) -> None:
    """
    Second pass over a single top level statement

    Only function definitions need work here (their body gets parsed).
    Class definitions and annotated assignments are accepted but skipped;
    any other statement raises NotImplementedError.
    """
    if isinstance(node, (ast.ClassDef, ast.AnnAssign)):
        # Nothing to do for these in this pass
        return

    if not isinstance(node, ast.FunctionDef):
        raise NotImplementedError(f'{node} on Module')

    self.visit_Module_FunctionDef(module, node)
def visit_Module_FunctionDef(self, module: Module, node: ast.FunctionDef) -> None:
    """
    Parses the body of a function that is already present in module.functions

    The parameters of the function are the names its body can refer to.
    Every statement of the body is converted and appended, in order, to
    the statements of the function.
    """
    function = module.functions[node.name]

    our_locals: OurLocals = {}
    for param in function.posonlyargs:
        our_locals[param.name] = param

    for body_stmt in node.body:
        statement = self.visit_Module_FunctionDef_stmt(module, function, our_locals, body_stmt)
        function.statements.append(statement)
def visit_Module_FunctionDef_stmt(self, module: Module, function: Function, our_locals: OurLocals, node: ast.stmt) -> Statement:
    """
    Converts one statement from a function body

    Supported are `return <expr>`, `if` (with optional else branch) and
    `pass`. A return without a value is a static error; every other kind
    of statement raises NotImplementedError.
    """
    if isinstance(node, ast.Return):
        if node.value is None:
            # TODO: Implement methods without return values
            _raise_static_error(node, 'Return must have an argument')

        return_value = self.visit_Module_FunctionDef_expr(module, function, our_locals, node.value)
        return StatementReturn(return_value)

    if isinstance(node, ast.If):
        test_expr = self.visit_Module_FunctionDef_expr(module, function, our_locals, node.test)
        result = StatementIf(test_expr)

        # First the body of the if itself, then the body of the else
        for branch_stmts, target in (
            (node.body, result.statements),
            (node.orelse, result.else_statements),
        ):
            for branch_stmt in branch_stmts:
                target.append(
                    self.visit_Module_FunctionDef_stmt(module, function, our_locals, branch_stmt)
                )

        return result

    if isinstance(node, ast.Pass):
        return StatementPass()

    raise NotImplementedError(f'{node} as stmt in FunctionDef')
def visit_Module_FunctionDef_expr(self, module: Module, function: Function, our_locals: OurLocals, node: ast.expr) -> Expression:
    """
    Converts a Python expression from a function body to an ourlang Expression

    Supported are binary operators, unary plus and minus, single
    comparisons (`>`, `==`, `<`), calls, constants, attribute access,
    subscripts and names. A name is looked up in our_locals first and in
    the module constants second; an unknown name is a static error.

    Raises NotImplementedError for unsupported operators, chained
    comparisons, tuples and every other kind of expression.
    """
    # Python operator node -> operator as stored in the ourlang tree
    binary_operators = (
        (ast.Add, '+'),
        (ast.Sub, '-'),
        (ast.Mult, '*'),
        (ast.Div, '/'),
        (ast.LShift, '<<'),
        (ast.RShift, '>>'),
        (ast.BitOr, '|'),
        (ast.BitXor, '^'),
        (ast.BitAnd, '&'),
    )
    unary_operators = (
        (ast.UAdd, '+'),
        (ast.USub, '-'),
    )
    compare_operators = (
        (ast.Gt, '>'),
        (ast.Eq, '=='),
        (ast.Lt, '<'),
    )

    def recurse(sub_node: ast.expr) -> Expression:
        return self.visit_Module_FunctionDef_expr(module, function, our_locals, sub_node)

    if isinstance(node, ast.BinOp):
        operator = next(
            (symbol for op_cls, symbol in binary_operators if isinstance(node.op, op_cls)),
            None,
        )
        if operator is None:
            raise NotImplementedError(f'Operator {node.op}')

        # Assume the type doesn't change when descending into a binary operator
        # e.g. you can do `"hello" * 3` with the code below (yet)
        return BinaryOp(operator, recurse(node.left), recurse(node.right))

    if isinstance(node, ast.UnaryOp):
        operator = next(
            (symbol for op_cls, symbol in unary_operators if isinstance(node.op, op_cls)),
            None,
        )
        if operator is None:
            raise NotImplementedError(f'Operator {node.op}')

        return UnaryOp(operator, recurse(node.operand))

    if isinstance(node, ast.Compare):
        if len(node.ops) > 1:
            raise NotImplementedError('Multiple operators')

        operator = next(
            (symbol for op_cls, symbol in compare_operators if isinstance(node.ops[0], op_cls)),
            None,
        )
        if operator is None:
            raise NotImplementedError(f'Operator {node.ops}')

        # A comparison is stored as a binary operator as well
        return BinaryOp(operator, recurse(node.left), recurse(node.comparators[0]))

    if isinstance(node, ast.Call):
        return self.visit_Module_FunctionDef_Call(module, function, our_locals, node)

    if isinstance(node, ast.Constant):
        return self.visit_Module_Constant(module, node)

    if isinstance(node, ast.Attribute):
        return self.visit_Module_FunctionDef_Attribute(module, function, our_locals, node)

    if isinstance(node, ast.Subscript):
        return self.visit_Module_FunctionDef_Subscript(module, function, our_locals, node)

    if isinstance(node, ast.Name):
        if not isinstance(node.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        # Locals take precedence over module constants
        if node.id in our_locals:
            return VariableReference(our_locals[node.id])

        if node.id in module.constant_defs:
            return VariableReference(module.constant_defs[node.id])

        _raise_static_error(node, f'Undefined variable {node.id}')

    if isinstance(node, ast.Tuple):
        raise NotImplementedError('TODO: Broken after new type system')

        # if not isinstance(node.ctx, ast.Load):
        #     _raise_static_error(node, 'Must be load context')
        #
        # if isinstance(exp_type, TypeTuple):
        #     if len(exp_type.members) != len(node.elts):
        #         _raise_static_error(node, f'Expression is expecting a tuple of size {len(exp_type.members)}, but {len(node.elts)} are given')
        #
        #     tuple_constructor = TupleConstructor(exp_type)
        #
        #     func = module.functions[tuple_constructor.name]
        #
        #     result = FunctionCall(func)
        #     result.arguments = [
        #         self.visit_Module_FunctionDef_expr(module, function, our_locals, mem.type, arg_node)
        #         for arg_node, mem in zip(node.elts, exp_type.members)
        #     ]
        #     return result
        #
        # _raise_static_error(node, f'Expression is expecting a {codestyle.type_(exp_type)}, not a tuple')

    raise NotImplementedError(f'{node} as expr in FunctionDef')
def visit_Module_FunctionDef_Call(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Call) -> Union[Fold, FunctionCall, UnaryOp]:
    """
    Converts a call expression

    What is produced depends on the name being called; checked in this order:
    - the name of a struct: a call to the constructor of that struct
    - a name in WEBASSEMBLY_BUILTIN_FLOAT_OPS: a UnaryOp with that name
      as the operator
    - `u32`: a UnaryOp 'cast' (stub)
    - `len`: a UnaryOp 'len'
    - `foldl`: the arguments are validated, after which
      NotImplementedError is raised
    - anything else: a call to a function defined in the module

    Raises a StaticError for keyword arguments, a wrong number of
    arguments and calls to undefined functions, and NotImplementedError
    when the thing being called is not a plain name.
    """
    if node.keywords:
        _raise_static_error(node, 'Keyword calling not supported') # Yet?

    if not isinstance(node.func, ast.Name):
        raise NotImplementedError(f'Calling methods that are not a name {node.func}')
    if not isinstance(node.func.ctx, ast.Load):
        _raise_static_error(node, 'Must be load context')

    func_name = node.func.id

    if func_name in module.struct_definitions:
        struct_definition = module.struct_definitions[func_name]
        struct_constructor = StructConstructor(struct_definition.struct_type3)

        # FIXME: Defer struct de-allocation

        func = module.functions[struct_constructor.name]
    elif func_name in WEBASSEMBLY_BUILTIN_FLOAT_OPS:
        if 1 != len(node.args):
            _raise_static_error(node, f'Function {func_name} requires 1 arguments but {len(node.args)} are given')

        # Pass on the operation that was actually called. This used to
        # be a hard-coded 'sqrt', which turned any other builtin float
        # operation into a square root.
        return UnaryOp(
            func_name,
            self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[0]),
        )
    elif func_name == 'u32':
        if 1 != len(node.args):
            _raise_static_error(node, f'Function {func_name} requires 1 arguments but {len(node.args)} are given')

        # FIXME: This is a stub, proper casting is todo

        return UnaryOp(
            'cast',
            self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[0]),
        )
    elif func_name == 'len':
        if 1 != len(node.args):
            _raise_static_error(node, f'Function {func_name} requires 1 arguments but {len(node.args)} are given')

        return UnaryOp(
            'len',
            self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[0]),
        )
    elif func_name == 'foldl':
        # TODO: This should a much more generic function!
        # For development purposes, we're assuming you're doing a foldl(Callable[[u8, u8], u8], u8, bytes)
        # In the future, we should probably infer the type of the second argument,
        # and use it as expected types for the other u8s and the Iterable[u8] (i.e. bytes)

        if 3 != len(node.args):
            _raise_static_error(node, f'Function {func_name} requires 3 arguments but {len(node.args)} are given')

        # TODO: This is not generic
        subnode = node.args[0]
        if not isinstance(subnode, ast.Name):
            raise NotImplementedError(f'Calling methods that are not a name {subnode}')
        if not isinstance(subnode.ctx, ast.Load):
            _raise_static_error(subnode, 'Must be load context')
        if subnode.id not in module.functions:
            _raise_static_error(subnode, 'Reference to undefined function')
        func = module.functions[subnode.id]
        if 2 != len(func.posonlyargs):
            _raise_static_error(node, f'Function {func_name} requires a function with 2 arguments but a function with {len(func.posonlyargs)} args is given')

        raise NotImplementedError('TODO: Broken after new type system')

        # Not reachable for now; kept for when foldl is enabled again
        return Fold(
            Fold.Direction.LEFT,
            func,
            self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[1]),
            self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[2]),
        )
    else:
        if func_name not in module.functions:
            _raise_static_error(node, 'Call to undefined function')

        func = module.functions[func_name]

    # Only struct constructors and regular functions get here
    if len(func.posonlyargs) != len(node.args):
        _raise_static_error(node, f'Function {func_name} requires {len(func.posonlyargs)} arguments but {len(node.args)} are given')

    result = FunctionCall(func)
    result.arguments.extend(
        self.visit_Module_FunctionDef_expr(module, function, our_locals, arg_expr)
        for arg_expr in node.args
    )
    return result
def visit_Module_FunctionDef_Attribute(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Attribute) -> Expression:
    """
    Converts `<name>.<attr>` into an AccessStructMember

    The name must be present in our_locals, its type must be a struct
    type and that struct must have a member called `<attr>`; each of
    these is a static error when not met. The module and function
    arguments are not used.
    """
    del module, function

    if not isinstance(node.value, ast.Name):
        _raise_static_error(node, 'Must reference a name')

    if not isinstance(node.ctx, ast.Load):
        _raise_static_error(node, 'Must be load context')

    var_name = node.value.id
    if var_name not in our_locals:
        _raise_static_error(node, f'Undefined variable {var_name}')

    param = our_locals[var_name]
    struct_type3 = param.type3

    if not isinstance(struct_type3, type3types.StructType3):
        _raise_static_error(node, f'Cannot take attribute of non-struct {var_name}')

    if struct_type3.members.get(node.attr) is None:
        _raise_static_error(node, f'{struct_type3.name} has no attribute {node.attr}')

    return AccessStructMember(VariableReference(param), struct_type3, node.attr)
def visit_Module_FunctionDef_Subscript(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Subscript) -> Expression:
    """
    Converts `<name>[<expr>]` into a Subscript

    The name is looked up in our_locals first and in the module constants
    second. The index can be any expression that
    visit_Module_FunctionDef_expr accepts; slices such as `a[1:2]` are a
    static error.

    Works with both layouts of the Python syntax tree: before Python 3.9
    the index expression is wrapped in an ast.Index node, from 3.9 on it
    is stored directly in node.slice.
    """
    if not isinstance(node.value, ast.Name):
        _raise_static_error(node, 'Must reference a name')

    # Unwrap the index. On Python 3.9+ the parser no longer produces
    # ast.Index nodes, so an isinstance check against it never matches
    # there and would reject every subscript.
    slice_node: Any = node.slice
    legacy_index = getattr(ast, 'Index', None)
    if legacy_index is not None and isinstance(slice_node, legacy_index):
        slice_node = slice_node.value

    # ast.ExtSlice is only produced by Python < 3.9
    if isinstance(slice_node, (ast.Slice, getattr(ast, 'ExtSlice', ast.Slice))):
        _raise_static_error(node, 'Must subscript using an index')

    if not isinstance(node.ctx, ast.Load):
        _raise_static_error(node, 'Must be load context')

    varref: VariableReference
    if node.value.id in our_locals:
        param = our_locals[node.value.id]
        varref = VariableReference(param)
    elif node.value.id in module.constant_defs:
        constant_def = module.constant_defs[node.value.id]
        varref = VariableReference(constant_def)
    else:
        _raise_static_error(node, f'Undefined variable {node.value.id}')

    slice_expr = self.visit_Module_FunctionDef_expr(
        module, function, our_locals, slice_node,
    )

    return Subscript(varref, slice_expr)
# if isinstance(node_typ, TypeBytes):
# if isinstance(varref, ModuleConstantReference):
# raise NotImplementedError(f'{node} from module constant')
#
# return AccessBytesIndex(
# varref,
# slice_expr,
# )
#
# if isinstance(node_typ, TypeTuple):
# if not isinstance(slice_expr, ConstantPrimitive):
# _raise_static_error(node, 'Must subscript using a constant index')
#
# idx = slice_expr.value
#
# if not isinstance(idx, int):
# _raise_static_error(node, 'Must subscript using a constant integer index')
#
# if not (0 <= idx < len(node_typ.members)):
# _raise_static_error(node, f'Index {idx} out of bounds for tuple {node.value.id}')
#
# tuple_member = node_typ.members[idx]
#
# if isinstance(varref, ModuleConstantReference):
# raise NotImplementedError(f'{node} from module constant')
#
# return AccessTupleMember(
# varref,
# tuple_member,
# )
#
# if isinstance(node_typ, TypeStaticArray):
# if not isinstance(slice_expr, ConstantPrimitive):
# return AccessStaticArrayMember(
# varref,
# node_typ,
# slice_expr,
# )
#
# idx = slice_expr.value
#
# if not isinstance(idx, int):
# _raise_static_error(node, 'Must subscript using an integer index')
#
# if not (0 <= idx < len(node_typ.members)):
# _raise_static_error(node, f'Index {idx} out of bounds for static array {node.value.id}')
#
# static_array_member = node_typ.members[idx]
#
# return AccessStaticArrayMember(
# varref,
# node_typ,
# static_array_member,
# )
#
# _raise_static_error(node, f'Cannot take index of {node_typ} {node.value.id}')
def visit_Module_Constant(self, module: Module, node: ast.Constant) -> ConstantPrimitive:
    """
    Converts a literal into a ConstantPrimitive

    Only values that are an instance of int or float are accepted; any
    other value raises NotImplementedError, as does a literal with a
    `kind` set. The module argument is not used.
    """
    del module

    _not_implemented(node.kind is None, 'Constant.kind')

    value = node.value
    if not isinstance(value, (int, float)):
        raise NotImplementedError(f'{value} as constant')

    return ConstantPrimitive(value)
def visit_type(self, module: Module, node: ast.expr) -> type3types.Type3:
    """
    Converts a type annotation into a Type3

    - `None` gives type3types.none
    - a name is looked up in type3types.LOOKUP_TABLE first and among the
      structs defined so far in the module second
    - `<name>[<int>]` gives a static array of the named type
    - a tuple of annotations gives a tuple of the converted types

    Raises a StaticError for unknown names and malformed subscripts, and
    NotImplementedError for any other kind of annotation.

    Works with both layouts of the Python syntax tree: before Python 3.9
    a subscript index is wrapped in an ast.Index node, from 3.9 on it is
    stored directly in node.slice.
    """
    if isinstance(node, ast.Constant):
        if node.value is None:
            return type3types.none

        _raise_static_error(node, f'Unrecognized type {node.value}')

    if isinstance(node, ast.Name):
        if not isinstance(node.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        if node.id in type3types.LOOKUP_TABLE:
            return type3types.LOOKUP_TABLE[node.id]

        if node.id in module.struct_definitions:
            return module.struct_definitions[node.id].struct_type3

        _raise_static_error(node, f'Unrecognized type {node.id}')

    if isinstance(node, ast.Subscript):
        if not isinstance(node.value, ast.Name):
            _raise_static_error(node, 'Must be name')

        # Unwrap the index. On Python 3.9+ the parser no longer produces
        # ast.Index nodes, so an isinstance check against it never
        # matches there and would reject every static array annotation.
        slice_node: Any = node.slice
        legacy_index = getattr(ast, 'Index', None)
        if legacy_index is not None and isinstance(slice_node, legacy_index):
            slice_node = slice_node.value

        # ast.ExtSlice is only produced by Python < 3.9
        if isinstance(slice_node, (ast.Slice, getattr(ast, 'ExtSlice', ast.Slice))):
            _raise_static_error(node, 'Must subscript using an index')
        if not isinstance(slice_node, ast.Constant):
            _raise_static_error(node, 'Must subscript using a constant index')
        if not isinstance(slice_node.value, int):
            _raise_static_error(node, 'Must subscript using a constant integer index')
        if not isinstance(node.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        if node.value.id not in type3types.LOOKUP_TABLE: # FIXME: Tuple of tuples?
            _raise_static_error(node, f'Unrecognized type {node.value.id}')

        # NOTE(review): the index (presumably the array length) is
        # validated above but not passed on to the type - confirm that
        # this is intended
        return type3types.AppliedType3(
            type3types.static_array,
            [self.visit_type(module, node.value)],
        )

    if isinstance(node, ast.Tuple):
        if not isinstance(node.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        # A list, like the static array above, rather than a generator
        # that can only be iterated once
        return type3types.AppliedType3(
            type3types.tuple,
            [self.visit_type(module, elt) for elt in node.elts],
        )

    raise NotImplementedError(f'{node} as type')
def _not_implemented(check: Any, msg: str) -> None:
    """
    Raises NotImplementedError with the given message, unless check is truthy
    """
    if check:
        return

    raise NotImplementedError(msg)
def _raise_static_error(node: Union[ast.mod, ast.stmt, ast.expr], msg: str) -> NoReturn:
    """
    Raises a StaticError that mentions the line number of the given node

    Never returns; the NoReturn annotation lets callers use it as a guard.
    """
    message = f'Static error on line {node.lineno}: {msg}'
    raise StaticError(message)