phasm/phasm/parser.py
2023-04-10 15:24:56 +02:00

641 lines
24 KiB
Python

"""
Parses the source code from the plain text into a syntax tree
"""
from typing import Any, Dict, NoReturn, Union
import ast
from .type3 import types as type3types
from .exceptions import StaticError
from .ourlang import (
WEBASSEMBLY_BUILTIN_FLOAT_OPS,
Module, ModuleDataBlock,
Function,
Expression,
BinaryOp,
ConstantPrimitive, ConstantTuple, ConstantStruct,
TupleInstantiation,
FunctionCall, AccessStructMember, Subscript,
StructDefinition, StructConstructor,
UnaryOp, VariableReference,
Fold,
Statement,
StatementIf, StatementPass, StatementReturn,
StatementExpression,
FunctionParam,
ModuleConstantDef,
)
def phasm_parse(source: str) -> Module:
    """
    Public entry point: turn Phasm source text into a Phasm Module

    The text is first parsed with Python's own parser; the resulting
    Python syntax tree is then converted by OurVisitor.
    """
    python_tree = ast.parse(source, '')
    return OurVisitor().visit_Module(python_tree)
# Local variables known inside a function body, keyed by name.
# So far these are only the function's parameters.
# FIXME: Does it become easier if we add ModuleConstantDef to this dict?
OurLocals = Dict[str, Union[FunctionParam]]
class OurVisitor:
    """
    Class to visit a Python syntax tree and create an ourlang syntax tree

    We're (ab)using the Python AST parser to give us a leg up

    At some point, we may deviate from Python syntax. If nothing else,
    we probably won't keep up with the Python syntax changes.

    A module is handled in two passes (see visit_Module): the first pass
    registers all top-level constants, structs and function signatures,
    the second pass visits the function bodies. Type annotations are
    resolved during the first pass, against whatever has been registered
    on the module up to that point.
    """

    # pylint: disable=C0103,C0116,C0301,R0201,R0912

    # Python binary operator node class -> operator symbol in our tree
    _BINARY_OPERATORS: Dict[type, str] = {
        ast.Add: '+',
        ast.Sub: '-',
        ast.Mult: '*',
        ast.Div: '/',
        ast.LShift: '<<',
        ast.RShift: '>>',
        ast.BitOr: '|',
        ast.BitXor: '^',
        ast.BitAnd: '&',
    }

    # Python unary operator node class -> operator symbol in our tree
    _UNARY_OPERATORS: Dict[type, str] = {
        ast.UAdd: '+',
        ast.USub: '-',
    }

    # Python comparison operator node class -> operator symbol in our tree
    _COMPARE_OPERATORS: Dict[type, str] = {
        ast.Gt: '>',
        ast.Eq: '==',
        ast.Lt: '<',
    }

    def __init__(self) -> None:
        pass

    def visit_Module(self, node: ast.Module) -> Module:
        """
        Builds a Phasm Module from a parsed Python module

        Raises StaticError when a constant, struct or function name is
        defined twice, and NotImplementedError on unsupported syntax.
        """
        module = Module()

        _not_implemented(not node.type_ignores, 'Module.type_ignores')

        # First pass for the types; this registers every top-level
        # constant, struct and function signature on the module
        for stmt in node.body:
            res = self.pre_visit_Module_stmt(module, stmt)

            if isinstance(res, ModuleConstantDef):
                if res.name in module.constant_defs:
                    raise StaticError(
                        f'{res.name} already defined on line {module.constant_defs[res.name].lineno}'
                    )

                module.constant_defs[res.name] = res

            if isinstance(res, StructDefinition):
                if res.struct_type3.name in module.struct_definitions:
                    raise StaticError(
                        f'{res.struct_type3.name} already defined on line {module.struct_definitions[res.struct_type3.name].lineno}'
                    )

                module.struct_definitions[res.struct_type3.name] = res

                # Each struct also gets a constructor, stored with the functions
                constructor = StructConstructor(res.struct_type3)
                module.functions[constructor.name] = constructor

            if isinstance(res, Function):
                if res.name in module.functions:
                    raise StaticError(
                        f'{res.name} already defined on line {module.functions[res.name].lineno}'
                    )

                module.functions[res.name] = res

        # Second pass for the function bodies
        for stmt in node.body:
            self.visit_Module_stmt(module, stmt)

        return module

    def pre_visit_Module_stmt(self, module: Module, node: ast.stmt) -> Union[Function, StructDefinition, ModuleConstantDef]:
        """
        First pass dispatcher for a top-level statement

        Only function definitions, class definitions and annotated
        assignments are supported; anything else raises NotImplementedError.
        """
        if isinstance(node, ast.FunctionDef):
            return self.pre_visit_Module_FunctionDef(module, node)

        if isinstance(node, ast.ClassDef):
            return self.pre_visit_Module_ClassDef(module, node)

        if isinstance(node, ast.AnnAssign):
            return self.pre_visit_Module_AnnAssign(module, node)

        raise NotImplementedError(f'{node} on Module')

    def pre_visit_Module_FunctionDef(self, module: Module, node: ast.FunctionDef) -> Function:
        """
        Creates the Function for a definition, without its statements

        Parameters, the exported / imported decorators and the return type
        are processed here; the body is handled by visit_Module_FunctionDef
        in the second pass.
        """
        function = Function(node.name, node.lineno)

        _not_implemented(not node.args.posonlyargs, 'FunctionDef.args.posonlyargs')

        # Regular Python arguments are stored as our positional-only arguments
        for arg in node.args.args:
            function.posonlyargs.append(FunctionParam(
                arg.arg,
                self.visit_type(module, arg.annotation) if arg.annotation else None,
            ))

        _not_implemented(not node.args.vararg, 'FunctionDef.args.vararg')
        _not_implemented(not node.args.kwonlyargs, 'FunctionDef.args.kwonlyargs')
        _not_implemented(not node.args.kw_defaults, 'FunctionDef.args.kw_defaults')
        _not_implemented(not node.args.kwarg, 'FunctionDef.args.kwarg')
        _not_implemented(not node.args.defaults, 'FunctionDef.args.defaults')

        # Do stmts at the end so we have the return value

        for decorator in node.decorator_list:
            if not isinstance(decorator, ast.Name):
                _raise_static_error(decorator, 'Function decorators must be string')

            if not isinstance(decorator.ctx, ast.Load):
                _raise_static_error(decorator, 'Must be load context')

            _not_implemented(decorator.id in ('exported', 'imported'), 'Custom decorators')

            if decorator.id == 'exported':
                function.exported = True
            else:
                function.imported = True

        if node.returns is not None:  # Note: `-> None` would be a ast.Constant
            function.returns_type3 = self.visit_type(module, node.returns)
        else:
            # FIXME: Mostly works already, needs to fix Function.returns_type3 and have it updated
            raise NotImplementedError('Function without an explicit return type')

        _not_implemented(not node.type_comment, 'FunctionDef.type_comment')

        return function

    def pre_visit_Module_ClassDef(self, module: Module, node: ast.ClassDef) -> StructDefinition:
        """
        Creates a StructDefinition from a class definition

        The class body may only contain simple annotated names without a
        value (`name: type`). Raises StaticError on duplicate member names
        and NotImplementedError on anything else in the class.
        """
        _not_implemented(not node.bases, 'ClassDef.bases')
        _not_implemented(not node.keywords, 'ClassDef.keywords')
        _not_implemented(not node.decorator_list, 'ClassDef.decorator_list')

        members: Dict[str, type3types.Type3] = {}

        for stmt in node.body:
            if not isinstance(stmt, ast.AnnAssign):
                raise NotImplementedError(f'Class with {stmt} nodes')

            if not isinstance(stmt.target, ast.Name):
                # This used to report 'Class with default values', which
                # was a copy of the check below and did not match the problem
                raise NotImplementedError('Class with non-name member targets')

            if stmt.value is not None:
                raise NotImplementedError('Class with default values')

            if stmt.simple != 1:
                raise NotImplementedError('Class with non-simple arguments')

            if stmt.target.id in members:
                _raise_static_error(stmt, 'Struct members must have unique names')

            members[stmt.target.id] = self.visit_type(module, stmt.annotation)

        return StructDefinition(type3types.StructType3(node.name, members), node.lineno)

    def pre_visit_Module_AnnAssign(self, module: Module, node: ast.AnnAssign) -> ModuleConstantDef:
        """
        Creates a module constant from `name: type = value`

        Supported values are a single constant, a tuple of constants and
        a struct constructor call with constant arguments. For the tuple
        and the struct, a data block is added to the module's data and
        attached to the returned constant definition.
        """
        if not isinstance(node.target, ast.Name):
            _raise_static_error(node.target, 'Must be name')

        if not isinstance(node.target.ctx, ast.Store):
            _raise_static_error(node.target, 'Must be store context')

        if isinstance(node.value, ast.Constant):
            type3 = self.visit_type(module, node.annotation)

            return ModuleConstantDef(
                node.target.id,
                node.lineno,
                type3,
                self.visit_Module_Constant(module, node.value),
                None,
            )

        if isinstance(node.value, ast.Tuple):
            tuple_data = [
                self.visit_Module_Constant(module, arg_node)
                for arg_node in node.value.elts
                if isinstance(arg_node, ast.Constant)
            ]

            # Non-constant elements were filtered out above, so a length
            # difference means that at least one element was not a constant
            if len(node.value.elts) != len(tuple_data):
                _raise_static_error(node, 'Tuple arguments must be constants')

            # Allocate the data
            data_block = ModuleDataBlock(tuple_data)
            module.data.blocks.append(data_block)

            # Then return the constant as a pointer
            return ModuleConstantDef(
                node.target.id,
                node.lineno,
                self.visit_type(module, node.annotation),
                ConstantTuple(tuple_data),
                data_block,
            )

        if isinstance(node.value, ast.Call):
            # Struct constant
            # Stored in memory like a tuple, so much of the code is the same

            if not isinstance(node.value.func, ast.Name):
                _raise_static_error(node.value.func, 'Must be name')

            if not isinstance(node.value.func.ctx, ast.Load):
                _raise_static_error(node.value.func, 'Must be load context')

            if node.value.func.id not in module.struct_definitions:
                _raise_static_error(node.value.func, 'Undefined struct')

            if node.value.keywords:
                _raise_static_error(node.value.func, 'Cannot use keywords')

            if not isinstance(node.annotation, ast.Name):
                _raise_static_error(node.annotation, 'Must be name')

            struct_data = [
                self.visit_Module_Constant(module, arg_node)
                for arg_node in node.value.args
                if isinstance(arg_node, ast.Constant)
            ]

            # Same trick as for tuples: a shorter list means that a
            # non-constant argument was filtered out
            if len(node.value.args) != len(struct_data):
                _raise_static_error(node, 'Struct arguments must be constants')

            # Allocate the data
            data_block = ModuleDataBlock(struct_data)
            module.data.blocks.append(data_block)

            # Then return the constant as a pointer
            return ModuleConstantDef(
                node.target.id,
                node.lineno,
                self.visit_type(module, node.annotation),
                ConstantStruct(node.value.func.id, struct_data),
                data_block,
            )

        raise NotImplementedError(f'{node} on Module AnnAssign')

    def visit_Module_stmt(self, module: Module, node: ast.stmt) -> None:
        """
        Second pass dispatcher for a top-level statement

        Only function definitions have work left in this pass; classes
        and annotated assignments were fully handled in the first pass.
        """
        if isinstance(node, ast.FunctionDef):
            self.visit_Module_FunctionDef(module, node)
            return

        if isinstance(node, ast.ClassDef):
            return

        if isinstance(node, ast.AnnAssign):
            return

        raise NotImplementedError(f'{node} on Module')

    def visit_Module_FunctionDef(self, module: Module, node: ast.FunctionDef) -> None:
        """
        Fills in the statements of a function registered in the first pass
        """
        function = module.functions[node.name]

        # The function's parameters are the only local variables
        our_locals: OurLocals = {
            x.name: x
            for x in function.posonlyargs
        }

        for stmt in node.body:
            function.statements.append(
                self.visit_Module_FunctionDef_stmt(module, function, our_locals, stmt)
            )

    def visit_Module_FunctionDef_stmt(self, module: Module, function: Function, our_locals: OurLocals, node: ast.stmt) -> Statement:
        """
        Converts a statement inside a function body

        Supported are return (with a value), if / else, pass and
        expression statements.
        """
        if isinstance(node, ast.Return):
            if node.value is None:
                # TODO: Implement methods without return values
                _raise_static_error(node, 'Return must have an argument')

            return StatementReturn(
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.value)
            )

        if isinstance(node, ast.If):
            result = StatementIf(
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.test)
            )

            for stmt in node.body:
                result.statements.append(
                    self.visit_Module_FunctionDef_stmt(module, function, our_locals, stmt)
                )

            for stmt in node.orelse:
                result.else_statements.append(
                    self.visit_Module_FunctionDef_stmt(module, function, our_locals, stmt)
                )

            return result

        if isinstance(node, ast.Pass):
            return StatementPass()

        if isinstance(node, ast.Expr):
            return StatementExpression(self.visit_Module_FunctionDef_expr(module, function, our_locals, node.value))

        raise NotImplementedError(f'{node} as stmt in FunctionDef')

    def visit_Module_FunctionDef_expr(self, module: Module, function: Function, our_locals: OurLocals, node: ast.expr) -> Expression:
        """
        Converts an expression inside a function body

        Names are resolved against the local variables first and the
        module constants second. Raises StaticError for undefined
        variables and NotImplementedError for unsupported expressions
        and operators.
        """
        if isinstance(node, ast.BinOp):
            operator = self._BINARY_OPERATORS.get(type(node.op))
            if operator is None:
                raise NotImplementedError(f'Operator {node.op}')

            return BinaryOp(
                operator,
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.left),
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.right),
            )

        if isinstance(node, ast.UnaryOp):
            operator = self._UNARY_OPERATORS.get(type(node.op))
            if operator is None:
                raise NotImplementedError(f'Operator {node.op}')

            return UnaryOp(
                operator,
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.operand),
            )

        if isinstance(node, ast.Compare):
            # Chained comparisons such as `a < b < c` have multiple operators
            if 1 < len(node.ops):
                raise NotImplementedError('Multiple operators')

            operator = self._COMPARE_OPERATORS.get(type(node.ops[0]))
            if operator is None:
                raise NotImplementedError(f'Operator {node.ops}')

            # Comparisons are stored as binary operations as well
            return BinaryOp(
                operator,
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.left),
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.comparators[0]),
            )

        if isinstance(node, ast.Call):
            return self.visit_Module_FunctionDef_Call(module, function, our_locals, node)

        if isinstance(node, ast.Constant):
            return self.visit_Module_Constant(
                module, node,
            )

        if isinstance(node, ast.Attribute):
            return self.visit_Module_FunctionDef_Attribute(
                module, function, our_locals, node,
            )

        if isinstance(node, ast.Subscript):
            return self.visit_Module_FunctionDef_Subscript(
                module, function, our_locals, node,
            )

        if isinstance(node, ast.Name):
            if not isinstance(node.ctx, ast.Load):
                _raise_static_error(node, 'Must be load context')

            if node.id in our_locals:
                param = our_locals[node.id]
                return VariableReference(param)

            if node.id in module.constant_defs:
                cdef = module.constant_defs[node.id]
                return VariableReference(cdef)

            _raise_static_error(node, f'Undefined variable {node.id}')

        if isinstance(node, ast.Tuple):
            arguments = [
                self.visit_Module_FunctionDef_expr(module, function, our_locals, arg_node)
                for arg_node in node.elts
                if isinstance(arg_node, ast.Constant)
            ]

            # Non-constant members were filtered out above
            if len(arguments) != len(node.elts):
                raise NotImplementedError('Non-constant tuple members')

            return TupleInstantiation(arguments)

        raise NotImplementedError(f'{node} as expr in FunctionDef')

    def visit_Module_FunctionDef_Call(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Call) -> Union[Fold, FunctionCall, UnaryOp]:
        """
        Converts a call expression inside a function body

        Struct constructors and module functions become a FunctionCall.
        The builtin float operations, `u32` and `len` become a UnaryOp,
        and `foldl` becomes a Fold. Raises StaticError on keyword
        arguments, undefined functions and a wrong number of arguments.
        """
        if node.keywords:
            _raise_static_error(node, 'Keyword calling not supported')  # Yet?

        if not isinstance(node.func, ast.Name):
            raise NotImplementedError(f'Calling methods that are not a name {node.func}')

        if not isinstance(node.func.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        if node.func.id in module.struct_definitions:
            struct_definition = module.struct_definitions[node.func.id]
            struct_constructor = StructConstructor(struct_definition.struct_type3)

            # FIXME: Defer struct de-allocation

            # Look up the constructor that was registered in the first pass
            func = module.functions[struct_constructor.name]
        elif node.func.id in WEBASSEMBLY_BUILTIN_FLOAT_OPS:
            # Use the name of the builtin that is actually called. This
            # used to be hard-coded to 'sqrt', so any other name in
            # WEBASSEMBLY_BUILTIN_FLOAT_OPS would be parsed as a square root.
            return self._visit_unary_builtin(module, function, our_locals, node, node.func.id, node.func.id)
        elif node.func.id == 'u32':
            return self._visit_unary_builtin(module, function, our_locals, node, node.func.id, 'cast')
        elif node.func.id == 'len':
            return self._visit_unary_builtin(module, function, our_locals, node, node.func.id, 'len')
        elif node.func.id == 'foldl':
            if 3 != len(node.args):
                _raise_static_error(node, f'Function {node.func.id} requires 3 arguments but {len(node.args)} are given')

            # TODO: This is not generic, you cannot return a function
            subnode = node.args[0]
            if not isinstance(subnode, ast.Name):
                raise NotImplementedError(f'Calling methods that are not a name {subnode}')
            if not isinstance(subnode.ctx, ast.Load):
                _raise_static_error(subnode, 'Must be load context')
            if subnode.id not in module.functions:
                _raise_static_error(subnode, 'Reference to undefined function')
            func = module.functions[subnode.id]
            if 2 != len(func.posonlyargs):
                _raise_static_error(node, f'Function {node.func.id} requires a function with 2 arguments but a function with {len(func.posonlyargs)} args is given')

            return Fold(
                Fold.Direction.LEFT,
                func,
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[1]),
                self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[2]),
            )
        else:
            if node.func.id not in module.functions:
                _raise_static_error(node, 'Call to undefined function')

            func = module.functions[node.func.id]

        if len(func.posonlyargs) != len(node.args):
            _raise_static_error(node, f'Function {node.func.id} requires {len(func.posonlyargs)} arguments but {len(node.args)} are given')

        result = FunctionCall(func)
        result.arguments.extend(
            self.visit_Module_FunctionDef_expr(module, function, our_locals, arg_expr)
            for arg_expr in node.args
        )
        return result

    def _visit_unary_builtin(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Call, name: str, operator: str) -> UnaryOp:
        """
        Converts a call to a builtin that takes exactly one argument

        name is the function name as written in the source, used in the
        error message; operator is the operator stored on the UnaryOp.
        """
        if 1 != len(node.args):
            _raise_static_error(node, f'Function {name} requires 1 arguments but {len(node.args)} are given')

        return UnaryOp(
            operator,
            self.visit_Module_FunctionDef_expr(module, function, our_locals, node.args[0]),
        )

    def visit_Module_FunctionDef_Attribute(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Attribute) -> Expression:
        """
        Converts `name.member` into a struct member access

        The name must refer to a variable whose type is a struct type,
        otherwise a StaticError is raised.
        """
        if not isinstance(node.value, ast.Name):
            _raise_static_error(node, 'Must reference a name')

        if not isinstance(node.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        varref = self.visit_Module_FunctionDef_expr(module, function, our_locals, node.value)
        if not isinstance(varref, VariableReference):
            _raise_static_error(node.value, 'Must refer to variable')

        if not isinstance(varref.variable.type3, type3types.StructType3):
            _raise_static_error(node.value, 'Must refer to struct')

        return AccessStructMember(
            varref,
            varref.variable.type3,
            node.attr,
        )

    def visit_Module_FunctionDef_Subscript(self, module: Module, function: Function, our_locals: OurLocals, node: ast.Subscript) -> Expression:
        """
        Converts `name[index]` into a Subscript expression

        The name is resolved against the local variables first and the
        module constants second; the index may be any supported
        expression, but not a slice.
        """
        if not isinstance(node.value, ast.Name):
            _raise_static_error(node, 'Must reference a name')

        if isinstance(node.slice, ast.Slice):
            _raise_static_error(node, 'Must subscript using an index')

        if not isinstance(node.ctx, ast.Load):
            _raise_static_error(node, 'Must be load context')

        varref: VariableReference
        if node.value.id in our_locals:
            param = our_locals[node.value.id]
            varref = VariableReference(param)
        elif node.value.id in module.constant_defs:
            constant_def = module.constant_defs[node.value.id]
            varref = VariableReference(constant_def)
        else:
            _raise_static_error(node, f'Undefined variable {node.value.id}')

        slice_expr = self.visit_Module_FunctionDef_expr(
            module, function, our_locals, node.slice,
        )

        return Subscript(varref, slice_expr)

    def visit_Module_Constant(self, module: Module, node: ast.Constant) -> ConstantPrimitive:
        """
        Converts a literal into a ConstantPrimitive

        Only int and float values are accepted; any other literal raises
        NotImplementedError.
        """
        # NOTE(review): bool is a subclass of int in Python, so True and
        # False pass the isinstance check below as well - confirm that
        # this is intended
        del module

        _not_implemented(node.kind is None, 'Constant.kind')

        if isinstance(node.value, (int, float, )):
            return ConstantPrimitive(node.value)

        raise NotImplementedError(f'{node.value} as constant')

    def visit_type(self, module: Module, node: ast.expr) -> type3types.Type3:
        """
        Converts a type annotation into a Type3

        Supported annotations are:
        - `Monad(arg, ...)`, a call on a name that resolves to a MonadType3
        - `None`
        - a name from type3types.LOOKUP_TABLE, or of a struct that has
          already been registered on the module
        - `name[N]` with N an integer constant, a static array
        - a tuple of types

        Raises StaticError for unrecognized or malformed types and
        NotImplementedError for any other kind of node.
        """
        if isinstance(node, ast.Call):
            if not isinstance(node.func, ast.Name):
                _raise_static_error(node, 'Can only call Monads by name')

            if node.keywords:
                _raise_static_error(node, 'Monads cannot have keyword arguments')

            monad_type = self.visit_type(module, node.func)
            if not isinstance(monad_type, type3types.MonadType3):
                _raise_static_error(node, 'Must be a Monad')

            # Pass a list rather than a single-use generator, the same
            # way the static array branch below does
            return type3types.AppliedType3(
                monad_type,
                [self.visit_type(module, x) for x in node.args],
            )

        if isinstance(node, ast.Constant):
            if node.value is None:
                return type3types.none

            _raise_static_error(node, f'Unrecognized type {node.value}')

        if isinstance(node, ast.Name):
            if not isinstance(node.ctx, ast.Load):
                _raise_static_error(node, 'Must be load context')

            if node.id in type3types.LOOKUP_TABLE:
                return type3types.LOOKUP_TABLE[node.id]

            if node.id in module.struct_definitions:
                return module.struct_definitions[node.id].struct_type3

            _raise_static_error(node, f'Unrecognized type {node.id}')

        if isinstance(node, ast.Subscript):
            if not isinstance(node.value, ast.Name):
                _raise_static_error(node, 'Must be name')
            if isinstance(node.slice, ast.Slice):
                _raise_static_error(node, 'Must subscript using an index')
            if not isinstance(node.slice, ast.Constant):
                _raise_static_error(node, 'Must subscript using a constant index')
            if not isinstance(node.slice.value, int):
                _raise_static_error(node, 'Must subscript using a constant integer index')
            if not isinstance(node.ctx, ast.Load):
                _raise_static_error(node, 'Must be load context')

            if node.value.id not in type3types.LOOKUP_TABLE:  # FIXME: Tuple of tuples?
                _raise_static_error(node, f'Unrecognized type {node.value.id}')

            # The array length is part of the type
            return type3types.AppliedType3(
                type3types.static_array,
                [self.visit_type(module, node.value), type3types.IntType3(node.slice.value)],
            )

        if isinstance(node, ast.Tuple):
            if not isinstance(node.ctx, ast.Load):
                _raise_static_error(node, 'Must be load context')

            return type3types.AppliedType3(
                type3types.tuple,
                [self.visit_type(module, elt) for elt in node.elts],
            )

        raise NotImplementedError(f'{node} as type')
def _not_implemented(check: Any, msg: str) -> None:
if not check:
raise NotImplementedError(msg)
def _raise_static_error(node: Union[ast.mod, ast.stmt, ast.expr], msg: str) -> NoReturn:
    """
    Always raises a StaticError for the given node

    The message is prefixed with the line number taken from the node.
    """
    message = f'Static error on line {node.lineno}: {msg}'
    raise StaticError(message)