phasm/phasm/compiler.py
2025-08-23 15:33:23 +02:00

621 lines
21 KiB
Python

"""
This module contains the code to convert parsed Ourlang into WebAssembly code
"""
import struct
from dataclasses import dataclass
from typing import Any, List, TypeGuard
from . import ourlang, wasm
from .build.base import BuildBase, TypeInfo
from .build.typerouter import BuildTypeRouter
from .stdlib import alloc as stdlib_alloc
from .stdlib import types as stdlib_types
from .type5.constrainedexpr import ConstrainedExpr
from .type5.typeexpr import AtomicType, TypeApplication, TypeExpr, is_concrete
from .wasm import (
WasmTypeFloat32,
WasmTypeFloat64,
WasmTypeInt32,
WasmTypeInt64,
)
from .wasmgenerator import Generator as WasmGenerator
# Message attached to the assertions in this module that check whether
# a node's type5 is concrete before it is compiled.
TYPE5_ASSERTION_ERROR = 'You must call phasm_type5 after calling phasm_parse before your program can be compiled'
def phasm_compile(inp: ourlang.Module[WasmGenerator]) -> wasm.Module:
    """
    Public entry point of the compiler.

    Takes a parsed Phasm module and returns the WebAssembly module
    that `module` produces for it.
    """
    compiled = module(inp)
    return compiled
def type5(mod: ourlang.Module[WasmGenerator], inp: TypeExpr) -> wasm.WasmType:
    """
    Compile: type

    Maps a type expression onto the WebAssembly type that carries it,
    as used for example in function parameters and return types.
    """
    # IO is a type constructor that only exists on the typing layer,
    # so when the type is an IO application we compile its argument instead.
    io_arg = mod.build.type5_is_io(inp)
    target = inp if io_arg is None else io_arg

    info = mod.build.type_info_map.get(target.name)
    if info is None:
        # No dedicated entry for this type; use the info for constructed types
        info = mod.build.type_info_constructed

    return info.wasm_type()
@dataclass
class TupleInstantiationResult:
    """
    Layout information produced by TupleInstantiationRouter and
    consumed by tuple_instantiation.
    """
    # Type of every element, in order
    args: list[TypeExpr]
    # Number of bytes requested from the allocator
    alloc_size: int
    # Value stored as an i32 at the start of the allocation;
    # None when no header is written
    header_value: int | None
class TupleInstantiationRouter(BuildTypeRouter[TupleInstantiationResult]):
    """
    Routes on the type of a tuple instantiation to work out which element
    types it has, how many bytes to allocate and whether a header is written.

    The number of elements in the instantiation is passed in separately;
    it is used by the dynamic array handler, which only receives the
    element type.
    """
    __slots__ = ('el_count', )

    el_count: int

    def __init__(self, build: BuildBase[Any], el_count: int) -> None:
        super().__init__(build)
        self.el_count = el_count

    def when_dynamic_array(self, da_arg: TypeExpr) -> TupleInstantiationResult:
        count = self.el_count
        member_size = self.build.type5_alloc_size_member(da_arg)

        # NOTE(review): tuple_instantiation writes the header as a single i32
        # and stores the first element at offset 4, so 4 header bytes would
        # seem sufficient - confirm whether the 5 is intentional.
        return TupleInstantiationResult(
            args=[da_arg] * count,
            alloc_size=5 + count * member_size,
            # The element count is stored in the header
            header_value=count,
        )

    def when_static_array(self, sa_len: int, sa_typ: TypeExpr) -> TupleInstantiationResult:
        member_size = self.build.type5_alloc_size_member(sa_typ)

        # NOTE(review): no header is written for static arrays (header_value
        # is None), yet 5 extra bytes are requested - confirm whether this
        # is intentional.
        return TupleInstantiationResult(
            args=[sa_typ] * sa_len,
            alloc_size=5 + sa_len * member_size,
            header_value=None,
        )

    def when_tuple(self, tp_args: list[TypeExpr]) -> TupleInstantiationResult:
        # A tuple is just its members back to back, without a header
        total_size = sum(map(self.build.type5_alloc_size_member, tp_args))

        return TupleInstantiationResult(
            args=tp_args,
            alloc_size=total_size,
            header_value=None,
        )
def tuple_instantiation(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.TupleInstantiation) -> None:
    """
    Compile: Instantiation (allocation) of a tuple

    Emits code that allocates the memory, writes the header when the
    layout has one, stores every element and finally leaves the
    allocated address on the stack.
    """
    assert _is_concrete(inp.type5), TYPE5_ASSERTION_ERROR

    layout = TupleInstantiationRouter(mod.build, len(inp.elements))(inp.type5)

    # Collect the element type names, they are only used in the generated comment
    element_names = []
    for el in inp.elements:
        assert _is_concrete(el.type5), TYPE5_ASSERTION_ERROR
        element_names.append(mod.build.type5_name(el.type5))
    comment_elements = ''.join(f'{name}, ' for name in element_names)

    adr_var = wgn.temp_var_i32('tuple_adr')
    wgn.add_statement('nop', comment=f'{adr_var.name} := ({comment_elements})')

    # Allocate the required amount of bytes in memory
    wgn.i32.const(layout.alloc_size)
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(adr_var)

    if layout.header_value is not None:
        wgn.local.get(adr_var)
        wgn.i32.const(layout.header_value)
        wgn.i32.store()

    # Store each element individually; when there is a header,
    # the elements start right after its 4 bytes
    offset = 4 if layout.header_value is not None else 0
    for el in inp.elements:
        assert _is_concrete(el.type5), TYPE5_ASSERTION_ERROR

        el_info = mod.build.type_info_map.get(el.type5.name)
        if el_info is None:
            el_info = mod.build.type_info_constructed

        wgn.add_statement('nop', comment='PRE')
        wgn.local.get(adr_var)
        expression(wgn, mod, el)
        wgn.add_statement(el_info.wasm_store_func, f'offset={offset}')
        wgn.add_statement('nop', comment='POST')

        offset += mod.build.type5_alloc_size_member(el.type5)

    # Return the allocated address
    wgn.local.get(adr_var)
def expression_subscript_tuple(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.Subscript) -> None:
    """
    Compile: Subscript on a tuple

    The index must be a constant integer, so the member's offset
    can be calculated at compile time.
    """
    index = inp.index
    assert isinstance(index, ourlang.ConstantPrimitive)
    assert isinstance(index.value, int)
    assert _is_concrete(inp.varref.type5), TYPE5_ASSERTION_ERROR

    members = mod.build.type5_is_tuple(inp.varref.type5)
    assert members is not None

    position = index.value

    # Skip over every member that is stored before the requested one
    offset = sum(
        mod.build.type5_alloc_size_member(member)
        for member in members[:position]
    )

    member_type = members[position]
    member_info = mod.build.type_info_map.get(member_type.name)
    if member_info is None:
        member_info = mod.build.type_info_constructed

    expression(wgn, mod, inp.varref)
    wgn.add_statement(member_info.wasm_load_func, 'offset=' + str(offset))
def expression_binary_op(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.BinaryOp) -> None:
    """
    Compile: Binary operator

    The operator is rewritten as a function call and compiled as such.
    """
    as_call = _binary_op_to_function(inp)
    expression_function_call(wgn, mod, as_call)
def expression_function_call(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.FunctionCall) -> None:
    """
    Compile: Function call

    The arguments are compiled first, in order. After that, the call
    itself depends on what is being called:
    - a builtin function: its implementation is looked up by type and
      asked to emit its code
    - a function parameter: an indirect call through the parameter
    - anything else: a direct call by name
    """
    for argument in inp.arguments:
        expression(wgn, mod, argument)

    instance = inp.function_instance
    callee = instance.function

    if isinstance(callee, ourlang.BuiltinFunction):
        assert _is_concrete(instance.type5), TYPE5_ASSERTION_ERROR

        # Look among the methods first, then among the operators
        try:
            _, router = mod.build.methods[callee.name]
        except KeyError:
            _, router = mod.build.operators[callee.name]

        found = router.get((instance.type5, ))
        assert found is not None, (callee.name, instance.type5, )

        impl_kwargs, impl = found
        impl(wgn, impl_kwargs)
        return

    if isinstance(callee, ourlang.FunctionParam):
        assert _is_concrete(instance.type5), TYPE5_ASSERTION_ERROR

        fn_args = mod.build.type5_is_function(instance.type5)
        assert fn_args is not None

        # The last entry of a function type is its return type
        wasm_types = [type5(mod, fn_arg) for fn_arg in fn_args]
        result = wasm_types.pop()

        wgn.add_statement('local.get', f'${callee.name}')
        wgn.call_indirect(params=wasm_types, result=result)
        return

    wgn.call(callee.name)
def expression(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.Expression) -> None:
    """
    Compile: Any expression

    Dispatches on the kind of expression and emits the statements
    for it into wgn.

    Raises Exception for ConstantStruct and ConstantTuple, which are
    not compiled through this function, and NotImplementedError for
    any kind of expression that is not handled.
    """
    if isinstance(inp, (ourlang.ConstantStruct, ourlang.ConstantTuple, )):
        # These are implemented elsewhere
        raise Exception(f'{type(inp).__name__} can not be compiled as an expression')

    if isinstance(inp, ourlang.ConstantPrimitive):
        _expression_constant_primitive(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.ConstantBytes):
        # The value of a bytes constant is the address of its data block
        assert inp.data_block.address is not None, 'Value not allocated'
        wgn.i32.const(inp.data_block.address)
        return

    if isinstance(inp, ourlang.VariableReference):
        _expression_variable_reference(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.BinaryOp):
        expression_binary_op(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.FunctionCall):
        expression_function_call(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.FunctionReference):
        _expression_function_reference(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.TupleInstantiation):
        tuple_instantiation(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.Subscript):
        _expression_subscript(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.AccessStructMember):
        _expression_access_struct_member(wgn, mod, inp)
        return

    raise NotImplementedError(expression, inp)


def _expression_constant_primitive(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.ConstantPrimitive) -> None:
    """
    Compile: Primitive constant, as a const instruction of its WebAssembly type

    Raises NotImplementedError when the WebAssembly type is not one of
    i32, i64, f32 or f64.
    """
    assert _is_concrete(inp.type5), TYPE5_ASSERTION_ERROR

    type_info = mod.build.type_info_map[inp.type5.name]
    wasm_type = type_info.wasm_type

    if wasm_type is WasmTypeInt32:
        assert isinstance(inp.value, int)
        wgn.i32.const(inp.value)
        return

    if wasm_type is WasmTypeInt64:
        assert isinstance(inp.value, int)
        wgn.i64.const(inp.value)
        return

    if wasm_type is WasmTypeFloat32:
        assert isinstance(inp.value, float)
        wgn.f32.const(inp.value)
        return

    if wasm_type is WasmTypeFloat64:
        assert isinstance(inp.value, float)
        wgn.f64.const(inp.value)
        return

    raise NotImplementedError(inp.type5)


def _expression_variable_reference(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.VariableReference) -> None:
    """
    Compile: Reference to a function parameter or to a module constant

    Raises NotImplementedError for any other kind of variable.
    """
    variable = inp.variable

    if isinstance(variable, ourlang.FunctionParam):
        wgn.add_statement('local.get', '${}'.format(variable.name))
        return

    if isinstance(variable, ourlang.ModuleConstantDef):
        assert _is_concrete(inp.type5), TYPE5_ASSERTION_ERROR

        if inp.type5.name not in mod.build.type_info_map:
            # Types without a type info entry are referred to
            # by the address of the constant's data block
            assert isinstance(variable.constant, (ourlang.ConstantBytes, ourlang.ConstantStruct, ourlang.ConstantTuple, ))

            address = variable.constant.data_block.address
            assert address is not None, 'Value not allocated'
            wgn.i32.const(address)
            return

        # Otherwise, the constant's value is compiled in place
        expression(wgn, mod, variable.constant)
        return

    raise NotImplementedError(expression, variable)


def _expression_function_reference(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.FunctionReference) -> None:
    """
    Compile: Reference to a function, as its index in the module's functions table

    A function that is not yet in the table is added at the next free index.
    """
    idx = mod.functions_table.get(inp.function)
    if idx is None:
        idx = len(mod.functions_table)
        mod.functions_table[inp.function] = idx

    wgn.add_statement('i32.const', str(idx), comment=inp.function.name)


def _expression_subscript(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.Subscript) -> None:
    """
    Compile: Subscript

    Tuples are handled by expression_subscript_tuple. Anything else is
    compiled as a call to the `[]` operator of the Subscriptable type class.
    """
    assert _is_concrete(inp.type5), TYPE5_ASSERTION_ERROR
    assert _is_concrete(inp.varref.type5), TYPE5_ASSERTION_ERROR
    assert _is_concrete(inp.index.type5), TYPE5_ASSERTION_ERROR

    if mod.build.type5_is_tuple(inp.varref.type5):
        expression_subscript_tuple(wgn, mod, inp)
        return

    subscript_operator = mod.build.type_classes['Subscriptable'].operators['[]']

    inp_as_fc = ourlang.FunctionCall(
        ourlang.FunctionInstance(
            ourlang.BuiltinFunction('[]', subscript_operator),
            inp.sourceref,
        ),
        inp.sourceref,
    )
    inp_as_fc.arguments = [inp.varref, inp.index]
    # The operator's type is: container -> index -> element
    inp_as_fc.function_instance.type5 = mod.build.type5_make_function([
        inp.varref.type5,
        inp.index.type5,
        inp.type5,
    ])
    inp_as_fc.type5 = inp.type5

    expression_function_call(wgn, mod, inp_as_fc)


def _expression_access_struct_member(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.AccessStructMember) -> None:
    """
    Compile: Access to a struct member, as a load at the member's offset
    from the struct's address
    """
    assert _is_concrete(inp.varref.type5), TYPE5_ASSERTION_ERROR

    st_args = mod.build.type5_is_struct(inp.varref.type5)
    assert st_args is not None

    member_type = dict(st_args)[inp.member]
    member_type_info = mod.build.type_info_map.get(member_type.name)
    if member_type_info is None:
        member_type_info = mod.build.type_info_constructed

    offset = _type5_struct_offset(mod.build, st_args, inp.member)

    expression(wgn, mod, inp.varref)
    wgn.add_statement(member_type_info.wasm_load_func, 'offset=' + str(offset))
def statement_return(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.StatementReturn) -> None:
    """
    Compile: Return statement

    Returning the result of a call to the enclosing function itself
    is compiled as a tail call.
    """
    value = inp.value

    if not isinstance(value, ourlang.FunctionCall) or value.function_instance.function is not fun:
        # Regular return
        expression(wgn, mod, value)
        wgn.return_()
        return

    # Support tail calls
    # https://github.com/WebAssembly/tail-call
    # These help a lot with some functional programming techniques
    for argument in value.arguments:
        expression(wgn, mod, argument)

    wgn.add_statement('return_call', f'${value.function_instance.function.name}')
def statement_if(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.StatementIf) -> None:
    """
    Compile: If statement

    Raises NotImplementedError when the statement has an else branch.
    """
    expression(wgn, mod, inp.test)

    with wgn.if_():
        for body_statement in inp.statements:
            statement(wgn, mod, fun, body_statement)

        if inp.else_statements:
            # TODO: Emit an `else` statement, followed by
            # the compiled statements of the else branch
            raise NotImplementedError
def statement_call(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.StatementCall) -> None:
    """
    Compile: Call statement

    The call is compiled like any other expression.
    """
    call = inp.call
    expression(wgn, mod, call)
def statement(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.Statement) -> None:
    """
    Compile: any statement

    Raises NotImplementedError for a kind of statement that is not handled.
    """
    if isinstance(inp, ourlang.StatementReturn):
        statement_return(wgn, mod, fun, inp)
    elif isinstance(inp, ourlang.StatementIf):
        statement_if(wgn, mod, fun, inp)
    elif isinstance(inp, ourlang.StatementCall):
        statement_call(wgn, mod, fun, inp)
    elif isinstance(inp, ourlang.StatementPass):
        # Nothing to generate
        pass
    else:
        raise NotImplementedError(statement, inp)
def import_(mod: ourlang.Module[WasmGenerator], inp: ourlang.Function) -> wasm.Import:
    """
    Compile: imported function

    Builds the WebAssembly import for a function that is marked as imported.
    """
    assert inp.imported
    assert _is_concrete(inp.type5), TYPE5_ASSERTION_ERROR

    signature = mod.build.type5_is_function(inp.type5)
    assert signature is not None

    # The last entry is the return type, what remains are the argument types
    return_type5 = signature.pop()

    params = [
        (arg_name, type5(mod, arg_type5), )
        for arg_name, arg_type5 in zip(inp.arg_names, signature, strict=True)
    ]
    result = type5(mod, return_type5)

    # The function's own name is passed twice, as in the original call
    return wasm.Import(inp.imported, inp.name, inp.name, params, result)
def function(mod: ourlang.Module[WasmGenerator], inp: ourlang.Function) -> wasm.Function:
    """
    Compile: function

    Generates the body of a function that is not imported and wraps it,
    together with its parameters, locals and return type, in a
    WebAssembly function.
    """
    assert not inp.imported
    assert _is_concrete(inp.type5), TYPE5_ASSERTION_ERROR

    signature = mod.build.type5_is_function(inp.type5)
    assert signature is not None

    # The last entry is the return type, what remains are the argument types
    return_type5 = signature.pop()

    wgn = WasmGenerator()
    if not isinstance(inp, ourlang.StructConstructor):
        for stat in inp.statements:
            statement(wgn, mod, inp, stat)
    else:
        # Struct constructors have a generated body
        _generate_struct_constructor(wgn, mod, inp)

    # Only exported functions get an export name
    exported_name = inp.name if inp.exported else None

    params = [
        (arg_name, type5(mod, arg_type5), )
        for arg_name, arg_type5 in zip(inp.arg_names, signature, strict=True)
    ]
    local_vars = [
        (var_name, var_type.wasm_type(), )
        for var_name, var_type in wgn.locals.items()
    ]
    result = type5(mod, return_type5)

    return wasm.Function(inp.name, exported_name, params, local_vars, result, wgn.statements)
def module_data_primitive(type_info: TypeInfo, inp: int | float) -> bytes:
    """
    Compile: module data, primitive value

    Packs a single number into little-endian bytes. The struct format
    is chosen by the type's WebAssembly type, allocation size and
    signedness; a combination that is not listed raises KeyError.
    """
    key = (type_info.wasm_type, type_info.alloc_size, type_info.signed, )

    struct_letters = {
        (WasmTypeInt32, 1, False): 'B',
        (WasmTypeInt32, 1, True): 'b',
        (WasmTypeInt32, 2, False): 'H',
        (WasmTypeInt32, 2, True): 'h',
        (WasmTypeInt32, 4, False): 'I',
        (WasmTypeInt32, 4, True): 'i',
        (WasmTypeInt64, 8, False): 'Q',
        (WasmTypeInt64, 8, True): 'q',
        # Floats are listed with None as their signedness
        (WasmTypeFloat32, 4, None): 'f',
        (WasmTypeFloat64, 8, None): 'd',
    }

    return struct.pack('<' + struct_letters[key], inp)
def module_data(mod: ourlang.Module[WasmGenerator], inp: ourlang.ModuleData) -> bytes:
    """
    Compile: module data

    Lays out every data block behind the allocator's header, assigning
    each block its address along the way, and returns the bytes of the
    header followed by all blocks.
    """
    u32_info = mod.build.type_info_map['u32']
    ptr_info = mod.build.type_info_constructed

    next_free = stdlib_alloc.UNALLOC_PTR
    chunks: List[bytes] = []

    for block in inp.blocks:
        block.address = next_free + 4  # 4 bytes for allocator header

        parts: List[bytes] = []
        for constant in block.data:
            assert _is_concrete(constant.type5), TYPE5_ASSERTION_ERROR

            if isinstance(constant, ourlang.ConstantBytes):
                # Bytes are stored as their length followed by their contents
                parts += [
                    module_data_primitive(u32_info, len(constant.value)),
                    constant.value,
                ]
            elif isinstance(constant, ourlang.ConstantMemoryStored):
                if block is constant.data_block:
                    raise NotImplementedError(block, constant)

                # It's stored in a different block
                # We only need to store its address
                # This happens for example when a tuple refers
                # to a bytes constant
                assert constant.data_block.address is not None, 'Referred memory not yet stored'
                parts.append(module_data_primitive(ptr_info, constant.data_block.address))
            else:
                type_info = mod.build.type_info_map[constant.type5.name]
                parts.append(module_data_primitive(type_info, constant.value))

        payload = b''.join(parts)

        # Every block is preceded by its size
        chunks.append(module_data_primitive(u32_info, len(payload)))
        chunks.append(payload)
        next_free += 4 + len(payload)

    header = b''.join([
        # Store that we've initialized the memory
        module_data_primitive(u32_info, stdlib_alloc.IDENTIFIER),
        # Store the first reserved i32
        module_data_primitive(u32_info, 0),
        # Store the pointer towards the first free block
        # In this case, 0 since we haven't freed any blocks yet
        module_data_primitive(u32_info, 0),
        # Store the pointer towards the first unallocated block,
        # which is right behind the last data block
        module_data_primitive(u32_info, next_free),
    ])

    # The actual data follows the header
    return header + b''.join(chunks)
def module(inp: ourlang.Module[WasmGenerator]) -> wasm.Module:
    """
    Compile: module

    Fills a WebAssembly module with the memory data, the imports, the
    standard library functions followed by the module's own functions,
    and the function table.
    """
    result = wasm.Module()

    result.memory.data = module_data(inp, inp.data)

    result.imports = [
        import_(inp, fun)
        for fun in inp.functions.values()
        if fun.imported
    ]

    stdlib_functions = [
        stdlib_alloc.__find_free_block__,
        stdlib_alloc.__alloc__,
        stdlib_types.__alloc_bytes__,
        stdlib_types.__u32_min__,
        stdlib_types.__u64_min__,
        stdlib_types.__i32_min__,
        stdlib_types.__i64_min__,
        stdlib_types.__u32_max__,
        stdlib_types.__u64_max__,
        stdlib_types.__i32_max__,
        stdlib_types.__i64_max__,
        stdlib_types.__i32_abs__,
        stdlib_types.__i64_abs__,
        stdlib_types.__u32_pow2__,
        stdlib_types.__u8_rotl__,
        stdlib_types.__u8_rotr__,
        stdlib_types.__u16_rotl__,
        stdlib_types.__u16_rotr__,
    ]
    own_functions = [
        function(inp, fun)
        for fun in inp.functions.values()
        if not fun.imported
    ]
    result.functions = stdlib_functions + own_functions

    # Do this after rendering the functions since that's what populates the tables
    table = {}
    for fun, idx in inp.functions_table.items():
        table[idx] = fun.name
    result.table = table

    return result
def _generate_struct_constructor(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.StructConstructor) -> None:
    """
    Generates the body of a struct constructor

    The generated code allocates memory for the struct, stores each
    member from the local with the member's name, and leaves the
    allocated address on the stack.
    """
    members = mod.build.type5_is_struct(inp.struct_type5)
    assert members is not None

    adr_var = wgn.temp_var_i32('struct_adr')

    # Allocate the required amount of bytes in memory
    alloc_size = mod.build.type5_alloc_size_root(inp.struct_type5)
    wgn.i32.const(alloc_size)
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(adr_var)

    # Store each member individually, one after the other
    offset = 0
    for member_name, member_type5 in members:
        member_info = mod.build.type_info_map.get(member_type5.name)
        if member_info is None:
            member_info = mod.build.type_info_constructed

        wgn.local.get(adr_var)
        wgn.add_statement('local.get', f'${member_name}')
        wgn.add_statement(member_info.wasm_store_func, f'offset={offset}')

        offset += mod.build.type5_alloc_size_member(member_type5)

    # Return the allocated address
    wgn.local.get(adr_var)
def _is_concrete(type5: TypeExpr | ConstrainedExpr | None) -> TypeGuard[TypeExpr]:
    """
    Tells whether the given type is set and concrete.

    A missing type is never concrete. A constrained expression is
    judged by the expression it wraps.
    """
    if type5 is None:
        return False

    inner = type5.expr if isinstance(type5, ConstrainedExpr) else type5
    return is_concrete(inner)
def _type5_struct_offset(
    build: BuildBase[Any],
    fields: tuple[tuple[str, AtomicType | TypeApplication], ...],
    needle: str,
) -> int:
    """
    Calculates the amount of bytes that should be skipped in memory before
    reaching the struct's property with the given name.

    Raises RuntimeError when the struct has no property with that name.
    """
    offset = 0

    for member_name, member_type in fields:
        if member_name != needle:
            # This member is stored before the one we're looking for
            offset += build.type5_alloc_size_member(member_type)
            continue

        return offset

    raise RuntimeError('Member not found')
def _binary_op_to_function(inp: ourlang.BinaryOp) -> ourlang.FunctionCall:
    """
    Rewrites a binary operator as the equivalent function call.

    For compilation purposes the operator is only syntactic sugar,
    e.g. `1 + 2` vs `+(1, 2)`. The left and right operands become
    the first and second argument.
    """
    sourceref = inp.sourceref
    assert sourceref is not None  # TODO: sourceref required

    as_call = ourlang.FunctionCall(inp.operator, sourceref)
    as_call.arguments = [inp.left, inp.right]

    return as_call