phasm/phasm/compiler.py
Johan B.W. de Vries 4a1b4bfaa1 Notes
2025-08-01 18:51:58 +02:00

593 lines
20 KiB
Python

"""
This module contains the code to convert parsed Ourlang into WebAssembly code
"""
import struct
from typing import List
from . import ourlang, wasm
from .build.base import TypeInfo
from .stdlib import alloc as stdlib_alloc
from .stdlib import types as stdlib_types
from .type3.functions import FunctionArgument, TypeVariable
from .type3.routers import NoRouteForTypeException
from .type3.typeclasses import Type3ClassMethod
from .type3.types import (
Type3,
TypeApplication_Struct,
TypeApplication_Type,
TypeApplication_TypeInt,
TypeApplication_TypeStar,
TypeConstructor_DynamicArray,
TypeConstructor_Function,
TypeConstructor_StaticArray,
TypeConstructor_Tuple,
)
from .wasm import (
WasmTypeFloat32,
WasmTypeFloat64,
WasmTypeInt32,
WasmTypeInt64,
)
from .wasmgenerator import Generator as WasmGenerator
# Message attached to the asserts in this module that check that a node's
# `type3` attribute is not None before it is compiled.
TYPE3_ASSERTION_ERROR = 'You must call phasm_type3 after calling phasm_parse before your program can be compiled'
def phasm_compile(inp: ourlang.Module[WasmGenerator]) -> wasm.Module:
    """
    Public entry point of the compiler

    Hands the parsed Phasm module to `module` and returns the
    WebAssembly module that comes back.
    """
    compiled = module(inp)
    return compiled
def type3(mod: ourlang.Module[WasmGenerator], inp: Type3) -> wasm.WasmType:
    """
    Compile: type

    Produces the WebAssembly type used for, for example, function
    parameters and return types.

    The type is looked up by name in the build's type info map; a type
    without an entry of its own uses the 'ptr' entry instead.
    """
    info = mod.build.type_info_map.get(inp.name)
    if info is not None:
        return info.wasm_type()

    return mod.build.type_info_map['ptr'].wasm_type()
def tuple_instantiation(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.TupleInstantiation) -> None:
    """
    Compile: Instantiation (allocation) of a tuple

    The same node is used for dynamic arrays, tuples and static arrays;
    the type application of `inp.type3` decides which one is built.

    The emitted statements allocate the memory, store every element at
    its offset and finally push the allocated address.
    """
    assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

    application = inp.type3.application

    member_types: tuple[Type3, ...]
    length_header = None

    if isinstance(application, TypeApplication_Type):
        # Possibly paranoid assert. If we have a future variadic type,
        # does it also do this tuple instantiation like this?
        assert isinstance(application.constructor, TypeConstructor_DynamicArray)

        (member_type, ) = application.arguments
        element_count = len(inp.elements)
        member_types = (member_type, ) * element_count

        # calculate_alloc_size can't be used on the array type itself,
        # since it doesn't know the dynamic array's length.
        # The extra 4 bytes hold the length header written below.
        alloc_size = 4 + element_count * mod.build.calculate_alloc_size(member_type, is_member=True)
        length_header = element_count
    elif isinstance(application, TypeApplication_TypeStar):
        # Possibly paranoid assert. If we have a future variadic type,
        # does it also do this tuple instantiation like this?
        assert isinstance(application.constructor, TypeConstructor_Tuple)

        member_types = application.arguments
        alloc_size = mod.build.calculate_alloc_size(inp.type3, is_member=False)
    elif isinstance(application, TypeApplication_TypeInt):
        # Possibly paranoid assert. If we have a future type of kind * -> Int -> *,
        # does it also do this tuple instantiation like this?
        assert isinstance(application.constructor, TypeConstructor_StaticArray)

        member_type, static_length = application.arguments
        member_types = (member_type, ) * static_length.value
        alloc_size = mod.build.calculate_alloc_size(inp.type3, is_member=False)
    else:
        raise NotImplementedError('tuple_instantiation', inp.type3)

    # Describe the element types in a comment on the generated code
    name_parts = []
    for element in inp.elements:
        assert element.type3 is not None, TYPE3_ASSERTION_ERROR
        name_parts.append(f'{element.type3.name}, ')
    comment_elements = ''.join(name_parts)

    address_var = wgn.temp_var_i32('tuple_adr')
    wgn.add_statement('nop', comment=f'{address_var.name} := ({comment_elements})')

    # Allocate the required amount of bytes in memory
    wgn.i32.const(alloc_size)
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(address_var)

    offset = 0
    if length_header is not None:
        # Write the length header; the elements follow after it
        wgn.local.get(address_var)
        wgn.i32.const(length_header)
        wgn.i32.store()
        offset = 4

    # Store each element individually
    for element, expected_type in zip(inp.elements, member_types, strict=True):
        assert element.type3 == expected_type

        store_info = mod.build.type_info_map.get(expected_type.name)
        if store_info is None:
            store_info = mod.build.type_info_map['ptr']

        wgn.add_statement('nop', comment='PRE')
        wgn.local.get(address_var)
        expression(wgn, mod, element)
        wgn.add_statement(store_info.wasm_store_func, f'offset={offset}')
        wgn.add_statement('nop', comment='POST')

        offset += mod.build.calculate_alloc_size(expected_type, is_member=True)

    # Return the allocated address
    wgn.local.get(address_var)
def expression_subscript_tuple(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.Subscript) -> None:
    """
    Compile: Subscript of a tuple with a constant integer index

    The offset of the requested member is the sum of the alloc sizes of
    the members before it. The tuple expression is compiled first and
    is followed by a load at that offset.
    """
    assert isinstance(inp.index, ourlang.ConstantPrimitive)
    assert isinstance(inp.index.value, int)

    assert inp.varref.type3 is not None, TYPE3_ASSERTION_ERROR
    assert isinstance(inp.varref.type3.application, TypeApplication_TypeStar)

    def info_for(member_type: Type3) -> TypeInfo:
        # Types without an entry of their own use the 'ptr' entry
        found = mod.build.type_info_map.get(member_type.name)
        if found is None:
            return mod.build.type_info_map['ptr']
        return found

    member_types = inp.varref.type3.application.arguments
    position = inp.index.value

    offset = 0
    for preceding_type in member_types[:position]:
        assert preceding_type is not None, TYPE3_ASSERTION_ERROR
        offset += info_for(preceding_type).alloc_size

    selected_type = member_types[position]
    assert selected_type is not None, TYPE3_ASSERTION_ERROR

    expression(wgn, mod, inp.varref)
    wgn.add_statement(info_for(selected_type).wasm_load_func, f'offset={offset}')
def _type_var_map_for_call(method: Type3ClassMethod, arg_exprs: list[ourlang.Expression]) -> dict[TypeVariable, Type3]:
    """
    Build the type variable lookup used to route a type class method call

    `arg_exprs` holds the argument expressions followed by the call
    expression itself, so that it lines up with `method.signature.args`.
    Only signature entries that are a TypeVariable end up in the result;
    fixed types and function arguments are not part of the lookup.

    Raises ValueError (from the strict zip) when both sequences differ in
    length, and NotImplementedError for an unknown kind of signature entry.
    """
    type_var_map: dict[TypeVariable, Type3] = {}

    for type_var, arg_expr in zip(method.signature.args, arg_exprs, strict=True):
        assert arg_expr.type3 is not None, TYPE3_ASSERTION_ERROR

        if isinstance(type_var, Type3):
            # Fixed type, not part of the lookup requirements
            continue

        if isinstance(type_var, TypeVariable):
            type_var_map[type_var] = arg_expr.type3
            continue

        if isinstance(type_var, FunctionArgument):
            # Fixed type, not part of the lookup requirements
            continue

        raise NotImplementedError(type_var, arg_expr.type3)

    return type_var_map


def expression(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.Expression) -> None:
    """
    Compile: Any expression

    Adds the WebAssembly statements for `inp` to `wgn`.

    Raises Exception for ConstantStruct and ConstantTuple, which are not
    compiled through this function, and NotImplementedError for anything
    else that is not supported.
    """
    if isinstance(inp, (ourlang.ConstantStruct, ourlang.ConstantTuple, )):
        # These are implemented elsewhere
        raise Exception(f'{type(inp).__name__} can not be compiled as an expression')

    if isinstance(inp, ourlang.ConstantPrimitive):
        assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

        type_info = mod.build.type_info_map[inp.type3.name]

        if type_info.wasm_type is WasmTypeInt32:
            assert isinstance(inp.value, int)
            wgn.i32.const(inp.value)
            return

        if type_info.wasm_type is WasmTypeInt64:
            assert isinstance(inp.value, int)
            wgn.i64.const(inp.value)
            return

        if type_info.wasm_type is WasmTypeFloat32:
            assert isinstance(inp.value, float)
            wgn.f32.const(inp.value)
            return

        if type_info.wasm_type is WasmTypeFloat64:
            assert isinstance(inp.value, float)
            wgn.f64.const(inp.value)
            return

        raise NotImplementedError(f'Constants with type {inp.type3:s}')

    if isinstance(inp, ourlang.ConstantBytes):
        assert inp.data_block.address is not None, 'Value not allocated'
        wgn.i32.const(inp.data_block.address)
        return

    if isinstance(inp, ourlang.VariableReference):
        if isinstance(inp.variable, ourlang.FunctionParam):
            wgn.add_statement('local.get', f'${inp.variable.name}')
            return

        if isinstance(inp.variable, ourlang.ModuleConstantDef):
            assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

            if inp.type3.name not in mod.build.type_info_map:
                # No type info entry of its own: the constant lives in
                # a data block and is referred to by its address
                assert isinstance(inp.variable.constant, (ourlang.ConstantBytes, ourlang.ConstantStruct, ourlang.ConstantTuple, ))

                address = inp.variable.constant.data_block.address
                assert address is not None, 'Value not allocated'
                wgn.i32.const(address)
                return

            expression(wgn, mod, inp.variable.constant)
            return

        raise NotImplementedError(expression, inp.variable)

    if isinstance(inp, ourlang.BinaryOp):
        expression(wgn, mod, inp.left)
        expression(wgn, mod, inp.right)

        type_var_map = _type_var_map_for_call(inp.operator, [inp.left, inp.right, inp])

        router = mod.build.type_class_instance_methods[inp.operator]
        router(wgn, type_var_map)
        return

    if isinstance(inp, ourlang.FunctionCall):
        for arg in inp.arguments:
            expression(wgn, mod, arg)

        if isinstance(inp.function, Type3ClassMethod):
            type_var_map = _type_var_map_for_call(inp.function, inp.arguments + [inp])

            router = mod.build.type_class_instance_methods[inp.function]
            try:
                router(wgn, type_var_map)
            except NoRouteForTypeException as exc:
                # Keep the routing failure attached as the cause
                raise NotImplementedError(str(inp.function), type_var_map) from exc
            return

        if isinstance(inp.function, ourlang.FunctionParam):
            assert isinstance(inp.function.type3.application.constructor, TypeConstructor_Function)

            params = [
                type3(mod, x)
                for x in inp.function.type3.application.arguments
            ]

            # The last type argument is the result type,
            # the ones before it are the parameters
            result = params.pop()

            wgn.add_statement('local.get', f'${inp.function.name}')
            wgn.call_indirect(params=params, result=result)
            return

        wgn.call(inp.function.name)
        return

    if isinstance(inp, ourlang.FunctionReference):
        # Referenced functions get an index in the module's functions
        # table; a function that is not in there yet is appended
        idx = mod.functions_table.get(inp.function)
        if idx is None:
            idx = len(mod.functions_table)
            mod.functions_table[inp.function] = idx

        wgn.add_statement('i32.const', str(idx), comment=inp.function.name)
        return

    if isinstance(inp, ourlang.TupleInstantiation):
        tuple_instantiation(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.Subscript):
        assert inp.type3 is not None, TYPE3_ASSERTION_ERROR
        assert inp.varref.type3 is not None, TYPE3_ASSERTION_ERROR

        if inp.varref.type3.application.constructor is mod.build.tuple_:
            expression_subscript_tuple(wgn, mod, inp)
            return

        # Any other subscript is compiled as a call to
        # the '[]' operator of the Subscriptable type class
        assert inp.sourceref is not None  # TODO: Remove this
        inp_as_fc = ourlang.FunctionCall(mod.build.type_classes['Subscriptable'].operators['[]'], inp.sourceref)
        inp_as_fc.type3 = inp.type3
        inp_as_fc.arguments = [inp.varref, inp.index]

        expression(wgn, mod, inp_as_fc)
        return

    if isinstance(inp, ourlang.AccessStructMember):
        assert inp.struct_type3 is not None, TYPE3_ASSERTION_ERROR

        assert isinstance(inp.struct_type3.application, TypeApplication_Struct)

        member_type = dict(inp.struct_type3.application.arguments)[inp.member]
        member_type_info = mod.build.type_info_map.get(member_type.name)
        if member_type_info is None:
            member_type_info = mod.build.type_info_map['ptr']

        expression(wgn, mod, inp.varref)
        wgn.add_statement(member_type_info.wasm_load_func, 'offset=' + str(mod.build.calculate_member_offset(
            inp.struct_type3.name, inp.struct_type3.application.arguments, inp.member
        )))
        return

    raise NotImplementedError(expression, inp)
def statement_return(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.StatementReturn) -> None:
    """
    Compile: Return statement

    Returning a call to the function that is being compiled is emitted
    as a tail call, which helps a lot with some functional programming
    techniques. See https://github.com/WebAssembly/tail-call
    """
    returned = inp.value
    calls_itself = isinstance(returned, ourlang.FunctionCall) and returned.function is fun

    if not calls_itself:
        expression(wgn, mod, returned)
        wgn.return_()
        return

    for argument in returned.arguments:
        expression(wgn, mod, argument)

    wgn.add_statement('return_call', f'${returned.function.name}')
def statement_if(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.StatementIf) -> None:
    """
    Compile: If statement

    Compiles the test expression, followed by the statements of the
    body inside an if block.

    Raises NotImplementedError when the statement has an else branch.
    """
    if inp.else_statements:
        # Reject the statement before anything is emitted for it
        raise NotImplementedError('else branches of if statements are not supported yet')

    expression(wgn, mod, inp.test)

    with wgn.if_():
        for stat in inp.statements:
            statement(wgn, mod, fun, stat)
def statement(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], fun: ourlang.Function, inp: ourlang.Statement) -> None:
"""
Compile: any statement
"""
if isinstance(inp, ourlang.StatementReturn):
statement_return(wgn, mod, fun, inp)
return
if isinstance(inp, ourlang.StatementIf):
statement_if(wgn, mod, fun, inp)
return
if isinstance(inp, ourlang.StatementPass):
return
raise NotImplementedError(statement, inp)
def function_argument(mod: ourlang.Module[WasmGenerator], inp: ourlang.FunctionParam) -> wasm.Param:
    """
    Compile: function argument

    The result pairs the parameter's name with its WebAssembly type.
    """
    name = inp.name
    wasm_type = type3(mod, inp.type3)
    return (name, wasm_type)
def import_(mod: ourlang.Module[WasmGenerator], inp: ourlang.Function) -> wasm.Import:
    """
    Compile: imported function

    Only valid for functions that are marked as imported.
    """
    assert inp.imported

    params = [function_argument(mod, param) for param in inp.posonlyargs]
    result_type = type3(mod, inp.returns_type3)

    # The function's own name is passed for both name arguments
    return wasm.Import(inp.imported, inp.name, inp.name, params, result_type)
def function(mod: ourlang.Module[WasmGenerator], inp: ourlang.Function) -> wasm.Function:
    """
    Compile: function

    Only valid for functions that are not imported. The body of a
    struct constructor is generated; the body of any other function is
    compiled statement by statement.
    """
    assert not inp.imported

    wgn = WasmGenerator()

    if not isinstance(inp, ourlang.StructConstructor):
        for stat in inp.statements:
            statement(wgn, mod, inp, stat)
    else:
        _generate_struct_constructor(wgn, mod, inp)

    # Only exported functions get an export name
    export_name = inp.name if inp.exported else None
    params = [function_argument(mod, param) for param in inp.posonlyargs]
    local_vars = [
        (local_name, local_var.wasm_type(), )
        for local_name, local_var in wgn.locals.items()
    ]
    result_type = type3(mod, inp.returns_type3)

    return wasm.Function(inp.name, export_name, params, local_vars, result_type, wgn.statements)
def module_data_primitive(type_info: TypeInfo, inp: int | float) -> bytes:
    """
    Compile: a single primitive value to its little-endian bytes

    The struct format letter is selected by the wasm type, the alloc
    size and the signedness of `type_info`; a combination that is not
    listed raises KeyError.
    """
    format_letters = {
        (WasmTypeInt32, 1, False): 'B',
        (WasmTypeInt32, 1, True): 'b',
        (WasmTypeInt32, 2, False): 'H',
        (WasmTypeInt32, 2, True): 'h',
        (WasmTypeInt32, 4, False): 'I',
        (WasmTypeInt32, 4, True): 'i',
        (WasmTypeInt64, 8, False): 'Q',
        (WasmTypeInt64, 8, True): 'q',
        (WasmTypeFloat32, 4, None): 'f',
        (WasmTypeFloat64, 8, None): 'd',
    }

    lookup_key = (type_info.wasm_type, type_info.alloc_size, type_info.signed, )
    pack_format = '<' + format_letters[lookup_key]

    return struct.pack(pack_format, inp)
def module_data(mod: ourlang.Module[WasmGenerator], inp: ourlang.ModuleData) -> bytes:
    """
    Compile: module data

    Assigns an address to every data block in `inp` and returns the
    initial memory contents: four u32 header values for stdlib.alloc,
    followed by every block, each prefixed with a u32 holding its size.

    Raises NotImplementedError for a memory stored constant (other than
    bytes) that sits in its own data block.
    """
    unalloc_ptr = stdlib_alloc.UNALLOC_PTR
    u32_type_info = mod.build.type_info_map['u32']
    ptr_type_info = mod.build.type_info_map['ptr']

    # Collected as chunks and joined once at the end, rather than
    # growing a bytes object on every block
    allocated_chunks: list[bytes] = []

    for block in inp.blocks:
        block.address = unalloc_ptr + 4  # 4 bytes for allocator header

        data_list: list[bytes] = []

        for constant in block.data:
            assert constant.type3 is not None, TYPE3_ASSERTION_ERROR

            # NOTE(review): this check runs before the data_block
            # comparison below, so a ConstantBytes is always written
            # inline, also when it would belong to a different block.
            # Confirm that this is intended.
            if isinstance(constant, ourlang.ConstantBytes):
                data_list.append(module_data_primitive(u32_type_info, len(constant.value)))
                data_list.append(constant.value)
                continue

            if isinstance(constant, ourlang.ConstantMemoryStored):
                if block is constant.data_block:
                    raise NotImplementedError(block, constant)

                # It's stored in a different block
                # We only need to store its address
                # This happens for example when a tuple refers
                # to a bytes constant
                assert constant.data_block.address is not None, 'Referred memory not yet stored'
                data_list.append(module_data_primitive(ptr_type_info, constant.data_block.address))
                continue

            type_info = mod.build.type_info_map[constant.type3.name]
            data_list.append(module_data_primitive(type_info, constant.value))

        block_data = b''.join(data_list)

        allocated_chunks.append(module_data_primitive(u32_type_info, len(block_data)))
        allocated_chunks.append(block_data)

        unalloc_ptr += 4 + len(block_data)

    return b''.join([
        # Store that we've initialized the memory
        module_data_primitive(u32_type_info, stdlib_alloc.IDENTIFIER),
        # Store the first reserved i32
        module_data_primitive(u32_type_info, 0),
        # Store the pointer towards the first free block
        # In this case, 0 since we haven't freed any blocks yet
        module_data_primitive(u32_type_info, 0),
        # Store the pointer towards the first unallocated block
        # This is the address right after the last block stored below
        module_data_primitive(u32_type_info, unalloc_ptr),
        # Store the actual data
        *allocated_chunks,
    ])
def module(inp: ourlang.Module[WasmGenerator]) -> wasm.Module:
    """
    Compile: module

    Fills a new WebAssembly module with the memory data, the imports,
    the stdlib functions followed by the module's own functions, and
    the function table.
    """
    result = wasm.Module()

    result.memory.data = module_data(inp, inp.data)

    imported = []
    for fun in inp.functions.values():
        if fun.imported:
            imported.append(import_(inp, fun))
    result.imports = imported

    compiled_functions = [
        stdlib_alloc.__find_free_block__,
        stdlib_alloc.__alloc__,
        stdlib_types.__alloc_bytes__,
        stdlib_types.__u32_min__,
        stdlib_types.__u64_min__,
        stdlib_types.__i32_min__,
        stdlib_types.__i64_min__,
        stdlib_types.__u32_max__,
        stdlib_types.__u64_max__,
        stdlib_types.__i32_max__,
        stdlib_types.__i64_max__,
        stdlib_types.__i32_abs__,
        stdlib_types.__i64_abs__,
        stdlib_types.__u32_pow2__,
        stdlib_types.__u8_rotl__,
        stdlib_types.__u8_rotr__,
        stdlib_types.__u16_rotl__,
        stdlib_types.__u16_rotr__,
    ]
    compiled_functions.extend(
        function(inp, fun)
        for fun in inp.functions.values()
        if not fun.imported
    )
    result.functions = compiled_functions

    # Do this after rendering the functions since that's what populates the tables
    table = {}
    for table_function, table_index in inp.functions_table.items():
        table[table_index] = table_function.name
    result.table = table

    return result
def _generate_struct_constructor(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.StructConstructor) -> None:
    """
    Compile: body of a struct constructor

    Allocates memory for the struct, stores every member from the local
    with the same name at the member's offset, and pushes the allocated
    address.
    """
    application = inp.struct_type3.application
    assert isinstance(application, TypeApplication_Struct)

    members = application.arguments

    address_var = wgn.temp_var_i32('struct_adr')

    # Allocate the required amount of bytes in memory
    wgn.i32.const(mod.build.calculate_alloc_size(inp.struct_type3))
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(address_var)

    # Store each member individually
    for member_name, member_type in members:
        store_info = mod.build.type_info_map.get(member_type.name)
        if store_info is None:
            store_info = mod.build.type_info_map['ptr']

        wgn.local.get(address_var)
        wgn.add_statement('local.get', f'${member_name}')

        member_offset = mod.build.calculate_member_offset(inp.struct_type3.name, members, member_name)
        wgn.add_statement(store_info.wasm_store_func, 'offset=' + str(member_offset))

    # Return the allocated address
    wgn.local.get(address_var)