phasm/phasm/compiler.py
Johan B.W. de Vries f3a6fbb804 Moves the prelude to runtime
Previously, it was hardcoded at 'compile' time (in as much as
Python has that). This would make it more difficult to add
stuff to it. Also, in a lot of places we made assumptions
about prelude instead of checking properly.
2025-05-27 20:01:06 +02:00

720 lines
24 KiB
Python

"""
This module contains the code to convert parsed Ourlang into WebAssembly code
"""
import struct
from typing import List
from . import ourlang, prelude, wasm
from .build import builtins
from .stdlib import alloc as stdlib_alloc
from .stdlib import types as stdlib_types
from .stdlib.types import TYPE_INFO_CONSTRUCTED, TYPE_INFO_MAP
from .type3.functions import FunctionArgument, TypeVariable
from .type3.routers import NoRouteForTypeException, TypeApplicationRouter
from .type3.typeclasses import Type3ClassMethod
from .type3.types import (
IntType3,
Type3,
TypeApplication_Struct,
TypeApplication_Type,
TypeApplication_TypeInt,
TypeApplication_TypeStar,
TypeConstructor_DynamicArray,
TypeConstructor_Function,
TypeConstructor_StaticArray,
TypeConstructor_Tuple,
)
from .wasm import (
WasmTypeInt32,
WasmTypeInt64,
WasmTypeFloat32,
WasmTypeFloat64,
)
from .wasmgenerator import Generator as WasmGenerator
# Message attached to the asserts that check a node's type3 attribute
# has been filled in before compilation.
TYPE3_ASSERTION_ERROR = (
    'You must call phasm_type3 after calling phasm_parse '
    'before your program can be compiled'
)
def phasm_compile(inp: ourlang.Module) -> wasm.Module:
    """
    Public entry point: compile a parsed Phasm module into
    a WebAssembly module

    All of the work is delegated to `module`.
    """
    compiled = module(inp)
    return compiled
def type3(inp: Type3) -> wasm.WasmType:
    """
    Compile: type

    The type info is looked up by the type's name in TYPE_INFO_MAP;
    names that are not registered there fall back to
    TYPE_INFO_CONSTRUCTED. The WebAssembly type of that info is
    instantiated and returned.

    Types are used for example in WebAssembly function parameters
    and return types.
    """
    if inp.name in TYPE_INFO_MAP:
        info = TYPE_INFO_MAP[inp.name]
    else:
        info = TYPE_INFO_CONSTRUCTED

    return info.wasm_type()
def tuple_instantiation(wgn: WasmGenerator, mod: ourlang.Module, inp: ourlang.TupleInstantiation) -> None:
    """
    Compile: Instantiation (allocation) of a tuple

    Handles three kinds of type application: dynamic arrays, tuples and
    static arrays. For a dynamic array, 4 extra bytes are allocated up
    front and the number of elements is stored there.

    The generated code allocates the memory, stores every element at
    its offset, and ends with a local.get of the allocated address.

    Raises NotImplementedError for any other type application.
    """
    assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

    application = inp.type3.application

    member_types: tuple[Type3, ...]
    length_header = None

    if isinstance(application, TypeApplication_Type):
        # Possibly paranoid assert. If we have a future variadic type,
        # does it also do this tuple instantiation like this?
        assert isinstance(application.constructor, TypeConstructor_DynamicArray)

        (member_type, ) = application.arguments
        member_types = (member_type, ) * len(inp.elements)

        # Can't use calculate_alloc_size directly since that doesn't
        # know the dynamic array's length
        total_size = 4 + calculate_alloc_size(member_type, is_member=True) * len(inp.elements)
        length_header = len(inp.elements)
    elif isinstance(application, TypeApplication_TypeStar):
        # Possibly paranoid assert. If we have a future variadic type,
        # does it also do this tuple instantiation like this?
        assert isinstance(application.constructor, TypeConstructor_Tuple)

        member_types = application.arguments
        total_size = calculate_alloc_size(inp.type3, is_member=False)
    elif isinstance(application, TypeApplication_TypeInt):
        # Possibly paranoid assert. If we have a future type of kind * -> Int -> *,
        # does it also do this tuple instantiation like this?
        assert isinstance(application.constructor, TypeConstructor_StaticArray)

        member_type, member_count = application.arguments
        member_types = (member_type, ) * member_count.value
        total_size = calculate_alloc_size(inp.type3, is_member=False)
    else:
        raise NotImplementedError('tuple_instantiation', inp.type3)

    # Build the human readable comment listing the element types
    name_parts: list[str] = []
    for element in inp.elements:
        assert element.type3 is not None, TYPE3_ASSERTION_ERROR
        name_parts.append(f'{element.type3.name}, ')
    element_names = ''.join(name_parts)

    address_var = wgn.temp_var_i32('tuple_adr')
    wgn.add_statement('nop', comment=f'{address_var.name} := ({element_names})')

    # Allocate the required number of bytes in memory
    wgn.i32.const(total_size)
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(address_var)

    if length_header is None:
        offset = 0
    else:
        # Write the length header; the elements start right after it
        wgn.local.get(address_var)
        wgn.i32.const(length_header)
        wgn.i32.store()
        offset = 4

    # Store each element individually
    for element, expected_type in zip(inp.elements, member_types, strict=True):
        assert element.type3 == expected_type

        expected_info = TYPE_INFO_MAP.get(expected_type.name, TYPE_INFO_CONSTRUCTED)

        wgn.add_statement('nop', comment='PRE')
        wgn.local.get(address_var)
        expression(wgn, mod, element)
        wgn.add_statement(expected_info.wasm_store_func, f'offset={offset}')
        wgn.add_statement('nop', comment='POST')

        offset += calculate_alloc_size(expected_type, is_member=True)

    # Return the allocated address
    wgn.local.get(address_var)
def expression_subscript_bytes(
    attrs: tuple[WasmGenerator, ourlang.Module, ourlang.Subscript],
) -> None:
    """
    Compile: Subscript on a bytes value

    Emits the subscripted value, then the index, then a call
    to the stdlib __subscript_bytes__ function.
    """
    generator, module_, subscript = attrs

    for operand in (subscript.varref, subscript.index):
        expression(generator, module_, operand)

    generator.call(stdlib_types.__subscript_bytes__)
def expression_subscript_static_array(
    attrs: tuple[WasmGenerator, ourlang.Module, ourlang.Subscript],
    args: tuple[Type3, IntType3],
) -> None:
    """
    Compile: Subscript on a static array

    `args` holds the element type and the array length. The generated
    code hits `unreachable` when the index, compared as unsigned, is
    greater than or equal to that length. Otherwise it loads the element
    found at base address + index * element alloc size.
    """
    wgn, mod, inp = attrs
    element_type, length = args

    element_info = TYPE_INFO_MAP.get(element_type.name, TYPE_INFO_CONSTRUCTED)

    # OPTIMIZE: If index is a constant, we can use offset instead of multiply
    # and we don't need to do the out of bounds check

    # Base address of the array
    expression(wgn, mod, inp.varref)

    # The index is needed twice: for the bounds check and for the address
    index_var = wgn.temp_var_i32('index')
    expression(wgn, mod, inp.index)
    wgn.local.tee(index_var)

    # Out of bounds check based on the static length
    wgn.i32.const(length.value)
    wgn.i32.ge_u()
    with wgn.if_():
        wgn.unreachable(comment='Out of bounds')

    # address = base + index * element size
    wgn.local.get(index_var)
    wgn.i32.const(element_info.alloc_size)
    wgn.i32.mul()
    wgn.i32.add()

    wgn.add_statement(element_info.wasm_load_func)
def expression_subscript_tuple(
    attrs: tuple[WasmGenerator, ourlang.Module, ourlang.Subscript],
    args: tuple[Type3, ...],
) -> None:
    """
    Compile: Subscript on a tuple

    The index must be a constant integer. The byte offset of the
    requested member is the sum of the alloc sizes of all members
    before it, so it is passed as a static offset to the load.
    """
    wgn, mod, inp = attrs

    assert isinstance(inp.index, ourlang.ConstantPrimitive)
    assert isinstance(inp.index.value, int)
    member_index = inp.index.value

    preceding = args[:member_index]
    for earlier_type in preceding:
        assert earlier_type is not None, TYPE3_ASSERTION_ERROR

    offset = sum(
        TYPE_INFO_MAP.get(earlier_type.name, TYPE_INFO_CONSTRUCTED).alloc_size
        for earlier_type in preceding
    )

    member_type = args[member_index]
    assert member_type is not None, TYPE3_ASSERTION_ERROR

    expression(wgn, mod, inp.varref)

    member_info = TYPE_INFO_MAP.get(member_type.name, TYPE_INFO_CONSTRUCTED)
    wgn.add_statement(member_info.wasm_load_func, f'offset={offset}')
# Router used by `expression` for Subscript nodes. It is called with the
# (generator, module, subscript) triple and the type3 of the subscripted
# value, and dispatches to one of the handlers registered below.
SUBSCRIPT_ROUTER = TypeApplicationRouter[tuple[WasmGenerator, ourlang.Module, ourlang.Subscript], None]()
# NOTE(review): no handler is registered for dynamic arrays; the
# registration below is disabled and its handler is not visible here.
# SUBSCRIPT_ROUTER.add(builtins.dynamic_array, expression_subscript_dynamic_array)
SUBSCRIPT_ROUTER.add(builtins.static_array, expression_subscript_static_array)
SUBSCRIPT_ROUTER.add(builtins.tuple_, expression_subscript_tuple)
def _type_var_map_for_call(
    signature_args: list[Type3 | TypeVariable | FunctionArgument],
    exprs: list[ourlang.Expression],
) -> dict[TypeVariable, Type3]:
    """
    Collect the concrete type of every type variable in a signature

    `signature_args` and `exprs` are walked pairwise and must have the
    same length. For every signature entry that is a TypeVariable, the
    type3 of the paired expression is recorded. Entries that are a
    fixed Type3 or a FunctionArgument are not part of the lookup
    requirements and are skipped.

    Raises NotImplementedError for any other kind of signature entry.
    """
    type_var_map: dict[TypeVariable, Type3] = {}

    for type_var, arg_expr in zip(signature_args, exprs, strict=True):
        assert arg_expr.type3 is not None, TYPE3_ASSERTION_ERROR

        if isinstance(type_var, Type3):
            # Fixed type, not part of the lookup requirements
            continue

        if isinstance(type_var, TypeVariable):
            type_var_map[type_var] = arg_expr.type3
            continue

        if isinstance(type_var, FunctionArgument):
            # Fixed type, not part of the lookup requirements
            continue

        raise NotImplementedError(type_var, arg_expr.type3)

    return type_var_map


def expression(wgn: WasmGenerator, mod: ourlang.Module[WasmGenerator], inp: ourlang.Expression) -> None:
    """
    Compile: Any expression

    Emits the statements for `inp` into `wgn`. Function references
    that are not yet in `mod.functions_table` are added to it.

    Raises Exception for struct and tuple constants, which are not
    compiled here, and NotImplementedError for anything that is
    not supported.
    """
    if isinstance(inp, (ourlang.ConstantStruct, ourlang.ConstantTuple, )):
        # These are implemented elsewhere
        raise Exception(
            f'{type(inp).__name__} cannot be compiled as an expression; it is implemented elsewhere'
        )

    if isinstance(inp, ourlang.ConstantPrimitive):
        assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

        type_info = mod.build.type_info_map[inp.type3.name]

        # Pick the const instruction matching the WebAssembly type
        if type_info.wasm_type is WasmTypeInt32:
            assert isinstance(inp.value, int)
            wgn.i32.const(inp.value)
            return

        if type_info.wasm_type is WasmTypeInt64:
            assert isinstance(inp.value, int)
            wgn.i64.const(inp.value)
            return

        if type_info.wasm_type is WasmTypeFloat32:
            assert isinstance(inp.value, float)
            wgn.f32.const(inp.value)
            return

        if type_info.wasm_type is WasmTypeFloat64:
            assert isinstance(inp.value, float)
            wgn.f64.const(inp.value)
            return

        raise NotImplementedError(f'Constants with type {inp.type3:s}')

    if isinstance(inp, ourlang.ConstantBytes):
        # Bytes constants live in module data; push their address
        assert inp.data_block.address is not None, 'Value not allocated'
        wgn.i32.const(inp.data_block.address)
        return

    if isinstance(inp, ourlang.VariableReference):
        if isinstance(inp.variable, ourlang.FunctionParam):
            wgn.add_statement('local.get', '${}'.format(inp.variable.name))
            return

        if isinstance(inp.variable, ourlang.ModuleConstantDef):
            assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

            if inp.type3.name not in TYPE_INFO_MAP:
                # Not a type with its own type info: the constant is
                # stored in a data block, so push that block's address
                assert isinstance(inp.variable.constant, (ourlang.ConstantBytes, ourlang.ConstantStruct, ourlang.ConstantTuple, ))

                address = inp.variable.constant.data_block.address
                assert address is not None, 'Value not allocated'
                wgn.i32.const(address)
                return

            expression(wgn, mod, inp.variable.constant)
            return

        raise NotImplementedError(expression, inp.variable)

    if isinstance(inp, ourlang.BinaryOp):
        expression(wgn, mod, inp.left)
        expression(wgn, mod, inp.right)

        # The last signature entry is paired with the operation
        # itself, i.e. with its result type
        type_var_map = _type_var_map_for_call(
            inp.operator.signature.args,
            [inp.left, inp.right, inp],
        )

        router = mod.build.type_class_instance_methods[inp.operator]
        router(wgn, type_var_map)
        return

    if isinstance(inp, ourlang.FunctionCall):
        for arg in inp.arguments:
            expression(wgn, mod, arg)

        if isinstance(inp.function, Type3ClassMethod):
            type_var_map = _type_var_map_for_call(
                inp.function.signature.args,
                inp.arguments + [inp],
            )

            # NOTE(review): BinaryOp looks its router up through mod.build,
            # while this branch uses the prelude module-level table.
            # Confirm whether that difference is intentional.
            router = prelude.PRELUDE_TYPE_CLASS_INSTANCE_METHODS[inp.function]
            try:
                router(wgn, type_var_map)
            except NoRouteForTypeException as exc:
                raise NotImplementedError(str(inp.function), type_var_map) from exc
            return

        if isinstance(inp.function, ourlang.FunctionParam):
            # Calling a function that was passed in as a parameter
            assert isinstance(inp.function.type3.application.constructor, TypeConstructor_Function)

            params = [
                type3(x)
                for x in inp.function.type3.application.arguments
            ]

            # The last type argument is the result type
            result = params.pop()

            wgn.add_statement('local.get', '${}'.format(inp.function.name))
            wgn.call_indirect(params=params, result=result)
            return

        wgn.call(inp.function.name)
        return

    if isinstance(inp, ourlang.FunctionReference):
        # Reuse the function's table index, or assign the next free one
        idx = mod.functions_table.get(inp.function)
        if idx is None:
            idx = len(mod.functions_table)
            mod.functions_table[inp.function] = idx

        wgn.add_statement('i32.const', str(idx), comment=inp.function.name)
        return

    if isinstance(inp, ourlang.TupleInstantiation):
        tuple_instantiation(wgn, mod, inp)
        return

    if isinstance(inp, ourlang.Subscript):
        assert inp.varref.type3 is not None, TYPE3_ASSERTION_ERROR

        # Type checker guarantees we don't get routing errors
        SUBSCRIPT_ROUTER((wgn, mod, inp, ), inp.varref.type3)
        return

    if isinstance(inp, ourlang.AccessStructMember):
        assert inp.struct_type3 is not None, TYPE3_ASSERTION_ERROR

        assert isinstance(inp.struct_type3.application, TypeApplication_Struct)

        member_type = dict(inp.struct_type3.application.arguments)[inp.member]
        member_type_info = TYPE_INFO_MAP.get(member_type.name, TYPE_INFO_CONSTRUCTED)

        expression(wgn, mod, inp.varref)
        wgn.add_statement(member_type_info.wasm_load_func, 'offset=' + str(calculate_member_offset(
            inp.struct_type3.name, inp.struct_type3.application.arguments, inp.member
        )))
        return

    raise NotImplementedError(expression, inp)
def statement_return(wgn: WasmGenerator, mod: ourlang.Module, fun: ourlang.Function, inp: ourlang.StatementReturn) -> None:
    """
    Compile: Return statement

    When the returned value is a call to the function being compiled
    itself, a `return_call` is emitted instead of a regular call
    followed by a return.
    """
    value = inp.value

    if not (isinstance(value, ourlang.FunctionCall) and value.function is fun):
        # Regular return: evaluate the value, then return it
        expression(wgn, mod, value)
        wgn.return_()
        return

    # Support tail calls
    # https://github.com/WebAssembly/tail-call
    # These help a lot with some functional programming techniques
    for call_arg in value.arguments:
        expression(wgn, mod, call_arg)

    wgn.add_statement('return_call', f'${value.function.name}')
def statement_if(wgn: WasmGenerator, mod: ourlang.Module, fun: ourlang.Function, inp: ourlang.StatementIf) -> None:
    """
    Compile: If statement

    Emits the test expression followed by an if block holding the
    compiled body statements.

    Raises NotImplementedError when the statement has an else branch.
    """
    expression(wgn, mod, inp.test)

    with wgn.if_():
        for body_statement in inp.statements:
            statement(wgn, mod, fun, body_statement)

    if not inp.else_statements:
        return

    # Else branches are not supported yet. The intended shape is:
    # yield wasm.Statement('else')
    # for stat in inp.else_statements:
    #     statement(wgn, stat)
    raise NotImplementedError
def statement(wgn: WasmGenerator, mod: ourlang.Module, fun: ourlang.Function, inp: ourlang.Statement) -> None:
    """
    Compile: any statement

    Dispatches on the statement class. A pass statement emits nothing.

    Raises NotImplementedError for any other statement class.
    """
    if isinstance(inp, ourlang.StatementReturn):
        statement_return(wgn, mod, fun, inp)
    elif isinstance(inp, ourlang.StatementIf):
        statement_if(wgn, mod, fun, inp)
    elif isinstance(inp, ourlang.StatementPass):
        pass
    else:
        raise NotImplementedError(statement, inp)
def function_argument(inp: ourlang.FunctionParam) -> wasm.Param:
    """
    Compile: function argument

    Returns the parameter as a (name, WebAssembly type) pair.
    """
    param_name = inp.name
    param_type = type3(inp.type3)

    return (param_name, param_type, )
def import_(inp: ourlang.Function) -> wasm.Import:
    """
    Compile: imported function

    The function must be marked as imported. Its name is passed
    twice to wasm.Import, after the `imported` value.
    """
    assert inp.imported

    source = inp.imported
    name = inp.name
    params = [function_argument(param) for param in inp.posonlyargs]
    result = type3(inp.returns_type3)

    return wasm.Import(source, name, name, params, result)
def function(mod: ourlang.Module, inp: ourlang.Function) -> wasm.Function:
    """
    Compile: function

    The function must not be an imported one. Struct constructors
    get a generated body; any other function has its statements
    compiled one by one. The function is only given an export name
    when it is marked as exported.
    """
    assert not inp.imported

    wgn = WasmGenerator()

    if not isinstance(inp, ourlang.StructConstructor):
        for body_statement in inp.statements:
            statement(wgn, mod, inp, body_statement)
    else:
        _generate_struct_constructor(wgn, inp)

    export_name = inp.name if inp.exported else None
    params = [function_argument(param) for param in inp.posonlyargs]

    # Locals registered on the generator while compiling the body
    local_vars = [
        (local_name, local_var.wasm_type(), )
        for local_name, local_var in wgn.locals.items()
    ]
    result = type3(inp.returns_type3)

    return wasm.Function(inp.name, export_name, params, local_vars, result, wgn.statements)
def module_data_u8(inp: int) -> bytes:
    """
    Compile: module data, u8 value

    Packs the value as a single unsigned byte.
    """
    packer = struct.Struct('<B')
    return packer.pack(inp)
def module_data_u16(inp: int) -> bytes:
    """
    Compile: module data, u16 value

    Packs the value as 2 bytes, unsigned, little-endian.
    """
    packer = struct.Struct('<H')
    return packer.pack(inp)
def module_data_u32(inp: int) -> bytes:
    """
    Compile: module data, u32 value

    Packs the value as 4 bytes, unsigned, little-endian.
    """
    packer = struct.Struct('<I')
    return packer.pack(inp)
def module_data_u64(inp: int) -> bytes:
    """
    Compile: module data, u64 value

    Packs the value as 8 bytes, unsigned, little-endian.
    """
    packer = struct.Struct('<Q')
    return packer.pack(inp)
def module_data_i8(inp: int) -> bytes:
    """
    Compile: module data, i8 value

    Packs the value as a single signed byte.
    """
    packer = struct.Struct('<b')
    return packer.pack(inp)
def module_data_i16(inp: int) -> bytes:
    """
    Compile: module data, i16 value

    Packs the value as 2 bytes, signed, little-endian.
    """
    packer = struct.Struct('<h')
    return packer.pack(inp)
def module_data_i32(inp: int) -> bytes:
    """
    Compile: module data, i32 value

    Packs the value as 4 bytes, signed, little-endian.
    """
    packer = struct.Struct('<i')
    return packer.pack(inp)
def module_data_i64(inp: int) -> bytes:
    """
    Compile: module data, i64 value

    Packs the value as 8 bytes, signed, little-endian.
    """
    packer = struct.Struct('<q')
    return packer.pack(inp)
def module_data_f32(inp: float) -> bytes:
    """
    Compile: module data, f32 value

    Packs the value as a 4 byte little-endian float.
    """
    packer = struct.Struct('<f')
    return packer.pack(inp)
def module_data_f64(inp: float) -> bytes:
    """
    Compile: module data, f64 value

    Packs the value as an 8 byte little-endian float.
    """
    packer = struct.Struct('<d')
    return packer.pack(inp)
def module_data(inp: ourlang.ModuleData) -> bytes:
    """
    Compile: module data

    Assigns an address to every data block and serializes the blocks
    one after another, each prefixed with a u32 holding the length of
    its data. The result starts with four u32 values: the allocator
    identifier, a reserved value, the first free block pointer and
    the pointer to the first unallocated byte.

    Raises NotImplementedError for a constant with an unsupported type.
    """
    next_free_address = stdlib_alloc.UNALLOC_PTR
    serialized_blocks: List[bytes] = []

    for block in inp.blocks:
        block.address = next_free_address + 4  # 4 bytes for allocator header

        pieces: List[bytes] = []

        for constant in block.data:
            assert constant.type3 is not None, TYPE3_ASSERTION_ERROR

            if isinstance(constant, ourlang.ConstantMemoryStored) and block is not constant.data_block:
                # It's stored in a different block
                # We only need to store its address
                # This happens for example when a tuple refers
                # to a bytes constant
                assert constant.data_block.address is not None, 'Referred memory not yet stored'
                pieces.append(module_data_u32(constant.data_block.address))
            elif constant.type3 == prelude.u8:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_u8(constant.value))
            elif constant.type3 == prelude.u16:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_u16(constant.value))
            elif constant.type3 == prelude.u32:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_u32(constant.value))
            elif constant.type3 == prelude.u64:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_u64(constant.value))
            elif constant.type3 == prelude.i8:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_i8(constant.value))
            elif constant.type3 == prelude.i16:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_i16(constant.value))
            elif constant.type3 == prelude.i32:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_i32(constant.value))
            elif constant.type3 == prelude.i64:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, int)
                pieces.append(module_data_i64(constant.value))
            elif constant.type3 == prelude.f32:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, float)
                pieces.append(module_data_f32(constant.value))
            elif constant.type3 == prelude.f64:
                assert isinstance(constant, ourlang.ConstantPrimitive)
                assert isinstance(constant.value, float)
                pieces.append(module_data_f64(constant.value))
            elif constant.type3 == prelude.bytes_:
                assert isinstance(constant, ourlang.ConstantBytes)
                assert isinstance(constant.value, bytes)
                # Bytes are stored as their length followed by the content
                pieces.append(module_data_u32(len(constant.value)) + constant.value)
            else:
                raise NotImplementedError(constant, constant.type3)

        block_bytes = b''.join(pieces)

        # Every block is preceded by a u32 with the length of its data
        serialized_blocks.append(module_data_u32(len(block_bytes)))
        serialized_blocks.append(block_bytes)

        next_free_address += 4 + len(block_bytes)

    header = b''.join((
        # Store that we've initialized the memory
        module_data_u32(stdlib_alloc.IDENTIFIER),
        # Store the first reserved i32
        module_data_u32(0),
        # Store the pointer towards the first free block
        # In this case, 0 since we haven't freed any blocks yet
        module_data_u32(0),
        # Store the pointer towards the first unallocated block
        # In this case the end of the stdlib.alloc header at the start
        module_data_u32(next_free_address),
    ))

    # The header is followed by the actual data
    return header + b''.join(serialized_blocks)
def module(inp: ourlang.Module) -> wasm.Module:
    """
    Compile: module

    Fills a new wasm.Module with the memory data, the imports, the
    stdlib functions followed by the module's own functions, and the
    functions table.
    """
    result = wasm.Module()

    # The data is compiled first; this assigns the data block addresses
    result.memory.data = module_data(inp.data)

    result.imports = [
        import_(func)
        for func in inp.functions.values()
        if func.imported
    ]

    stdlib_functions = [
        stdlib_alloc.__find_free_block__,
        stdlib_alloc.__alloc__,
        stdlib_types.__alloc_bytes__,
        stdlib_types.__subscript_bytes__,
        stdlib_types.__u32_ord_min__,
        stdlib_types.__u64_ord_min__,
        stdlib_types.__i32_ord_min__,
        stdlib_types.__i64_ord_min__,
        stdlib_types.__u32_ord_max__,
        stdlib_types.__u64_ord_max__,
        stdlib_types.__i32_ord_max__,
        stdlib_types.__i64_ord_max__,
        stdlib_types.__i32_intnum_abs__,
        stdlib_types.__i64_intnum_abs__,
        stdlib_types.__u32_pow2__,
        stdlib_types.__u8_rotl__,
        stdlib_types.__u8_rotr__,
        stdlib_types.__u16_rotl__,
        stdlib_types.__u16_rotr__,
    ]
    own_functions = [
        function(inp, func)
        for func in inp.functions.values()
        if not func.imported
    ]
    result.functions = stdlib_functions + own_functions

    # Do this after rendering the functions since that's what populates the tables
    table = {}
    for func, index in inp.functions_table.items():
        table[index] = func.name
    result.table = table

    return result
def _generate_struct_constructor(wgn: WasmGenerator, inp: ourlang.StructConstructor) -> None:
    """
    Compile: body of a struct constructor

    Allocates memory for the struct, stores every member from the
    local with the same name as the member, and ends with a local.get
    of the allocated address.
    """
    assert isinstance(inp.struct_type3.application, TypeApplication_Struct)

    members = inp.struct_type3.application.arguments

    address_var = wgn.temp_var_i32('struct_adr')

    # Allocate the required number of bytes in memory
    wgn.i32.const(calculate_alloc_size(inp.struct_type3))
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(address_var)

    # Store each member individually
    for member_name, member_type in members:
        member_info = TYPE_INFO_MAP.get(member_type.name, TYPE_INFO_CONSTRUCTED)

        wgn.local.get(address_var)
        wgn.add_statement('local.get', f'${member_name}')

        member_offset = calculate_member_offset(inp.struct_type3.name, members, member_name)
        wgn.add_statement(member_info.wasm_store_func, f'offset={member_offset}')

    # Return the allocated address
    wgn.local.get(address_var)