phasm/phasm/compiler.py
Johan B.W. de Vries dd4b9373ac Removes the special casing for foldl
Has to implement both functions as arguments and type
place holders (variables) for type constructors.

Probably have to introduce a type for functions
2025-05-13 19:38:20 +02:00

698 lines
22 KiB
Python

"""
This module contains the code to convert parsed Ourlang into WebAssembly code
"""
import struct
from typing import List, Optional
from . import ourlang, prelude, wasm
from .runtime import calculate_alloc_size, calculate_member_offset
from .stdlib import alloc as stdlib_alloc
from .stdlib import types as stdlib_types
from .type3.functions import TypeVariable
from .type3.routers import NoRouteForTypeException, TypeApplicationRouter
from .type3.typeclasses import Type3ClassMethod
from .type3.types import (
IntType3,
Type3,
TypeApplication_Struct,
TypeApplication_TypeInt,
TypeApplication_TypeStar,
TypeConstructor_StaticArray,
TypeConstructor_Tuple,
)
from .wasmgenerator import Generator as WasmGenerator
# Message attached to the `type3 is not None` assertions throughout this module
TYPE3_ASSERTION_ERROR = 'You must call phasm_type3 after calling phasm_parse before your program can be compiled'
# Maps the name of a Phasm type to the WebAssembly value type that is used as
# the prefix of its memory instructions (e.g. 'u8' is accessed with
# 'i32.load' / 'i32.store').
LOAD_STORE_TYPE_MAP = {
'i8': 'i32', # Have to use an i32, since there is no native i8 type
'u8': 'i32', # Have to use an i32, since there is no native u8 type
'i32': 'i32',
'i64': 'i64',
'u32': 'i32',
'u64': 'i64',
'f32': 'f32',
'f64': 'f64',
'bytes': 'i32', # Bytes are passed around as pointers
}
def phasm_compile(inp: ourlang.Module) -> wasm.Module:
    """
    Compile: public entry point

    Takes a parsed Phasm module and returns the WebAssembly module
    produced by `module`.
    """
    compiled_module = module(inp)
    return compiled_module
def type3(inp: Type3) -> wasm.WasmType:
    """
    Compile: type

    Maps a Phasm type onto the WebAssembly value type that is used
    for it in, for example, function parameters and return types.

    Raises NotImplementedError when no mapping is known for the type.
    """
    assert inp is not None, TYPE3_ASSERTION_ERROR

    # Candidates are tried top to bottom; the first equal type wins
    known_types = (
        (prelude.none, wasm.WasmTypeNone),
        # WebAssembly stores booleans as i32
        # See e.g. f32.eq, which is [f32 f32] -> [i32]
        (prelude.bool_, wasm.WasmTypeInt32),
        # WebAssembly has only support for 32 and 64 bits
        # So we need to store more memory per byte
        (prelude.u8, wasm.WasmTypeInt32),
        (prelude.u32, wasm.WasmTypeInt32),
        (prelude.u64, wasm.WasmTypeInt64),
        # Same story as u8: there is no 8 bit value type
        (prelude.i8, wasm.WasmTypeInt32),
        (prelude.i32, wasm.WasmTypeInt32),
        (prelude.i64, wasm.WasmTypeInt64),
        (prelude.f32, wasm.WasmTypeFloat32),
        (prelude.f64, wasm.WasmTypeFloat64),
        # bytes are passed as pointer, and pointers are i32
        (prelude.bytes_, wasm.WasmTypeInt32),
    )

    for candidate, wasm_type in known_types:
        if inp == candidate:
            return wasm_type()

    # Anything registered as pass-as-pointer is represented by its i32 address
    pointer_key = (prelude.InternalPassAsPointer, (inp, ))
    if pointer_key in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING:
        return wasm.WasmTypeInt32()

    raise NotImplementedError(type3, inp)
def tuple_instantiation(wgn: WasmGenerator, inp: ourlang.TupleInstantiation) -> None:
    """
    Compile: Instantiation (allocation) of a tuple

    Also handles static arrays, which are treated as a tuple that
    repeats the same element type. The emitted code allocates the
    memory, stores every element and leaves the address on the stack.
    """
    tuple_type = inp.type3
    assert tuple_type is not None, TYPE3_ASSERTION_ERROR

    application = tuple_type.application
    member_types: tuple[Type3, ...]
    if isinstance(application, TypeApplication_TypeStar):
        # Possibly paranoid assert. If we have a future variadic type,
        # does it also do this tuple instantation like this?
        assert isinstance(application.constructor, TypeConstructor_Tuple)
        member_types = application.arguments
    elif isinstance(application, TypeApplication_TypeInt):
        # Possibly paranoid assert. If we have a future type of kind * -> Int -> *,
        # does it also do this tuple instantation like this?
        assert isinstance(application.constructor, TypeConstructor_StaticArray)
        array_el_type, array_len = application.arguments
        member_types = (array_el_type, ) * array_len.value
    else:
        raise NotImplementedError('tuple_instantiation', tuple_type)

    # Human readable summary of the element types, only used in a comment
    name_parts = []
    for element in inp.elements:
        assert element.type3 is not None, TYPE3_ASSERTION_ERROR
        name_parts.append(f'{element.type3.name}, ')
    comment_elements = ''.join(name_parts)

    address_var = wgn.temp_var_i32('tuple_adr')
    wgn.add_statement('nop', comment=f'{address_var.name} := ({comment_elements})')

    # Allocate the required amount of bytes in memory
    wgn.i32.const(calculate_alloc_size(tuple_type, is_member=False))
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(address_var)

    # Store each element individually, one after the other
    store_offset = 0
    for element, member_type in zip(inp.elements, member_types, strict=True):
        assert element.type3 == member_type

        is_pointer = (
            (prelude.InternalPassAsPointer, (member_type, ))
            in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING
        )
        store_type = 'i32' if is_pointer else LOAD_STORE_TYPE_MAP[member_type.name]

        wgn.add_statement('nop', comment='PRE')
        wgn.local.get(address_var)
        expression(wgn, element)
        wgn.add_statement(f'{store_type}.store', f'offset={store_offset}')
        wgn.add_statement('nop', comment='POST')

        store_offset += calculate_alloc_size(member_type, is_member=True)

    # Return the allocated address
    wgn.local.get(address_var)
def expression_subscript_bytes(
    attrs: tuple[WasmGenerator, ourlang.Subscript],
) -> None:
    """
    Compile: Subscript on a bytes value

    Emits the subscripted value and the index, then calls the
    stdlib's __subscript_bytes__ function with them.
    """
    generator, subscript = attrs

    for operand in (subscript.varref, subscript.index):
        expression(generator, operand)

    generator.call(stdlib_types.__subscript_bytes__)
def expression_subscript_static_array(
    attrs: tuple[WasmGenerator, ourlang.Subscript],
    args: tuple[Type3, IntType3],
) -> None:
    """
    Compile: Subscript on a static array

    attrs holds the generator and the subscript expression; args holds
    the element type and the length of the static array type.

    The emitted code traps (unreachable) when the index is not below
    the array length, and otherwise loads the element found at
    address + index * element size.
    """
    wgn, inp = attrs
    el_type, el_len = args

    # OPTIMIZE: If index is a constant, we can use offset instead of multiply
    #           and we don't need to do the out of bounds check

    expression(wgn, inp.varref)

    tmp_var = wgn.temp_var_i32('index')
    expression(wgn, inp.index)
    wgn.local.tee(tmp_var)

    # Out of bounds check based on el_len.value
    # The comparison is unsigned, so a negative index is caught as well
    wgn.i32.const(el_len.value)
    wgn.i32.ge_u()
    with wgn.if_():
        wgn.unreachable(comment='Out of bounds')

    # The stride has to be the size of the element as a member, since
    # that is what tuple_instantiation advances by when storing elements
    wgn.local.get(tmp_var)
    wgn.i32.const(calculate_alloc_size(el_type, is_member=True))
    wgn.i32.mul()
    wgn.i32.add()

    # Mirror the store side: pass-as-pointer elements are stored as i32 address
    if (prelude.InternalPassAsPointer, (el_type, )) in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING:
        mtyp = 'i32'
    else:
        mtyp = LOAD_STORE_TYPE_MAP[el_type.name]

    wgn.add_statement(f'{mtyp}.load')
def expression_subscript_tuple(
    attrs: tuple[WasmGenerator, ourlang.Subscript],
    args: tuple[Type3, ...],
) -> None:
    """
    Compile: Subscript on a tuple

    attrs holds the generator and the subscript expression; args holds
    the element types of the tuple type. The index has to be a constant
    integer, so the element's offset is computed at compile time.
    """
    wgn, inp = attrs

    assert isinstance(inp.index, ourlang.ConstantPrimitive)
    assert isinstance(inp.index.value, int)

    # Skip over all elements in front of the requested one. This has to use
    # the size of each element as a member, since that is what
    # tuple_instantiation advances its store offset by.
    offset = 0
    for el_type in args[0:inp.index.value]:
        assert el_type is not None, TYPE3_ASSERTION_ERROR
        offset += calculate_alloc_size(el_type, is_member=True)

    el_type = args[inp.index.value]
    assert el_type is not None, TYPE3_ASSERTION_ERROR

    expression(wgn, inp.varref)

    # Pass-as-pointer elements are stored as their i32 address
    if (prelude.InternalPassAsPointer, (el_type, )) in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING:
        mtyp = 'i32'
    else:
        mtyp = LOAD_STORE_TYPE_MAP[el_type.name]

    wgn.add_statement(f'{mtyp}.load', f'offset={offset}')
# Routes the compilation of a subscript expression to one of the
# expression_subscript_* handlers. `expression` calls this router with the
# (generator, subscript) pair and the type of the subscripted value.
# NOTE(review): the handlers registered with `add` take a second parameter
# with the type's arguments while the `add_n` handler does not - presumably
# that is the difference between the two; confirm in TypeApplicationRouter.
SUBSCRIPT_ROUTER = TypeApplicationRouter[tuple[WasmGenerator, ourlang.Subscript], None]()
SUBSCRIPT_ROUTER.add_n(prelude.bytes_, expression_subscript_bytes)
SUBSCRIPT_ROUTER.add(prelude.static_array, expression_subscript_static_array)
SUBSCRIPT_ROUTER.add(prelude.tuple_, expression_subscript_tuple)
def _build_type_var_map(
    method: Type3ClassMethod,
    typed_exprs: list[ourlang.Expression],
) -> dict[TypeVariable, Type3]:
    """
    Work out which type each type variable of a type class method stands for

    typed_exprs has to line up with method.signature.args: the argument
    expressions followed by the expression that produces the result.
    zip(strict=True) raises a ValueError when the lengths differ.

    Raises NotImplementedError when a signature entry is neither a fixed
    type nor a type variable.
    """
    type_var_map: dict[TypeVariable, Type3] = {}

    for type_var, arg_expr in zip(method.signature.args, typed_exprs, strict=True):
        assert arg_expr.type3 is not None, TYPE3_ASSERTION_ERROR

        if isinstance(type_var, Type3):
            # Fixed type, not part of the lookup requirements
            continue

        if isinstance(type_var, TypeVariable):
            type_var_map[type_var] = arg_expr.type3
            continue

        raise NotImplementedError(type_var, arg_expr.type3)

    return type_var_map
def expression(wgn: WasmGenerator, inp: ourlang.Expression) -> None:
    """
    Compile: Any expression

    Emits the statements that evaluate the given expression into the
    given generator.

    Raises Exception for struct and tuple constants, which are not
    compiled through this function, and NotImplementedError for
    anything else that is not supported.
    """
    if isinstance(inp, (ourlang.ConstantStruct, ourlang.ConstantTuple, )):
        # These are implemented elsewhere
        raise Exception('Constant structs and tuples are implemented elsewhere')

    if isinstance(inp, ourlang.ConstantPrimitive):
        assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

        if inp.type3 in (prelude.i8, prelude.u8, ):
            # No native u8 type - treat as i32, with caution
            assert isinstance(inp.value, int)
            wgn.i32.const(inp.value)
            return

        if inp.type3 in (prelude.i32, prelude.u32, ):
            assert isinstance(inp.value, int)
            wgn.i32.const(inp.value)
            return

        if inp.type3 in (prelude.i64, prelude.u64, ):
            assert isinstance(inp.value, int)
            wgn.i64.const(inp.value)
            return

        if inp.type3 == prelude.f32:
            assert isinstance(inp.value, float)
            wgn.f32.const(inp.value)
            return

        if inp.type3 == prelude.f64:
            assert isinstance(inp.value, float)
            wgn.f64.const(inp.value)
            return

        raise NotImplementedError(f'Constants with type {inp.type3:s}')

    if isinstance(inp, ourlang.ConstantBytes):
        # The bytes live in the module's data; refer to them by address
        assert inp.data_block.address is not None, 'Value not allocated'
        wgn.i32.const(inp.data_block.address)
        return

    if isinstance(inp, ourlang.VariableReference):
        if isinstance(inp.variable, ourlang.FunctionParam):
            wgn.add_statement('local.get', f'${inp.variable.name}')
            return

        if isinstance(inp.variable, ourlang.ModuleConstantDef):
            assert inp.type3 is not None, TYPE3_ASSERTION_ERROR

            if (prelude.InternalPassAsPointer, (inp.type3, )) in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING:
                # Memory stored constant; refer to it by its address
                assert isinstance(inp.variable.constant, (ourlang.ConstantBytes, ourlang.ConstantStruct, ourlang.ConstantTuple, ))

                address = inp.variable.constant.data_block.address
                assert address is not None, 'Value not allocated'
                wgn.i32.const(address)
                return

            # Any other constant is compiled in place
            expression(wgn, inp.variable.constant)
            return

        raise NotImplementedError(expression, inp.variable)

    if isinstance(inp, ourlang.BinaryOp):
        expression(wgn, inp.left)
        expression(wgn, inp.right)

        # The operator's signature covers both operands and the result
        type_var_map = _build_type_var_map(inp.operator, [inp.left, inp.right, inp])

        router = prelude.PRELUDE_TYPE_CLASS_INSTANCE_METHODS[inp.operator]
        router(wgn, type_var_map)
        return

    if isinstance(inp, ourlang.FunctionCall):
        for arg in inp.arguments:
            expression(wgn, arg)

        if isinstance(inp.function, Type3ClassMethod):
            # The method's signature covers all arguments and the result
            type_var_map = _build_type_var_map(inp.function, inp.arguments + [inp])

            router = prelude.PRELUDE_TYPE_CLASS_INSTANCE_METHODS[inp.function]
            try:
                router(wgn, type_var_map)
            except NoRouteForTypeException as exc:
                raise NotImplementedError(str(inp.function), type_var_map) from exc
            return

        wgn.add_statement('call', f'${inp.function.name}')
        return

    if isinstance(inp, ourlang.TupleInstantiation):
        tuple_instantiation(wgn, inp)
        return

    if isinstance(inp, ourlang.Subscript):
        assert inp.varref.type3 is not None, TYPE3_ASSERTION_ERROR

        # Type checker guarantees we don't get routing errors
        SUBSCRIPT_ROUTER((wgn, inp, ), inp.varref.type3)
        return

    if isinstance(inp, ourlang.AccessStructMember):
        assert inp.struct_type3 is not None, TYPE3_ASSERTION_ERROR
        assert isinstance(inp.struct_type3.application, TypeApplication_Struct)

        member_type = dict(inp.struct_type3.application.arguments)[inp.member]

        # Mirror _generate_struct_constructor: members of a pass-as-pointer
        # type are stored as their i32 address
        if (prelude.InternalPassAsPointer, (member_type, )) in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING:
            mtyp = 'i32'
        else:
            mtyp = LOAD_STORE_TYPE_MAP[member_type.name]

        expression(wgn, inp.varref)
        wgn.add_statement(f'{mtyp}.load', 'offset=' + str(calculate_member_offset(
            inp.struct_type3.name, inp.struct_type3.application.arguments, inp.member
        )))
        return

    raise NotImplementedError(expression, inp)
def statement_return(wgn: WasmGenerator, inp: ourlang.StatementReturn) -> None:
    """
    Compile: Return statement

    Emits the returned expression, followed by a return instruction.
    """
    returned_value = inp.value
    expression(wgn, returned_value)

    wgn.return_()
def statement_if(wgn: WasmGenerator, inp: ourlang.StatementIf) -> None:
    """
    Compile: If statement

    Emits the test expression and an if block holding the statements
    of the true branch.

    Raises NotImplementedError when the statement has an else branch.
    """
    expression(wgn, inp.test)

    with wgn.if_():
        for true_stat in inp.statements:
            statement(wgn, true_stat)

        if inp.else_statements:
            # TODO: Else branches are not supported yet. The idea is to
            # emit an 'else' statement here, followed by the compiled
            # inp.else_statements.
            raise NotImplementedError
def statement(wgn: WasmGenerator, inp: ourlang.Statement) -> None:
    """
    Compile: any statement

    Dispatches on the kind of statement. A pass statement does not
    emit anything.

    Raises NotImplementedError for any other kind of statement.
    """
    if isinstance(inp, ourlang.StatementReturn):
        statement_return(wgn, inp)
    elif isinstance(inp, ourlang.StatementIf):
        statement_if(wgn, inp)
    elif not isinstance(inp, ourlang.StatementPass):
        raise NotImplementedError(statement, inp)
def function_argument(inp: ourlang.FunctionParam) -> wasm.Param:
    """
    Compile: function argument

    Returns the parameter's name paired with its WebAssembly type.
    """
    param_name = inp.name
    param_type = type3(inp.type3)

    return (param_name, param_type)
def import_(inp: ourlang.Function) -> wasm.Import:
    """
    Compile: imported function

    The function's own name is passed twice to wasm.Import, next to
    the value of its `imported` attribute, its compiled parameters
    and its compiled return type.
    """
    assert inp.imported

    imported_from = inp.imported
    function_name = inp.name

    params = []
    for param in inp.posonlyargs:
        params.append(function_argument(param))

    returns = type3(inp.returns_type3)

    return wasm.Import(imported_from, function_name, function_name, params, returns)
def function(inp: ourlang.Function) -> wasm.Function:
    """
    Compile: function

    Struct constructors get a generated body; any other function
    has its statements compiled one by one. The function is only
    given an export name when it is marked as exported.
    """
    assert not inp.imported

    wgn = WasmGenerator()

    if not isinstance(inp, ourlang.StructConstructor):
        for body_stat in inp.statements:
            statement(wgn, body_stat)
    else:
        _generate_struct_constructor(wgn, inp)

    function_name = inp.name
    export_name = inp.name if inp.exported else None

    params = []
    for param in inp.posonlyargs:
        params.append(function_argument(param))

    # Locals that the generator handed out while compiling the body
    local_vars = []
    for local_name, local_var in wgn.locals.items():
        local_vars.append((local_name, local_var.wasm_type(), ))

    returns = type3(inp.returns_type3)

    return wasm.Function(
        function_name,
        export_name,
        params,
        local_vars,
        returns,
        wgn.statements,
    )
def module_data_u8(inp: int) -> bytes:
    """
    Compile: module data, u8 value

    # FIXME: All u8 values are stored as u32
    """
    # Should be 'B', but for now the value takes up four bytes
    widened_u8 = struct.Struct('<I')
    return widened_u8.pack(inp)
def module_data_u32(inp: int) -> bytes:
    """
    Compile: module data, u32 value

    The value is packed as four bytes, little endian, unsigned.
    """
    little_endian_u32 = struct.Struct('<I')
    return little_endian_u32.pack(inp)
def module_data_u64(inp: int) -> bytes:
    """
    Compile: module data, u64 value

    The value is packed as eight bytes, little endian, unsigned.
    """
    little_endian_u64 = struct.Struct('<Q')
    return little_endian_u64.pack(inp)
def module_data_i8(inp: int) -> bytes:
    """
    Compile: module data, i8 value

    # FIXME: All i8 values are stored as i32
    """
    # Should be a 'b', but for now the value takes up four bytes
    widened_i8 = struct.Struct('<i')
    return widened_i8.pack(inp)
def module_data_i32(inp: int) -> bytes:
    """
    Compile: module data, i32 value

    The value is packed as four bytes, little endian, signed.
    """
    little_endian_i32 = struct.Struct('<i')
    return little_endian_i32.pack(inp)
def module_data_i64(inp: int) -> bytes:
    """
    Compile: module data, i64 value

    The value is packed as eight bytes, little endian, signed.
    """
    little_endian_i64 = struct.Struct('<q')
    return little_endian_i64.pack(inp)
def module_data_f32(inp: float) -> bytes:
    """
    Compile: module data, f32 value

    The value is packed as a four byte, little endian float.
    """
    little_endian_f32 = struct.Struct('<f')
    return little_endian_f32.pack(inp)
def module_data_f64(inp: float) -> bytes:
    """
    Compile: module data, f64 value

    The value is packed as an eight byte, little endian float.
    """
    little_endian_f64 = struct.Struct('<d')
    return little_endian_f64.pack(inp)
def module_data(inp: ourlang.ModuleData) -> bytes:
    """
    Compile: module data

    Lays out every data block behind the allocator's header and
    returns the resulting memory image. As a side effect, the
    address of each block is stored on the block itself.

    Raises NotImplementedError for constants of an unsupported type.
    """
    # Primitive types with the Python type of their value and the function
    # that packs them; tried top to bottom
    primitive_packers = (
        (prelude.u8, int, module_data_u8),
        (prelude.u32, int, module_data_u32),
        (prelude.u64, int, module_data_u64),
        (prelude.i8, int, module_data_i8),
        (prelude.i32, int, module_data_i32),
        (prelude.i64, int, module_data_i64),
        (prelude.f32, float, module_data_f32),
        (prelude.f64, float, module_data_f64),
    )

    next_free_ptr = stdlib_alloc.UNALLOC_PTR
    stored_blocks: List[bytes] = []

    for block in inp.blocks:
        block.address = next_free_ptr + 4  # 4 bytes for allocator header

        chunks: List[bytes] = []
        for constant in block.data:
            assert constant.type3 is not None, TYPE3_ASSERTION_ERROR

            if isinstance(constant, ourlang.ConstantMemoryStored) and block is not constant.data_block:
                # It's stored in a different block
                # We only need to store its address
                # This happens for example when a tuple refers
                # to a bytes constant
                assert constant.data_block.address is not None, 'Referred memory not yet stored'
                chunks.append(module_data_u32(constant.data_block.address))
                continue

            for primitive_type, value_type, pack in primitive_packers:
                if constant.type3 == primitive_type:
                    assert isinstance(constant, ourlang.ConstantPrimitive)
                    assert isinstance(constant.value, value_type)
                    chunks.append(pack(constant.value))
                    break
            else:
                # Not a primitive, so bytes is the only supported type left
                if not constant.type3 == prelude.bytes_:
                    raise NotImplementedError(constant, constant.type3)

                # Bytes are stored as their length, followed by their content
                assert isinstance(constant, ourlang.ConstantBytes)
                assert isinstance(constant.value, bytes)
                chunks.append(module_data_u32(len(constant.value)))
                chunks.append(constant.value)

        block_data = b''.join(chunks)

        # Every block is prefixed with its size
        stored_blocks.append(module_data_u32(len(block_data)) + block_data)
        next_free_ptr += 4 + len(block_data)

    header = b''.join((
        # Store that we've initialized the memory
        module_data_u32(stdlib_alloc.IDENTIFIER),
        # Store the first reserved i32
        module_data_u32(0),
        # Store the pointer towards the first free block
        # In this case, 0 since we haven't freed any blocks yet
        module_data_u32(0),
        # Store the pointer towards the first unallocated block
        # That is, the first address behind the blocks stored below
        module_data_u32(next_free_ptr),
    ))

    # Store the actual data
    return header + b''.join(stored_blocks)
def module(inp: ourlang.Module) -> wasm.Module:
    """
    Compile: module

    Fills a fresh WebAssembly module with the memory image, the
    imported functions and the functions to compile. The stdlib
    functions listed below are always included, in front of the
    module's own functions.
    """
    result = wasm.Module()

    result.memory.data = module_data(inp.data)

    imports = []
    for candidate in inp.functions.values():
        if candidate.imported:
            imports.append(import_(candidate))
    result.imports = imports

    functions = [
        stdlib_alloc.__find_free_block__,
        stdlib_alloc.__alloc__,
        stdlib_types.__alloc_bytes__,
        stdlib_types.__subscript_bytes__,
        stdlib_types.__u32_ord_min__,
        stdlib_types.__u64_ord_min__,
        stdlib_types.__i32_ord_min__,
        stdlib_types.__i64_ord_min__,
        stdlib_types.__u32_ord_max__,
        stdlib_types.__u64_ord_max__,
        stdlib_types.__i32_ord_max__,
        stdlib_types.__i64_ord_max__,
        stdlib_types.__i32_intnum_abs__,
        stdlib_types.__i64_intnum_abs__,
        stdlib_types.__u32_pow2__,
        stdlib_types.__u8_rotl__,
        stdlib_types.__u8_rotr__,
    ]
    for candidate in inp.functions.values():
        if not candidate.imported:
            functions.append(function(candidate))
    result.functions = functions

    return result
def _generate_struct_constructor(wgn: WasmGenerator, inp: ourlang.StructConstructor) -> None:
    """
    Compile: body of a struct constructor

    The emitted code allocates memory for the struct, stores each
    member from the local with the member's name, and leaves the
    address of the struct on the stack.

    Raises NotImplementedError when a member's type has no known
    store instruction.
    """
    struct_type = inp.struct_type3
    assert isinstance(struct_type.application, TypeApplication_Struct)

    members = struct_type.application.arguments

    address_var = wgn.temp_var_i32('struct_adr')

    # Allocate the required amount of bytes in memory
    wgn.i32.const(calculate_alloc_size(struct_type))
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(address_var)

    # Store each member individually
    for member_name, member_type in members:
        is_pointer = (
            (prelude.InternalPassAsPointer, (member_type, ))
            in prelude.PRELUDE_TYPE_CLASS_INSTANCES_EXISTING
        )

        # Pass-as-pointer members are stored as their i32 address
        store_type: Optional[str] = 'i32' if is_pointer else LOAD_STORE_TYPE_MAP.get(member_type.name)
        if store_type is None:
            raise NotImplementedError(expression, inp, member_type)

        wgn.local.get(address_var)
        wgn.add_statement('local.get', f'${member_name}')

        member_offset = calculate_member_offset(struct_type.name, members, member_name)
        wgn.add_statement(f'{store_type}.store', f'offset={member_offset}')

    # Return the allocated address
    wgn.local.get(address_var)