phasm/phasm/compiler.py
Johan B.W. de Vries 05e7c356ea Fixes
- Moved the three ConstraintBase functions into one, as it was
  giving trouble calculating everything
- Gave each constraint the option to have a comment
- SameTypeConstraint now can have multiple arguments
- Redid the infinite loop detection
- Implemented some more basic operators
- Redid the division by zero for floats, it wasn't giving the
  right results.
- Also postponed updating the AST type3 until afterwards
- Various linting fixes
2022-11-27 14:11:14 +01:00

773 lines
25 KiB
Python

"""
This module contains the code to convert parsed Ourlang into WebAssembly code
"""
from typing import List, Union
import struct
from . import codestyle
from . import ourlang
from .type3 import types as type3types
from . import wasm
from .stdlib import alloc as stdlib_alloc
from .stdlib import types as stdlib_types
from .wasmgenerator import Generator as WasmGenerator
# Maps the name of a primitive type to the WebAssembly value type whose
# load / store instructions are used for struct members of that type
LOAD_STORE_TYPE_MAP = {
    # There is no native u8 type, so u8 values go through the i32 instructions
    'u8': 'i32',

    'i32': 'i32',
    'i64': 'i64',

    'u32': 'i32',
    'u64': 'i64',

    'f32': 'f32',
    'f64': 'f64',
}
def phasm_compile(inp: ourlang.Module) -> wasm.Module:
    """
    Public entry point of the compiler

    Takes a parsed Phasm module and hands back the WebAssembly
    module that module() produces for it.
    """
    compiled = module(inp)
    return compiled
def type3(inp: type3types.Type3OrPlaceholder) -> wasm.WasmType:
    """
    Compile: type

    Translates a resolved type into the WebAssembly value type that is
    used for it in, for example, function parameters and return types.

    Raises NotImplementedError for types that have no translation (yet).
    """
    assert isinstance(inp, type3types.Type3), type3types.TYPE3_ASSERTION_ERROR

    # Primitive types are matched on identity, in this order
    primitives = (
        # WebAssembly has only support for 32 and 64 bits
        # So we need to store more memory per byte
        (type3types.u8, wasm.WasmTypeInt32, ),
        (type3types.u32, wasm.WasmTypeInt32, ),
        (type3types.u64, wasm.WasmTypeInt64, ),
        (type3types.i32, wasm.WasmTypeInt32, ),
        (type3types.i64, wasm.WasmTypeInt64, ),
        (type3types.f32, wasm.WasmTypeFloat32, ),
        (type3types.f64, wasm.WasmTypeFloat64, ),
    )

    for candidate, wasm_type in primitives:
        if inp is candidate:
            return wasm_type()

    if isinstance(inp, type3types.StructType3):
        # Structs and tuples are passed as pointer
        # And pointers are i32
        return wasm.WasmTypeInt32()

    raise NotImplementedError(type3, inp)
# Each map below translates a Phasm binary operator into the name of the
# WebAssembly instruction, without the value type prefix

# Operators that work for i32, i64, f32, f64
OPERATOR_MAP = {
    '+': 'add',
    '-': 'sub',
    '*': 'mul',
    '==': 'eq',
}

U8_OPERATOR_MAP = {
    # Under the hood, this is an i32
    # Implementing Right Shift XOR, OR, AND is fine since the 3 remaining
    # bytes stay zero after this operation
    '>>': 'shr_u',
    '^': 'xor',
    '|': 'or',
    '&': 'and',
}

U32_OPERATOR_MAP = {
    '<': 'lt_u',
    '>': 'gt_u',
    '<=': 'le_u',
    '>=': 'ge_u',
    '<<': 'shl',
    '>>': 'shr_u',
    '^': 'xor',
    '|': 'or',
    '&': 'and',
    # Division by zero is a trap and the program will panic
    '/': 'div_u',
}

U64_OPERATOR_MAP = {
    '<': 'lt_u',
    '>': 'gt_u',
    '<=': 'le_u',
    '>=': 'ge_u',
    '<<': 'shl',
    '>>': 'shr_u',
    '^': 'xor',
    '|': 'or',
    '&': 'and',
    # Division by zero is a trap and the program will panic
    '/': 'div_u',
}

I32_OPERATOR_MAP = {
    '<': 'lt_s',
    '>': 'gt_s',
    '<=': 'le_s',
    '>=': 'ge_s',
    # Division by zero is a trap and the program will panic
    '/': 'div_s',
}

I64_OPERATOR_MAP = {
    '<': 'lt_s',
    '>': 'gt_s',
    '<=': 'le_s',
    '>=': 'ge_s',
    # Division by zero is a trap and the program will panic
    '/': 'div_s',
}

F32_OPERATOR_MAP = {
    # Unlike the integer divisions, a float division by zero does not trap;
    # WebAssembly follows IEEE 754 and produces an infinity or NaN
    '/': 'div',
}

F64_OPERATOR_MAP = {
    # Unlike the integer divisions, a float division by zero does not trap;
    # WebAssembly follows IEEE 754 and produces an infinity or NaN
    '/': 'div',
}
def expression(wgn: WasmGenerator, inp: ourlang.Expression) -> None:
    """
    Compile: Any expression

    Adds the statements that evaluate the given expression to the
    generator. The work is dispatched on the class of the expression
    node; the checks are done in a fixed order and the first match wins.

    Raises NotImplementedError for expression nodes, types or operators
    that are not supported (yet).
    """
    if isinstance(inp, ourlang.ConstantPrimitive):
        _expression_constant(wgn, inp)
        return

    if isinstance(inp, ourlang.VariableReference):
        _expression_variable_reference(wgn, inp)
        return

    if isinstance(inp, ourlang.BinaryOp):
        _expression_binary_op(wgn, inp)
        return

    if isinstance(inp, ourlang.UnaryOp):
        _expression_unary_op(wgn, inp)
        return

    if isinstance(inp, ourlang.FunctionCall):
        # Arguments are evaluated left to right, then the function is called
        for arg in inp.arguments:
            expression(wgn, arg)

        wgn.add_statement('call', f'${inp.function.name}')
        return

    if isinstance(inp, ourlang.Subscript):
        _expression_subscript(wgn, inp)
        return

    # TODO: Broken after new type system
    # if isinstance(inp, ourlang.AccessBytesIndex):
    #     if not isinstance(inp.type, typing.TypeUInt8):
    #         raise NotImplementedError(inp, inp.type)
    #
    #     expression(wgn, inp.varref)
    #     expression(wgn, inp.index)
    #     wgn.call(stdlib_types.__subscript_bytes__)
    #     return

    if isinstance(inp, ourlang.AccessStructMember):
        _expression_access_struct_member(wgn, inp)
        return

    # if isinstance(inp, ourlang.AccessTupleMember):
    #     mtyp = LOAD_STORE_TYPE_MAP.get(inp.member.type.__class__)
    #     if mtyp is None:
    #         # In the future might extend this by having structs or tuples
    #         # as members of struct or tuples
    #         raise NotImplementedError(expression, inp, inp.member)
    #
    #     expression(wgn, inp.varref)
    #     wgn.add_statement(f'{mtyp}.load', 'offset=' + str(inp.member.offset))
    #     return
    #
    # if isinstance(inp, ourlang.AccessStaticArrayMember):
    #     mtyp = LOAD_STORE_TYPE_MAP.get(inp.static_array.member_type.__class__)
    #     if mtyp is None:
    #         # In the future might extend this by having structs or tuples
    #         # as members of static arrays
    #         raise NotImplementedError(expression, inp, inp.member)
    #
    #     expression(wgn, inp.varref)
    #     expression(wgn, inp.member)
    #     wgn.i32.const(inp.static_array.member_type.alloc_size())
    #     wgn.i32.mul()
    #     wgn.i32.add()
    #     wgn.add_statement(f'{mtyp}.load')
    #     return

    if isinstance(inp, ourlang.Fold):
        expression_fold(wgn, inp)
        return

    raise NotImplementedError(expression, inp)


def _expression_constant(wgn: WasmGenerator, inp: ourlang.ConstantPrimitive) -> None:
    """
    Compile: Constant of a primitive type, as a const instruction
    """
    assert isinstance(inp.type3, type3types.Type3), type3types.TYPE3_ASSERTION_ERROR

    typ = inp.type3

    if typ is type3types.u8 or typ is type3types.i32 or typ is type3types.u32:
        # There is no native u8 type - it is treated as i32, with caution
        assert isinstance(inp.value, int)
        wgn.i32.const(inp.value)
        return

    if typ is type3types.i64 or typ is type3types.u64:
        assert isinstance(inp.value, int)
        wgn.i64.const(inp.value)
        return

    if typ is type3types.f32:
        assert isinstance(inp.value, float)
        wgn.f32.const(inp.value)
        return

    if typ is type3types.f64:
        assert isinstance(inp.value, float)
        wgn.f64.const(inp.value)
        return

    raise NotImplementedError(f'Constants with type {inp.type3}')


def _expression_variable_reference(wgn: WasmGenerator, inp: ourlang.VariableReference) -> None:
    """
    Compile: Reference to a function parameter or a module constant
    """
    if isinstance(inp.variable, ourlang.FunctionParam):
        wgn.add_statement('local.get', f'${inp.variable.name}')
        return

    if isinstance(inp.variable, ourlang.ModuleConstantDef):
        assert isinstance(inp.type3, type3types.Type3), type3types.TYPE3_ASSERTION_ERROR

        # TODO: Broken after new type system
        # if isinstance(inp.type, typing.TypeTuple):
        #     assert isinstance(inp.definition.constant, ourlang.ConstantTuple)
        #     assert inp.definition.data_block is not None, 'Combined values are memory stored'
        #     assert inp.definition.data_block.address is not None, 'Value not allocated'
        #     wgn.i32.const(inp.definition.data_block.address)
        #     return
        #
        # if tc_prim.primitive == typing.TypeConstraintPrimitive.Primitive.STATIC_ARRAY:
        #     assert inp.variable.data_block is not None, 'Combined values are memory stored'
        #     assert inp.variable.data_block.address is not None, 'Value not allocated'
        #     wgn.i32.const(inp.variable.data_block.address)
        #     return

        assert inp.variable.data_block is None, 'Primitives are not memory stored'

        # The constant itself is compiled at the place where it is referenced
        expression(wgn, inp.variable.constant)
        return

    raise NotImplementedError(expression, inp.variable)


def _expression_binary_op(wgn: WasmGenerator, inp: ourlang.BinaryOp) -> None:
    """
    Compile: Binary operator; both operands first, then the instruction
    """
    expression(wgn, inp.left)
    expression(wgn, inp.right)

    assert isinstance(inp.type3, type3types.Type3), type3types.TYPE3_ASSERTION_ERROR

    # FIXME: Re-implement build-in operators

    # Per type: the instruction prefix and the operator maps to search, in order.
    # u8 deliberately does not use the shared OPERATOR_MAP.
    dispatch = (
        (type3types.u8, 'i32', (U8_OPERATOR_MAP, ), ),
        (type3types.u32, 'i32', (OPERATOR_MAP, U32_OPERATOR_MAP, ), ),
        (type3types.u64, 'i64', (OPERATOR_MAP, U64_OPERATOR_MAP, ), ),
        (type3types.i32, 'i32', (OPERATOR_MAP, I32_OPERATOR_MAP, ), ),
        (type3types.i64, 'i64', (OPERATOR_MAP, I64_OPERATOR_MAP, ), ),
        (type3types.f32, 'f32', (OPERATOR_MAP, F32_OPERATOR_MAP, ), ),
        (type3types.f64, 'f64', (OPERATOR_MAP, F64_OPERATOR_MAP, ), ),
    )

    for typ, prefix, operator_maps in dispatch:
        if inp.type3 is not typ:
            continue

        for operator_map in operator_maps:
            if operator := operator_map.get(inp.operator, None):
                wgn.add_statement(f'{prefix}.{operator}')
                return

    raise NotImplementedError(expression, inp.type3, inp.operator)


def _expression_unary_op(wgn: WasmGenerator, inp: ourlang.UnaryOp) -> None:
    """
    Compile: Unary operator; the operand first, then the instruction
    """
    expression(wgn, inp.right)

    assert isinstance(inp.type3, type3types.Type3), type3types.TYPE3_ASSERTION_ERROR

    # Only the float operators that WebAssembly has an instruction for
    for typ, prefix in ((type3types.f32, 'f32', ), (type3types.f64, 'f64', ), ):
        if inp.type3 is typ and inp.operator in ourlang.WEBASSEMBLY_BUILTIN_FLOAT_OPS:
            wgn.add_statement(f'{prefix}.{inp.operator}')
            return

    # TODO: Broken after new type system
    # if isinstance(inp.type, typing.TypeInt32):
    #     if inp.operator == 'len':
    #         if isinstance(inp.right.type, typing.TypeBytes):
    #             wgn.i32.load()
    #             return

    # if inp.operator == 'cast':
    #     if isinstance(inp.type, typing.TypeUInt32) and isinstance(inp.right.type, typing.TypeUInt8):
    #         # Nothing to do, you can use an u8 value as a u32 no problem
    #         return

    raise NotImplementedError(expression, inp.type3, inp.operator)


def _expression_subscript(wgn: WasmGenerator, inp: ourlang.Subscript) -> None:
    """
    Compile: Subscript; not supported at the moment, always raises
    """
    # assert inp.varref.type3 is not None, typing.ASSERTION_ERROR
    #
    # assert inp.varref.type_var is not None, typing.ASSERTION_ERROR
    # tc_type = inp.varref.type_var.get_type()
    # if tc_type is None:
    #     raise NotImplementedError(expression, inp, inp.varref.type_var)

    # if tc_prim.primitive == typing.TypeConstraintPrimitive.Primitive.STATIC_ARRAY:
    #     if not isinstance(inp.index, ourlang.ConstantPrimitive):
    #         raise NotImplementedError(expression, inp, inp.index)
    #     if not isinstance(inp.index.value, int):
    #         raise NotImplementedError(expression, inp, inp.index.value)
    #
    #     assert inp.type_var is not None, typing.ASSERTION_ERROR
    #     mtyp = typing.simplify(inp.type_var)
    #     if mtyp is None:
    #         raise NotImplementedError(expression, inp, inp.varref.type_var, mtyp)
    #
    #     if mtyp == 'u8':
    #         # u8 operations are done using i32, since WASM does not have u8 operations
    #         mtyp = 'i32'
    #     elif mtyp == 'u32':
    #         # u32 operations are done using i32, using _u operations
    #         mtyp = 'i32'
    #     elif mtyp == 'u64':
    #         # u64 operations are done using i64, using _u operations
    #         mtyp = 'i64'
    #
    #     tc_subs = inp.varref.type_var.get_constraint(typing.TypeConstraintSubscript)
    #     if tc_subs is None:
    #         raise NotImplementedError(expression, inp, inp.varref.type_var)
    #
    #     assert 0 < len(tc_subs.members)
    #     tc_bits = tc_subs.members[0].get_constraint(typing.TypeConstraintBitWidth)
    #     if tc_bits is None or len(tc_bits.oneof) > 1:
    #         raise NotImplementedError(expression, inp, inp.varref.type_var)
    #
    #     bitwidth = next(iter(tc_bits.oneof))
    #     if bitwidth % 8 != 0:
    #         raise NotImplementedError(expression, inp, inp.varref.type_var)
    #
    #     expression(wgn, inp.varref)
    #     wgn.add_statement(f'{mtyp}.load', 'offset=' + str(bitwidth // 8 * inp.index.value))
    #     return

    raise NotImplementedError(expression, inp, inp.varref.type3)


def _expression_access_struct_member(wgn: WasmGenerator, inp: ourlang.AccessStructMember) -> None:
    """
    Compile: Struct member access; the struct address, then a load at the member offset
    """
    mtyp = LOAD_STORE_TYPE_MAP.get(inp.struct_type3.members[inp.member].name)
    if mtyp is None:
        # In the future might extend this by having structs or tuples
        # as members of struct or tuples
        raise NotImplementedError(expression, inp, inp.struct_type3)

    expression(wgn, inp.varref)
    wgn.add_statement(f'{mtyp}.load', 'offset=' + str(_calculate_member_offset(
        inp.struct_type3, inp.member
    )))
def expression_fold(wgn: WasmGenerator, inp: ourlang.Fold) -> None:
    """
    Compile: Fold expression

    NOTE: This is currently disabled. After asserting that the type of
    the fold is resolved, NotImplementedError is always raised; all code
    after that raise is unreachable and only kept as a reference for when
    fold gets re-implemented on top of the new type system.

    The unreachable code folds over a bytes value: it keeps an accumulator
    that starts at inp.base, and for every byte of inp.iter it calls
    inp.func with the accumulator and that byte. The accumulator is left
    on the stack as the result.
    """
    assert isinstance(inp.type3, type3types.Type3), type3types.TYPE3_ASSERTION_ERROR

    raise NotImplementedError('TODO: Broken after new type system')

    # --- Everything below is unreachable, see the docstring ---

    # Still refers to the .type attribute from before the new type system
    if inp.iter.type.__class__.__name__ != 'TypeBytes':
        raise NotImplementedError(expression, inp, inp.iter.type)

    # The 'nop' statements do nothing by themselves,
    # they only carry a comment into the generated statements
    wgn.add_statement('nop', comment='acu :: u8')
    acu_var = wgn.temp_var_u8(f'fold_{codestyle.type3(inp.type3)}_acu')
    wgn.add_statement('nop', comment='adr :: bytes*')
    adr_var = wgn.temp_var_i32('fold_i32_adr')
    wgn.add_statement('nop', comment='len :: i32')
    len_var = wgn.temp_var_i32('fold_i32_len')

    wgn.add_statement('nop', comment='acu = base')
    expression(wgn, inp.base)
    wgn.local.set(acu_var)

    wgn.add_statement('nop', comment='adr = adr(iter)')
    expression(wgn, inp.iter)
    wgn.local.set(adr_var)

    # The length is loaded as an i32 from the address of the bytes value itself
    wgn.add_statement('nop', comment='len = len(iter)')
    wgn.local.get(adr_var)
    wgn.i32.load()
    wgn.local.set(len_var)

    wgn.add_statement('nop', comment='i = 0')
    idx_var = wgn.temp_var_i32(f'fold_{codestyle.type3(inp.type3)}_idx')
    wgn.i32.const(0)
    wgn.local.set(idx_var)

    # Guard against an empty value: the loop body below always runs at least once
    wgn.add_statement('nop', comment='if i < len')
    wgn.local.get(idx_var)
    wgn.local.get(len_var)
    wgn.i32.lt_u()
    with wgn.if_():
        # From here on, adr_var is the address of byte we're referencing
        # This is akin to calling stdlib_types.__subscript_bytes__
        # But since we already know we are inside of bounds,
        # can just bypass it and load the memory directly.
        wgn.local.get(adr_var)
        wgn.i32.const(3) # Bytes header -1, since we do a +1 every loop
        wgn.i32.add()
        wgn.local.set(adr_var)

        wgn.add_statement('nop', comment='while True')
        with wgn.loop():
            wgn.add_statement('nop', comment='acu = func(acu, iter[i])')
            wgn.local.get(acu_var)

            # Get the next byte, write back the address
            wgn.local.get(adr_var)
            wgn.i32.const(1)
            wgn.i32.add()
            wgn.local.tee(adr_var)
            wgn.i32.load8_u()

            wgn.add_statement('call', f'${inp.func.name}')
            wgn.local.set(acu_var)

            wgn.add_statement('nop', comment='i = i + 1')
            wgn.local.get(idx_var)
            wgn.i32.const(1)
            wgn.i32.add()
            wgn.local.set(idx_var)

            # NOTE(review): the condition that is tested is i < len, so this
            # presumably branches back to the start of the loop while there
            # are bytes left, rather than breaking out as the nop comment
            # words it - confirm against what wgn.loop() and br_if emit
            wgn.add_statement('nop', comment='if i >= len: break')
            wgn.local.get(idx_var)
            wgn.local.get(len_var)
            wgn.i32.lt_u()
            wgn.br_if(0)

    # return acu
    wgn.local.get(acu_var)
def statement_return(wgn: WasmGenerator, inp: ourlang.StatementReturn) -> None:
    """
    Compile: Return statement

    The value to return is compiled first,
    after which the return itself is added.
    """
    return_value = inp.value
    expression(wgn, return_value)

    wgn.return_()
def statement_if(wgn: WasmGenerator, inp: ourlang.StatementIf) -> None:
    """
    Compile: If statement

    Compiles the test expression, followed by an if block that
    holds the compiled statements of the branch.

    Raises NotImplementedError when the statement has an else branch,
    since those are not supported yet. This is checked up front, so
    nothing gets added to the generator for a statement that cannot
    be compiled in full.
    """
    if inp.else_statements:
        # TODO: Implement the else branch
        # yield wasm.Statement('else')
        # for stat in inp.else_statements:
        #     statement(wgn, stat)
        raise NotImplementedError(statement_if, inp, 'else branch')

    expression(wgn, inp.test)

    with wgn.if_():
        for stat in inp.statements:
            statement(wgn, stat)
def statement(wgn: WasmGenerator, inp: ourlang.Statement) -> None:
    """
    Compile: any statement

    Hands the statement over to the function that compiles that kind
    of statement. Raises NotImplementedError for any other kind.
    """
    if isinstance(inp, ourlang.StatementReturn):
        statement_return(wgn, inp)
    elif isinstance(inp, ourlang.StatementIf):
        statement_if(wgn, inp)
    elif isinstance(inp, ourlang.StatementPass):
        # A pass statement does not add anything
        pass
    else:
        raise NotImplementedError(statement, inp)
def function_argument(inp: ourlang.FunctionParam) -> wasm.Param:
    """
    Compile: function argument

    The result is the name of the parameter, paired with
    the WebAssembly type for the type of the parameter.
    """
    name = inp.name
    wasm_type = type3(inp.type3)

    return (name, wasm_type, )
def import_(inp: ourlang.Function) -> wasm.Import:
    """
    Compile: imported function

    Only valid for functions that are marked as imported. The import is
    built with 'imports' as its first argument and the name of the
    function as both the second and the third argument.
    """
    assert inp.imported

    name = inp.name

    params = []
    for param in inp.posonlyargs:
        params.append(function_argument(param))

    return wasm.Import('imports', name, name, params, type3(inp.returns_type3))
def function(inp: ourlang.Function) -> wasm.Function:
    """
    Compile: function

    Only valid for functions that are not imported. The body of a struct
    constructor is generated; for any other function, the body is the
    result of compiling its statements one by one.

    The second argument given to wasm.Function is the name of the
    function when it is exported, and None otherwise.
    """
    assert not inp.imported

    wgn = WasmGenerator()

    # TODO: ourlang.TupleConstructor needs to be handled here as well,
    # using _generate_tuple_constructor(wgn, inp)
    if isinstance(inp, ourlang.StructConstructor):
        _generate_struct_constructor(wgn, inp)
    else:
        for stat in inp.statements:
            statement(wgn, stat)

    export_name = inp.name if inp.exported else None
    params = [function_argument(param) for param in inp.posonlyargs]

    # The locals are whatever the generator registered while compiling the body
    local_vars = []
    for local_name, local_var in wgn.locals.items():
        local_vars.append((local_name, local_var.wasm_type(), ))

    return wasm.Function(
        inp.name,
        export_name,
        params,
        local_vars,
        type3(inp.returns_type3),
        wgn.statements
    )
def module_data_u8(inp: int) -> bytes:
    """
    Compile: module data, u8 value

    # FIXME: All u8 values are stored as u32

    The value is packed as a little-endian unsigned 32 bit integer, so
    the result is always 4 bytes. Packing it as unsigned makes sure that
    a negative value is refused (struct.error) rather than silently
    stored with its upper three bytes set.
    """
    return struct.pack('<I', inp) # Should be B
def module_data_u32(inp: int) -> bytes:
    """
    Compile: module data, u32 value

    Little-endian, unsigned, 4 bytes.
    """
    packer = struct.Struct('<I')
    return packer.pack(inp)
def module_data_u64(inp: int) -> bytes:
    """
    Compile: module data, u64 value

    Little-endian, unsigned, 8 bytes.
    """
    packer = struct.Struct('<Q')
    return packer.pack(inp)
def module_data_i32(inp: int) -> bytes:
    """
    Compile: module data, i32 value

    Little-endian, signed, 4 bytes.
    """
    packer = struct.Struct('<i')
    return packer.pack(inp)
def module_data_i64(inp: int) -> bytes:
    """
    Compile: module data, i64 value

    Little-endian, signed, 8 bytes.
    """
    packer = struct.Struct('<q')
    return packer.pack(inp)
def module_data_f32(inp: float) -> bytes:
    """
    Compile: module data, f32 value

    Little-endian, 4 bytes.
    """
    packer = struct.Struct('<f')
    return packer.pack(inp)
def module_data_f64(inp: float) -> bytes:
    """
    Compile: module data, f64 value

    Little-endian, 8 bytes.
    """
    packer = struct.Struct('<d')
    return packer.pack(inp)
def module_data(inp: ourlang.ModuleData) -> bytes:
    """
    Compile: module data

    Builds the initial content of the memory: the header of the
    stdlib.alloc allocator, followed by every data block. Each block
    is preceded by a u32 with the length of its data.

    As a side effect, the address of every block is set to the position
    of its data, which is directly after that 4 byte length.

    Raises NotImplementedError for constants of a type that cannot be
    stored as module data (yet).
    """
    # Per type: the Python type the value must have, and how to pack it.
    # Types are matched on identity.
    packers = (
        (type3types.u8, int, module_data_u8, ),
        (type3types.u32, int, module_data_u32, ),
        (type3types.u64, int, module_data_u64, ),
        (type3types.i32, int, module_data_i32, ),
        (type3types.i64, int, module_data_i64, ),
        (type3types.f32, float, module_data_f32, ),
        (type3types.f64, float, module_data_f64, ),
    )

    unalloc_ptr = stdlib_alloc.UNALLOC_PTR

    # Collected in a list and joined once, instead of growing a bytes value
    allocated_chunks: List[bytes] = []

    for block in inp.blocks:
        block.address = unalloc_ptr + 4 # 4 bytes for allocator header

        data_list: List[bytes] = []

        for constant in block.data:
            assert constant.type3 is not None

            for typ, value_type, packer in packers:
                if constant.type3 is typ:
                    assert isinstance(constant.value, value_type)
                    data_list.append(packer(constant.value))
                    break
            else:
                raise NotImplementedError(constant, constant.type3, constant.value)

        block_data = b''.join(data_list)

        allocated_chunks.append(module_data_u32(len(block_data)))
        allocated_chunks.append(block_data)

        unalloc_ptr += 4 + len(block_data)

    return (
        # Store that we've initialized the memory
        module_data_u32(stdlib_alloc.IDENTIFIER)
        # Store the first reserved i32
        + module_data_u32(0)
        # Store the pointer towards the first free block
        # In this case, 0 since we haven't freed any blocks yet
        + module_data_u32(0)
        # Store the pointer towards the first unallocated block
        # In this case, directly after the last data block stored here
        + module_data_u32(unalloc_ptr)
        # Store the actual data
        + b''.join(allocated_chunks)
    )
def module(inp: ourlang.Module) -> wasm.Module:
    """
    Compile: module

    Fills a new WebAssembly module with the module data, the imported
    functions, and the functions of the module itself. The latter are
    preceded by the stdlib functions for allocation and bytes handling.
    """
    result = wasm.Module()

    result.memory.data = module_data(inp.data)

    imports = []
    for func in inp.functions.values():
        if func.imported:
            imports.append(import_(func))
    result.imports = imports

    # The stdlib functions always come first
    functions = [
        stdlib_alloc.__find_free_block__,
        stdlib_alloc.__alloc__,
        stdlib_types.__alloc_bytes__,
        stdlib_types.__subscript_bytes__,
    ]
    for func in inp.functions.values():
        if not func.imported:
            functions.append(function(func))
    result.functions = functions

    return result
# TODO: Broken after new type system
# def _generate_tuple_constructor(wgn: WasmGenerator, inp: ourlang.TupleConstructor) -> None:
# tmp_var = wgn.temp_var_i32('tuple_adr')
#
# # Allocated the required amounts of bytes in memory
# wgn.i32.const(inp.tuple.alloc_size())
# wgn.call(stdlib_alloc.__alloc__)
# wgn.local.set(tmp_var)
#
# # Store each member individually
# for member in inp.tuple.members:
# mtyp = LOAD_STORE_TYPE_MAP.get(member.type.__class__)
# if mtyp is None:
# # In the future might extend this by having structs or tuples
# # as members of struct or tuples
# raise NotImplementedError(expression, inp, member)
#
# wgn.local.get(tmp_var)
# wgn.add_statement('local.get', f'$arg{member.idx}')
# wgn.add_statement(f'{mtyp}.store', 'offset=' + str(member.offset))
#
# # Return the allocated address
# wgn.local.get(tmp_var)
def _generate_struct_constructor(wgn: WasmGenerator, inp: ourlang.StructConstructor) -> None:
    """
    Generate the body of the constructor function of a struct

    The body allocates memory for the struct, stores every member in it,
    and leaves the address of the allocated memory on the stack. The value
    of each member is read from the local that carries the member's name.

    Raises NotImplementedError for members of a type that has no
    load / store mapping.
    """
    struct_type = inp.struct_type3

    adr_var = wgn.temp_var_i32('struct_adr')

    # Allocated the required amounts of bytes in memory
    alloc_size = _calculate_alloc_size(struct_type)
    wgn.i32.const(alloc_size)
    wgn.call(stdlib_alloc.__alloc__)
    wgn.local.set(adr_var)

    # Store each member individually
    for memname, member_type in struct_type.members.items():
        store_type = LOAD_STORE_TYPE_MAP.get(member_type.name)
        if store_type is None:
            # In the future might extend this by having structs or tuples
            # as members of struct or tuples
            raise NotImplementedError(expression, inp, member_type)

        # A store takes the address first, and then the value
        wgn.local.get(adr_var)
        wgn.add_statement('local.get', f'${memname}')

        offset = _calculate_member_offset(struct_type, memname)
        wgn.add_statement(f'{store_type}.store', 'offset=' + str(offset))

    # Return the allocated address
    wgn.local.get(adr_var)
# The number of bytes that a value of each WebAssembly value type
# in LOAD_STORE_TYPE_MAP takes up in memory
_WASM_VALUE_BYTE_SIZE = {
    'i32': 4,
    'i64': 8,
    'f32': 4,
    'f64': 8,
}

def _calculate_alloc_size(typ: Union[type3types.StructType3, type3types.Type3]) -> int:
    """
    Calculate the number of bytes needed to store a value of the given type

    The size of a struct is the sum of the sizes of its members; they are
    stored back to back, without padding. The size of any other type is
    the size of the WebAssembly value type that is used to load and store
    it, as listed in LOAD_STORE_TYPE_MAP. This keeps the layout in line
    with the width of the load and store instructions; a u8 thus takes
    up 4 bytes.

    Raises NotImplementedError for types without a load / store mapping.
    """
    if isinstance(typ, type3types.StructType3):
        return sum(
            _calculate_alloc_size(member_type)
            for member_type in typ.members.values()
        )

    wasm_type = LOAD_STORE_TYPE_MAP.get(typ.name)
    if wasm_type is None:
        raise NotImplementedError(_calculate_alloc_size, typ)

    return _WASM_VALUE_BYTE_SIZE[wasm_type]

def _calculate_member_offset(struct_type3: type3types.StructType3, member: str) -> int:
    """
    Calculate the offset in bytes of a member from the start of its struct

    Members are laid out in the order in which the struct lists them, so
    the offset is the combined size of all members that precede it.

    Raises KeyError when the struct has no member with the given name.
    """
    offset = 0

    for memname, member_type in struct_type3.members.items():
        if memname == member:
            return offset

        offset += _calculate_alloc_size(member_type)

    raise KeyError(member)