From c4ee2ab3dc52c638ada69302906899a04ff2f449 Mon Sep 17 00:00:00 2001 From: "Johan B.W. de Vries" Date: Wed, 17 Aug 2022 21:07:33 +0200 Subject: [PATCH] Memory initialization is now done during compilation Also, the user can now define tuple module constants --- TODO.md | 2 + examples/buffer.html | 3 - examples/fold.html | 3 - phasm/codestyle.py | 6 ++ phasm/compiler.py | 83 ++++++++++++++++++++++- phasm/ourlang.py | 48 +++++++++++-- phasm/parser.py | 56 ++++++++++++--- phasm/stdlib/alloc.py | 37 +--------- tests/integration/helpers.py | 2 - tests/integration/test_constants.py | 37 +++++++++- tests/integration/test_static_checking.py | 27 ++++++++ tests/integration/test_stdlib_alloc.py | 46 ------------- 12 files changed, 243 insertions(+), 107 deletions(-) diff --git a/TODO.md b/TODO.md index 29f8e80..41ae3da 100644 --- a/TODO.md +++ b/TODO.md @@ -2,3 +2,5 @@ - Implement a trace() builtin for debugging - Implement a proper type matching / checking system +- Check if we can use DataView in the Javascript examples, e.g. with setUint32 +- Storing u8 in memory still claims 32 bits (since that's what you need in local variables). However, using load8_u / load8_s we can optimize this. 
diff --git a/examples/buffer.html b/examples/buffer.html index 3264f9b..cf4e442 100644 --- a/examples/buffer.html +++ b/examples/buffer.html @@ -30,9 +30,6 @@ WebAssembly.instantiateStreaming(fetch('buffer.wasm'), importObject) // Allocate room within the memory of the WebAssembly class let size = 8; - stdlib_alloc___init__ = app.instance.exports['stdlib.alloc.__init__'] - stdlib_alloc___init__() - stdlib_types___alloc_bytes__ = app.instance.exports['stdlib.types.__alloc_bytes__'] let offset = stdlib_types___alloc_bytes__(size) diff --git a/examples/fold.html b/examples/fold.html index 0007e6f..d47e580 100644 --- a/examples/fold.html +++ b/examples/fold.html @@ -31,9 +31,6 @@ function log(txt) WebAssembly.instantiateStreaming(fetch('fold.wasm'), importObject) .then(app => { - stdlib_alloc___init__ = app.instance.exports['stdlib.alloc.__init__'] - stdlib_alloc___init__() - stdlib_types___alloc_bytes__ = app.instance.exports['stdlib.types.__alloc_bytes__'] let offset0 = stdlib_types___alloc_bytes__(0); diff --git a/phasm/codestyle.py b/phasm/codestyle.py index 4d19bd6..a09b524 100644 --- a/phasm/codestyle.py +++ b/phasm/codestyle.py @@ -94,6 +94,12 @@ def expression(inp: ourlang.Expression) -> str: # could not fit in the given float type return str(inp.value) + if isinstance(inp, ourlang.ConstantTuple): + return '(' + ', '.join( + expression(x) + for x in inp.value + ) + ', )' + if isinstance(inp, ourlang.VariableReference): return str(inp.name) diff --git a/phasm/compiler.py b/phasm/compiler.py index 9490ad4..1f14aae 100644 --- a/phasm/compiler.py +++ b/phasm/compiler.py @@ -1,6 +1,8 @@ """ This module contains the code to convert parsed Ourlang into WebAssembly code """ +import struct + from . import codestyle from . import ourlang from . 
import typing @@ -276,6 +278,15 @@ def expression(wgn: WasmGenerator, inp: ourlang.Expression) -> None: return if isinstance(inp, ourlang.ModuleConstantReference): + if isinstance(inp.type, typing.TypeTuple): + assert isinstance(inp.definition.constant, ourlang.ConstantTuple) + assert inp.definition.data_block is not None, 'Combined values are memory stored' + assert inp.definition.data_block.address is not None, 'Value not allocated' + wgn.i32.const(inp.definition.data_block.address) + return + + assert inp.definition.data_block is None, 'Primitives are not memory stored' + mtyp = LOAD_STORE_TYPE_MAP.get(inp.type.__class__) if mtyp is None: # In the future might extend this by having structs or tuples @@ -448,12 +459,83 @@ def function(inp: ourlang.Function) -> wasm.Function: wgn.statements ) +def module_data_u8(inp: int) -> bytes: + """ + Compile: module data, u8 value + + # FIXME: All u8 values are stored as u32 + """ + return struct.pack('<I', inp) + +def module_data_u32(inp: int) -> bytes: + """ + Compile: module data, u32 value + """ + return struct.pack('<I', inp) + +def module_data_u64(inp: int) -> bytes: + """ + Compile: module data, u64 value + """ + return struct.pack('<Q', inp) + +def module_data(inp: ourlang.ModuleData) -> bytes: + """ + Compile: module data + """ + unalloc_ptr = stdlib_alloc.UNALLOC_PTR + + allocated_data = b'' + + for block in inp.blocks: + block.address = unalloc_ptr + 4 # 4 bytes for allocator header + + data_list = [] + + for constant in block.data: + if isinstance(constant, ourlang.ConstantUInt8): + data_list.append(module_data_u8(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantUInt32): + data_list.append(module_data_u32(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantUInt64): + data_list.append(module_data_u64(constant.value)) + continue + + raise NotImplementedError(constant) + + block_data = b''.join(data_list) + + allocated_data += module_data_u32(len(block_data)) + block_data + + unalloc_ptr += 4 + len(block_data) + + return ( + # Store that we've initialized the memory + module_data_u32(stdlib_alloc.IDENTIFIER) + # 
Store the first reserved i32 + + module_data_u32(0) + # Store the pointer towards the first free block + # In this case, 0 since we haven't freed any blocks yet + + module_data_u32(0) + # Store the pointer towards the first unallocated block + # In this case the end of the stdlib.alloc header at the start + + module_data_u32(unalloc_ptr) + # Store the actual data + + allocated_data + ) + def module(inp: ourlang.Module) -> wasm.Module: """ Compile: module """ result = wasm.Module() + result.memory.data = module_data(inp.data) + result.imports = [ import_(x) for x in inp.functions.values() @@ -461,7 +543,6 @@ def module(inp: ourlang.Module) -> wasm.Module: ] result.functions = [ - stdlib_alloc.__init__, stdlib_alloc.__find_free_block__, stdlib_alloc.__alloc__, stdlib_types.__alloc_bytes__, diff --git a/phasm/ourlang.py b/phasm/ourlang.py index 4e245a2..d7cfffe 100644 --- a/phasm/ourlang.py +++ b/phasm/ourlang.py @@ -1,7 +1,7 @@ """ Contains the syntax tree for ourlang """ -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Optional import enum @@ -123,6 +123,18 @@ class ConstantFloat64(Constant): super().__init__(type_) self.value = value +class ConstantTuple(Constant): + """ + A Tuple constant value expression within a statement + """ + __slots__ = ('value', ) + + value: List[Constant] + + def __init__(self, type_: TypeTuple, value: List[Constant]) -> None: + super().__init__(type_) + self.value = value + class VariableReference(Expression): """ An variable reference expression within a statement @@ -376,25 +388,52 @@ class ModuleConstantDef: """ A constant definition within a module """ - __slots__ = ('name', 'lineno', 'type', 'constant', ) + __slots__ = ('name', 'lineno', 'type', 'constant', 'data_block', ) name: str lineno: int type: TypeBase constant: Constant + data_block: Optional['ModuleDataBlock'] - def __init__(self, name: str, lineno: int, type_: TypeBase, constant: Constant) -> None: + def __init__(self, name: str, lineno: int, 
type_: TypeBase, constant: Constant, data_block: Optional['ModuleDataBlock']) -> None: self.name = name self.lineno = lineno self.type = type_ self.constant = constant + self.data_block = data_block + +class ModuleDataBlock: + """ + A single allocated block for module data + """ + __slots__ = ('data', 'address', ) + + data: List[Constant] + address: Optional[int] + + def __init__(self, data: List[Constant]) -> None: + self.data = data + self.address = None + +class ModuleData: + """ + The data for when a module is loaded into memory + """ + __slots__ = ('blocks', ) + + blocks: List[ModuleDataBlock] + + def __init__(self) -> None: + self.blocks = [] class Module: """ A module is a file and consists of functions """ - __slots__ = ('types', 'structs', 'constant_defs', 'functions',) + __slots__ = ('data', 'types', 'structs', 'constant_defs', 'functions',) + data: ModuleData types: Dict[str, TypeBase] structs: Dict[str, TypeStruct] constant_defs: Dict[str, ModuleConstantDef] @@ -412,6 +451,7 @@ class Module: 'f64': TypeFloat64(), 'bytes': TypeBytes(), } + self.data = ModuleData() self.structs = {} self.constant_defs = {} self.functions = {} diff --git a/phasm/parser.py b/phasm/parser.py index 30d2b26..a9f2f63 100644 --- a/phasm/parser.py +++ b/phasm/parser.py @@ -26,7 +26,7 @@ from .exceptions import StaticError from .ourlang import ( WEBASSEMBLY_BUILDIN_FLOAT_OPS, - Module, + Module, ModuleDataBlock, Function, Expression, @@ -35,6 +35,8 @@ from .ourlang import ( Constant, ConstantFloat32, ConstantFloat64, ConstantInt32, ConstantInt64, ConstantUInt8, ConstantUInt32, ConstantUInt64, + ConstantTuple, + FunctionCall, StructConstructor, TupleConstructor, UnaryOp, VariableReference, @@ -203,19 +205,51 @@ class OurVisitor: _raise_static_error(node, 'Must be name') if not isinstance(node.target.ctx, ast.Store): _raise_static_error(node, 'Must be load context') - if not isinstance(node.value, ast.Constant): - _raise_static_error(node, 'Must be constant') exp_type = 
self.visit_type(module, node.annotation) - constant = ModuleConstantDef( - node.target.id, - node.lineno, - exp_type, - self.visit_Module_Constant(module, exp_type, node.value) - ) + if isinstance(exp_type, TypeInt32): + if not isinstance(node.value, ast.Constant): + _raise_static_error(node, 'Must be constant') - return constant + constant = ModuleConstantDef( + node.target.id, + node.lineno, + exp_type, + self.visit_Module_Constant(module, exp_type, node.value), + None, + ) + return constant + + if isinstance(exp_type, TypeTuple): + if not isinstance(node.value, ast.Tuple): + _raise_static_error(node, 'Must be tuple') + + if len(exp_type.members) != len(node.value.elts): + _raise_static_error(node, 'Invalid number of tuple values') + + tuple_data = [ + self.visit_Module_Constant(module, mem.type, arg_node) + for arg_node, mem in zip(node.value.elts, exp_type.members) + if isinstance(arg_node, ast.Constant) + ] + if len(exp_type.members) != len(tuple_data): + _raise_static_error(node, 'Tuple arguments must be constants') + + # Allocate the data + data_block = ModuleDataBlock(tuple_data) + module.data.blocks.append(data_block) + + # Then return the constant as a pointer + return ModuleConstantDef( + node.target.id, + node.lineno, + exp_type, + ConstantTuple(exp_type, tuple_data), + data_block, + ) + + raise NotImplementedError(f'{node} on Module AnnAssign') def visit_Module_stmt(self, module: Module, node: ast.stmt) -> None: if isinstance(node, ast.FunctionDef): @@ -372,7 +406,7 @@ class OurVisitor: if node.id in module.constant_defs: cdef = module.constant_defs[node.id] if exp_type != cdef.type: - _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.id} is actually {codestyle.type_(act_type)}') + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.id} is actually {codestyle.type_(cdef.type)}') return ModuleConstantReference(exp_type, cdef) diff --git a/phasm/stdlib/alloc.py b/phasm/stdlib/alloc.py index a803545..2761bfb 
100644 --- a/phasm/stdlib/alloc.py +++ b/phasm/stdlib/alloc.py @@ -12,42 +12,7 @@ ADR_UNALLOC_PTR = ADR_FREE_BLOCK_PTR + 4 UNALLOC_PTR = ADR_UNALLOC_PTR + 4 -@func_wrapper() -def __init__(g: Generator) -> None: - """ - Initializes the memory so we can allocate it - """ - - # Check if the memory is already initialized - g.i32.const(ADR_IDENTIFIER) - g.i32.load() - g.i32.const(IDENTIFIER) - g.i32.eq() - with g.if_(): - # Already initialized, return without any changes - g.return_() - - # Store the first reserved i32 - g.i32.const(ADR_RESERVED0) - g.i32.const(0) - g.i32.store() - - # Store the pointer towards the first free block - # In this case, 0 since we haven't freed any blocks yet - g.i32.const(ADR_FREE_BLOCK_PTR) - g.i32.const(0) - g.i32.store() - - # Store the pointer towards the first unallocated block - # In this case the end of the stdlib.alloc header at the start - g.i32.const(ADR_UNALLOC_PTR) - g.i32.const(UNALLOC_PTR) - g.i32.store() - - # Store that we've initialized the memory - g.i32.const(0) - g.i32.const(IDENTIFIER) - g.i32.store() +# For memory initialization see phasm.compiler.module_data @func_wrapper(exported=False) def __find_free_block__(g: Generator, alloc_size: i32) -> i32: diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 2e6b706..2858472 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -88,8 +88,6 @@ class Suite: # Check if code formatting works assert self.code_py == '\n' + phasm_render(runner.phasm_ast) # \n for formatting in tests - runner.call('stdlib.alloc.__init__') - wasm_args = [] if args: write_header(sys.stderr, 'Memory (pre alloc)') diff --git a/tests/integration/test_constants.py b/tests/integration/test_constants.py index 1593915..36626b1 100644 --- a/tests/integration/test_constants.py +++ b/tests/integration/test_constants.py @@ -3,7 +3,7 @@ import pytest from .helpers import Suite @pytest.mark.integration_test -def test_return(): +def test_i32(): code_py = """ 
CONSTANT: i32 = 13 @@ -15,3 +15,38 @@ def testEntry() -> i32: result = Suite(code_py).run_code() assert 65 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64', ]) +def test_tuple_1(type_): + code_py = f""" +CONSTANT: ({type_}, ) = (65, ) + +@exported +def testEntry() -> {type_}: + return helper(CONSTANT) + +def helper(vector: ({type_}, )) -> {type_}: + return vector[0] +""" + + result = Suite(code_py).run_code() + + assert 65 == result.returned_value + +@pytest.mark.integration_test +def test_tuple_6(): + code_py = """ +CONSTANT: (u8, u8, u32, u32, u64, u64, ) = (11, 22, 3333, 4444, 555555, 666666, ) + +@exported +def testEntry() -> u32: + return helper(CONSTANT) + +def helper(vector: (u8, u8, u32, u32, u64, u64, )) -> u32: + return vector[2] +""" + + result = Suite(code_py).run_code() + + assert 3333 == result.returned_value diff --git a/tests/integration/test_static_checking.py b/tests/integration/test_static_checking.py index 69d0724..ed25023 100644 --- a/tests/integration/test_static_checking.py +++ b/tests/integration/test_static_checking.py @@ -53,3 +53,30 @@ def testEntry() -> (i32, i32, ): with pytest.raises(StaticError, match=f'Static error on line 7: Expected \\(i32, i32, \\), helper actually returns {type_}'): phasm_parse(code_py) + +@pytest.mark.integration_test +def test_tuple_constant_too_few_values(): + code_py = """ +CONSTANT: (u32, u8, u8, ) = (24, 57, ) +""" + + with pytest.raises(StaticError, match=f'Static error on line 2: Invalid number of tuple values'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_tuple_constant_too_many_values(): + code_py = """ +CONSTANT: (u32, u8, u8, ) = (24, 57, 1, 1, ) +""" + + with pytest.raises(StaticError, match=f'Static error on line 2: Invalid number of tuple values'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_tuple_constant_type_mismatch(): + code_py = """ +CONSTANT: (u32, u8, u8, ) = (24, 4000, 1, ) +""" + + 
with pytest.raises(StaticError, match=f'Static error on line 2: Integer value out of range; expected 0..255, actual 4000'): + phasm_parse(code_py) diff --git a/tests/integration/test_stdlib_alloc.py b/tests/integration/test_stdlib_alloc.py index a9faf35..da8ccea 100644 --- a/tests/integration/test_stdlib_alloc.py +++ b/tests/integration/test_stdlib_alloc.py @@ -22,51 +22,6 @@ def setup_interpreter(phash_code: str) -> Runner: return runner -@pytest.mark.integration_test -def test___init__(): - code_py = """ -@exported -def testEntry() -> u8: - return 13 -""" - - runner = setup_interpreter(code_py) - - # Garbage in the memory so we can test for it - runner.interpreter_write_memory(0, range(128)) - - write_header(sys.stderr, 'Memory (pre run)') - runner.interpreter_dump_memory(sys.stderr) - - runner.call('stdlib.alloc.__init__') - - write_header(sys.stderr, 'Memory (post run)') - runner.interpreter_dump_memory(sys.stderr) - - assert ( - b'\xC0\xA1\x00\x00' - b'\x00\x00\x00\x00' - b'\x00\x00\x00\x00' - b'\x10\x00\x00\x00' - b'\x10\x11\x12\x13' # Untouched because unused - ) == runner.interpreter_read_memory(0, 20) - -@pytest.mark.integration_test -def test___alloc___no_init(): - code_py = """ -@exported -def testEntry() -> u8: - return 13 -""" - - runner = setup_interpreter(code_py) - - write_header(sys.stderr, 'Memory (pre run)') - runner.interpreter_dump_memory(sys.stderr) - - with pytest.raises(Exception, match='unreachable'): - runner.call('stdlib.alloc.__alloc__', 32) - @pytest.mark.integration_test def test___alloc___ok(): code_py = """ @@ -80,7 +35,6 @@ def testEntry() -> u8: write_header(sys.stderr, 'Memory (pre run)') runner.interpreter_dump_memory(sys.stderr) - runner.call('stdlib.alloc.__init__') offset0 = runner.call('stdlib.alloc.__alloc__', 32) offset1 = runner.call('stdlib.alloc.__alloc__', 32) offset2 = runner.call('stdlib.alloc.__alloc__', 32)