diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b8a823a --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/*.wasm +/*.wat +/.coverage +/venv + +__pycache__ diff --git a/Makefile b/Makefile index 58bd1f5..ad186c2 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,44 @@ -%.wat: %.py compile.py - python3.8 compile.py $< > $@ +WABT_DIR := /home/johan/Sources/github.com/WebAssembly/wabt + +WAT2WASM := $(WABT_DIR)/bin/wat2wasm +WASM2C := $(WABT_DIR)/bin/wasm2c + +%.wat: %.py $(shell find phasm -name '*.py') venv/.done + venv/bin/python -m phasm $< $@ + +%.wat.html: %.wat + venv/bin/pygmentize -l wat -O full -f html $^ -o $@ + +%.py.html: %.py + venv/bin/pygmentize -l py -O full -f html $^ -o $@ %.wasm: %.wat - wat2wasm $^ -o $@ + $(WAT2WASM) $^ -o $@ -server: - python3.8 -m http.server +%.c: %.wasm + $(WASM2C) $^ -o $@ + +# %.exe: %.c +# cc $^ -o $@ -I $(WABT_DIR)/wasm2c + +examples: venv/.done $(subst .py,.wasm,$(wildcard examples/*.py)) $(subst .py,.wat.html,$(wildcard examples/*.py)) $(subst .py,.py.html,$(wildcard examples/*.py)) + venv/bin/python3 -m http.server --directory examples + +test: venv/.done + venv/bin/pytest tests $(TEST_FLAGS) + +lint: venv/.done + venv/bin/pylint phasm + +typecheck: venv/.done + venv/bin/mypy --strict phasm tests/integration/runners.py + +venv/.done: requirements.txt + python3.8 -m venv venv + venv/bin/python3 -m pip install wheel pip --upgrade + venv/bin/python3 -m pip install -r $^ + touch $@ + +.SECONDARY: # Keep intermediate files + +.PHONY: examples diff --git a/README.md b/README.md new file mode 100644 index 0000000..c9cb056 --- /dev/null +++ b/README.md @@ -0,0 +1,99 @@ +phasm +===== + +Elevator pitch +-------------- +A programming language, that looks like Python, handles like Haskell, +and compiles directly to WebAssembly. + +Project state +------------- +This is a hobby project for now. Use at your own risk. + +How to run +---------- +You should only need make and python3. 
Currently, we're working with python3.8. +Since we're using the Python ast parser, it might not work on other versions. + +To run the examples: +```sh +make examples +``` + +To run the tests: +```sh +make test +``` + +To run the linting and type checking: +```sh +make lint typecheck +``` + +To compile a Phasm file: +```sh +python3.8 -m phasm source.py output.wat +``` + +Additional required tools +------------------------- +At the moment, the compiler outputs WebAssembly text format. To actually +get a binary, you will need the wat2wasm tool[6]. + +Example +------- +For more examples, see the examples directory. +```py +def helper(n: u64, a: u64, b: u64) -> u64: + if n < 1: + return a + b + + return helper(n - 1, a + b, a) + +@exported +def fib(n: u64) -> u64: + if n == 0: + return 0 + + if n == 1: + return 1 + + return helper(n - 1, 0, 1) +``` + +Gotchas +------- +- When importing and exporting unsigned values to WebAssembly, they will become + signed, as WebAssembly has no native unsigned type. You may need to cast + or reinterpret them. +- Currently, Phasm files have the .py extension, which helps with syntax + highlighting; that might change in the future. + +Contributing +------------ +At this time, we're mostly looking for use cases for WebAssembly, other than to +compile existing C code and run it in the browser. The goal of WebAssembly +is to enable high-performance applications on web pages[5], though most people +seem to use it to have existing code run in the browser. + +If you have a situation where WebAssembly would be useful for its speed, we're +interested to see what you want to use it for. + +Also, if you are trying out Phasm, and you're running into a limitation, we're +interested in a minimal test case that shows what you want to achieve and how +Phasm currently fails you. 
+ +Name origin +----------- +- p from Python +- ha from Haskell +- asm from WebAssembly + +References +---------- +[1] https://www.python.org/ +[2] https://www.haskell.org/ +[3] https://webassembly.org/ +[4] https://www.w3.org/TR/wasm-core-1/ +[5] https://en.wikipedia.org/w/index.php?title=WebAssembly&oldid=1103639883 +[6] https://github.com/WebAssembly/wabt diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..0da4619 --- /dev/null +++ b/TODO.md @@ -0,0 +1,8 @@ +# TODO + +- Implement a trace() builtin for debugging +- Implement a proper type matching / checking system +- Check if we can use DataView in the JavaScript examples, e.g. with setUint32 +- Storing u8 in memory still claims 32 bits (since that's what you need in local variables). However, using load8_u / load8_s we can optimize this. +- Implement a FizzBuzz example +- Also, check the code for FIXME and TODO diff --git a/compile.py b/compile.py deleted file mode 100644 index 6720a4a..0000000 --- a/compile.py +++ /dev/null @@ -1,133 +0,0 @@ -import _ast -import ast -import sys - -class Import: - def __init__(self, module, name, intname): - self.module = module - self.name = name - self.intname = intname - self.params = None - - def generate(self): - return '(import "{}" "{}" (func ${}{}))'.format( - self.module, - self.name, - self.intname, - ''.join(' (param {})'.format(x) for x in self.params) - ) - -class Statement: - def __init__(self, name, *args): - self.name = name - self.args = args - - def generate(self): - return '{} {}'.format(self.name, ' '.join(self.args)) - -class Function: - def __init__(self, name, exported=True): - self.name = name - self.exported = exported # TODO: Use __all__! 
- self.statements = [] - - def generate(self): - return '(func {}\n {})'.format( - ('(export "{}")' if self.exported else '${}').format(self.name), - '\n '.join(x.generate() for x in self.statements), - ) - -class Visitor(ast.NodeVisitor): - def __init__(self): - self._stack = [] - self.imports = [] - self.functions = [] - - def visit_ImportFrom(self, node): - for alias in node.names: - self.imports.append(Import( - node.module, - alias.name, - alias.asname, - )) - - def visit_FunctionDef(self, node): - func = Function( - node.name, - ) - - self._stack.append(func) - self.generic_visit(node) - self._stack.pop() - - self.functions.append(func) - - def visit_Expr(self, node): - self.generic_visit(node) - - def visit_Call(self, node): - self.generic_visit(node) - - func = self._stack[-1] - func.statements.append( - Statement('call', '$' + node.func.id) - ) - - def visit_BinOp(self, node): - self.generic_visit(node) - - func = self._stack[-1] - - if 'Add' == node.op.__class__.__name__: - func.statements.append( - Statement('i32.add') - ) - elif 'Mult' == node.op.__class__.__name__: - func.statements.append( - Statement('i32.mul') - ) - else: - err(node.op) - - def visit_Constant(self, node): - if not self._stack: - # Constant outside of any function - imp = self.imports[-1] - prefix = imp.name + '(' - val = node.value.strip() - - if val.startswith(prefix) and val.endswith(')'): - imp.params = val[len(prefix):-1].split(',') - else: - func = self._stack[-1] - if isinstance(node.value, int): - func.statements.append( - Statement('i32.const', str(node.value)) - ) - - self.generic_visit(node) - - def generate(self): - return '(module\n {}\n {})'.format( - '\n '.join(x.generate() for x in self.imports), - '\n '.join(x.generate() for x in self.functions), - ) - -def err(msg: str) -> None: - sys.stderr.write('{}\n'.format(msg)) - -def main(source: str) -> int: - with open(source, 'r') as fil: - code = fil.read() - - res = ast.parse(code, source) - - visitor = Visitor() - 
visitor.visit(res) - - print(visitor.generate()) - - return 0 - -if __name__ == '__main__': - sys.exit(main(*sys.argv[1:])) diff --git a/examples/.gitignore b/examples/.gitignore new file mode 100644 index 0000000..de726e0 --- /dev/null +++ b/examples/.gitignore @@ -0,0 +1,4 @@ +*.py.html +*.wasm +*.wat +*.wat.html diff --git a/examples/buffer.html b/examples/buffer.html new file mode 100644 index 0000000..a9e46ee --- /dev/null +++ b/examples/buffer.html @@ -0,0 +1,51 @@ + + +
+| Test | +Interpreter | +Setup | +WebAssembly | +Javascript | +
| Lynx * 65536 | +Chromium 104.0.5112.101 | +DevTools closed | +9.35 | +12.56 | +
| Lynx * 65536 | +Chromium 104.0.5112.101 | +DevTools open | +14.71 | +12.72 | +
| Lynx * 65536 | +Chromium 104.0.5112.101 | +Record page load | +9.44 | +12.69 | +
| Lynx * 65536 | +Firefox 103 | +DevTools closed | +9.02 | +5.86 | +
| Lynx * 65536 | +Firefox 103 | +DevTools open | +9.01 | +5.83 | +
| Lynx * 65536 | +Firefox 103 | +Record page load | +72.41 | +5.85 | +
| Lynx * 1048576 | +Chromium 104.0.5112.101 | +DevTools closed | +149.24 | +202.36 | +
| Lynx * 1048576 | +Firefox 103 | +DevTools closed | +145.01 | +91.44 | +
bytes: + """ + Compile: module data, i32 value + """ + return struct.pack(' bytes: + """ + Compile: module data, i64 value + """ + return struct.pack('bytes: + """ + Compile: module data, f32 value + """ + return struct.pack('bytes: + """ + Compile: module data, f64 value + """ + return struct.pack(' bytes: + """ + Compile: module data + """ + unalloc_ptr = stdlib_alloc.UNALLOC_PTR + + allocated_data = b'' + + for block in inp.blocks: + block.address = unalloc_ptr + 4 # 4 bytes for allocator header + + data_list = [] + + for constant in block.data: + if isinstance(constant, ourlang.ConstantUInt8): + data_list.append(module_data_u8(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantUInt32): + data_list.append(module_data_u32(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantUInt64): + data_list.append(module_data_u64(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantInt32): + data_list.append(module_data_i32(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantInt64): + data_list.append(module_data_i64(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantFloat32): + data_list.append(module_data_f32(constant.value)) + continue + + if isinstance(constant, ourlang.ConstantFloat64): + data_list.append(module_data_f64(constant.value)) + continue + + raise NotImplementedError(constant) + + block_data = b''.join(data_list) + + allocated_data += module_data_u32(len(block_data)) + block_data + + unalloc_ptr += 4 + len(block_data) + + return ( + # Store that we've initialized the memory + module_data_u32(stdlib_alloc.IDENTIFIER) + # Store the first reserved i32 + + module_data_u32(0) + # Store the pointer towards the first free block + # In this case, 0 since we haven't freed any blocks yet + + module_data_u32(0) + # Store the pointer towards the first unallocated block + # In this case the end of the stdlib.alloc header at the start + + module_data_u32(unalloc_ptr) + # 
Store the actual data + + allocated_data + ) + +def module(inp: ourlang.Module) -> wasm.Module: + """ + Compile: module + """ + result = wasm.Module() + + result.memory.data = module_data(inp.data) + + result.imports = [ + import_(x) + for x in inp.functions.values() + if x.imported + ] + + result.functions = [ + stdlib_alloc.__find_free_block__, + stdlib_alloc.__alloc__, + stdlib_types.__alloc_bytes__, + stdlib_types.__subscript_bytes__, + ] + [ + function(x) + for x in inp.functions.values() + if not x.imported + ] + + return result + +def _generate_tuple_constructor(wgn: WasmGenerator, inp: ourlang.TupleConstructor) -> None: + tmp_var = wgn.temp_var_i32('tuple_adr') + + # Allocated the required amounts of bytes in memory + wgn.i32.const(inp.tuple.alloc_size()) + wgn.call(stdlib_alloc.__alloc__) + wgn.local.set(tmp_var) + + # Store each member individually + for member in inp.tuple.members: + mtyp = LOAD_STORE_TYPE_MAP.get(member.type.__class__) + if mtyp is None: + # In the future might extend this by having structs or tuples + # as members of struct or tuples + raise NotImplementedError(expression, inp, member) + + wgn.local.get(tmp_var) + wgn.add_statement('local.get', f'$arg{member.idx}') + wgn.add_statement(f'{mtyp}.store', 'offset=' + str(member.offset)) + + # Return the allocated address + wgn.local.get(tmp_var) + +def _generate_struct_constructor(wgn: WasmGenerator, inp: ourlang.StructConstructor) -> None: + tmp_var = wgn.temp_var_i32('struct_adr') + + # Allocated the required amounts of bytes in memory + wgn.i32.const(inp.struct.alloc_size()) + wgn.call(stdlib_alloc.__alloc__) + wgn.local.set(tmp_var) + + # Store each member individually + for member in inp.struct.members: + mtyp = LOAD_STORE_TYPE_MAP.get(member.type.__class__) + if mtyp is None: + # In the future might extend this by having structs or tuples + # as members of struct or tuples + raise NotImplementedError(expression, inp, member) + + wgn.local.get(tmp_var) + wgn.add_statement('local.get', 
f'${member.name}') + wgn.add_statement(f'{mtyp}.store', 'offset=' + str(member.offset)) + + # Return the allocated address + wgn.local.get(tmp_var) diff --git a/phasm/exceptions.py b/phasm/exceptions.py new file mode 100644 index 0000000..b459c22 --- /dev/null +++ b/phasm/exceptions.py @@ -0,0 +1,8 @@ +""" +Exceptions for the phasm compiler +""" + +class StaticError(Exception): + """ + An error found during static analysis + """ diff --git a/phasm/ourlang.py b/phasm/ourlang.py new file mode 100644 index 0000000..5f19d2e --- /dev/null +++ b/phasm/ourlang.py @@ -0,0 +1,487 @@ +""" +Contains the syntax tree for ourlang +""" +from typing import Dict, List, Tuple, Optional, Union + +import enum + +from typing_extensions import Final + +WEBASSEMBLY_BUILDIN_FLOAT_OPS: Final = ('abs', 'sqrt', 'ceil', 'floor', 'trunc', 'nearest', ) +WEBASSEMBLY_BUILDIN_BYTES_OPS: Final = ('len', ) + +from .typing import ( + TypeBase, + TypeNone, + TypeBool, + TypeUInt8, TypeUInt32, TypeUInt64, + TypeInt32, TypeInt64, + TypeFloat32, TypeFloat64, + TypeBytes, + TypeTuple, TypeTupleMember, + TypeStaticArray, TypeStaticArrayMember, + TypeStruct, TypeStructMember, +) + +class Expression: + """ + An expression within a statement + """ + __slots__ = ('type', ) + + type: TypeBase + + def __init__(self, type_: TypeBase) -> None: + self.type = type_ + +class Constant(Expression): + """ + An constant value expression within a statement + """ + __slots__ = () + +class ConstantUInt8(Constant): + """ + An UInt8 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: int + + def __init__(self, type_: TypeUInt8, value: int) -> None: + super().__init__(type_) + self.value = value + +class ConstantUInt32(Constant): + """ + An UInt32 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: int + + def __init__(self, type_: TypeUInt32, value: int) -> None: + super().__init__(type_) + self.value = value + +class ConstantUInt64(Constant): + """ + An 
UInt64 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: int + + def __init__(self, type_: TypeUInt64, value: int) -> None: + super().__init__(type_) + self.value = value + +class ConstantInt32(Constant): + """ + An Int32 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: int + + def __init__(self, type_: TypeInt32, value: int) -> None: + super().__init__(type_) + self.value = value + +class ConstantInt64(Constant): + """ + An Int64 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: int + + def __init__(self, type_: TypeInt64, value: int) -> None: + super().__init__(type_) + self.value = value + +class ConstantFloat32(Constant): + """ + An Float32 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: float + + def __init__(self, type_: TypeFloat32, value: float) -> None: + super().__init__(type_) + self.value = value + +class ConstantFloat64(Constant): + """ + An Float64 constant value expression within a statement + """ + __slots__ = ('value', ) + + value: float + + def __init__(self, type_: TypeFloat64, value: float) -> None: + super().__init__(type_) + self.value = value + +class ConstantTuple(Constant): + """ + A Tuple constant value expression within a statement + """ + __slots__ = ('value', ) + + value: List[Constant] + + def __init__(self, type_: TypeTuple, value: List[Constant]) -> None: + super().__init__(type_) + self.value = value + +class ConstantStaticArray(Constant): + """ + A StaticArray constant value expression within a statement + """ + __slots__ = ('value', ) + + value: List[Constant] + + def __init__(self, type_: TypeStaticArray, value: List[Constant]) -> None: + super().__init__(type_) + self.value = value + +class VariableReference(Expression): + """ + An variable reference expression within a statement + """ + __slots__ = ('name', ) + + name: str + + def __init__(self, type_: TypeBase, name: str) -> 
None: + super().__init__(type_) + self.name = name + +class UnaryOp(Expression): + """ + A unary operator expression within a statement + """ + __slots__ = ('operator', 'right', ) + + operator: str + right: Expression + + def __init__(self, type_: TypeBase, operator: str, right: Expression) -> None: + super().__init__(type_) + + self.operator = operator + self.right = right + +class BinaryOp(Expression): + """ + A binary operator expression within a statement + """ + __slots__ = ('operator', 'left', 'right', ) + + operator: str + left: Expression + right: Expression + + def __init__(self, type_: TypeBase, operator: str, left: Expression, right: Expression) -> None: + super().__init__(type_) + + self.operator = operator + self.left = left + self.right = right + +class FunctionCall(Expression): + """ + A function call expression within a statement + """ + __slots__ = ('function', 'arguments', ) + + function: 'Function' + arguments: List[Expression] + + def __init__(self, function: 'Function') -> None: + super().__init__(function.returns) + + self.function = function + self.arguments = [] + +class AccessBytesIndex(Expression): + """ + Access a bytes index for reading + """ + __slots__ = ('varref', 'index', ) + + varref: VariableReference + index: Expression + + def __init__(self, type_: TypeBase, varref: VariableReference, index: Expression) -> None: + super().__init__(type_) + + self.varref = varref + self.index = index + +class AccessStructMember(Expression): + """ + Access a struct member for reading of writing + """ + __slots__ = ('varref', 'member', ) + + varref: VariableReference + member: TypeStructMember + + def __init__(self, varref: VariableReference, member: TypeStructMember) -> None: + super().__init__(member.type) + + self.varref = varref + self.member = member + +class AccessTupleMember(Expression): + """ + Access a tuple member for reading of writing + """ + __slots__ = ('varref', 'member', ) + + varref: VariableReference + member: TypeTupleMember + + 
def __init__(self, varref: VariableReference, member: TypeTupleMember, ) -> None: + super().__init__(member.type) + + self.varref = varref + self.member = member + +class AccessStaticArrayMember(Expression): + """ + Access a tuple member for reading of writing + """ + __slots__ = ('varref', 'static_array', 'member', ) + + varref: Union['ModuleConstantReference', VariableReference] + static_array: TypeStaticArray + member: Union[Expression, TypeStaticArrayMember] + + def __init__(self, varref: Union['ModuleConstantReference', VariableReference], static_array: TypeStaticArray, member: Union[TypeStaticArrayMember, Expression], ) -> None: + super().__init__(static_array.member_type) + + self.varref = varref + self.static_array = static_array + self.member = member + +class Fold(Expression): + """ + A (left or right) fold + """ + class Direction(enum.Enum): + """ + Which direction to fold in + """ + LEFT = 0 + RIGHT = 1 + + dir: Direction + func: 'Function' + base: Expression + iter: Expression + + def __init__( + self, + type_: TypeBase, + dir_: Direction, + func: 'Function', + base: Expression, + iter_: Expression, + ) -> None: + super().__init__(type_) + + self.dir = dir_ + self.func = func + self.base = base + self.iter = iter_ + +class ModuleConstantReference(Expression): + """ + An reference to a module constant expression within a statement + """ + __slots__ = ('definition', ) + + definition: 'ModuleConstantDef' + + def __init__(self, type_: TypeBase, definition: 'ModuleConstantDef') -> None: + super().__init__(type_) + self.definition = definition + +class Statement: + """ + A statement within a function + """ + __slots__ = () + +class StatementPass(Statement): + """ + A pass statement + """ + __slots__ = () + +class StatementReturn(Statement): + """ + A return statement within a function + """ + __slots__ = ('value', ) + + def __init__(self, value: Expression) -> None: + self.value = value + +class StatementIf(Statement): + """ + An if statement within a 
function + """ + __slots__ = ('test', 'statements', 'else_statements', ) + + test: Expression + statements: List[Statement] + else_statements: List[Statement] + + def __init__(self, test: Expression) -> None: + self.test = test + self.statements = [] + self.else_statements = [] + +FunctionParam = Tuple[str, TypeBase] + +class Function: + """ + A function processes input and produces output + """ + __slots__ = ('name', 'lineno', 'exported', 'imported', 'statements', 'returns', 'posonlyargs', ) + + name: str + lineno: int + exported: bool + imported: bool + statements: List[Statement] + returns: TypeBase + posonlyargs: List[FunctionParam] + + def __init__(self, name: str, lineno: int) -> None: + self.name = name + self.lineno = lineno + self.exported = False + self.imported = False + self.statements = [] + self.returns = TypeNone() + self.posonlyargs = [] + +class StructConstructor(Function): + """ + The constructor method for a struct + + A function will generated to instantiate a struct. 
The arguments + will be the defaults + """ + __slots__ = ('struct', ) + + struct: TypeStruct + + def __init__(self, struct: TypeStruct) -> None: + super().__init__(f'@{struct.name}@__init___@', -1) + + self.returns = struct + + for mem in struct.members: + self.posonlyargs.append((mem.name, mem.type, )) + + self.struct = struct + +class TupleConstructor(Function): + """ + The constructor method for a tuple + """ + __slots__ = ('tuple', ) + + tuple: TypeTuple + + def __init__(self, tuple_: TypeTuple) -> None: + name = tuple_.render_internal_name() + + super().__init__(f'@{name}@__init___@', -1) + + self.returns = tuple_ + + for mem in tuple_.members: + self.posonlyargs.append((f'arg{mem.idx}', mem.type, )) + + self.tuple = tuple_ + +class ModuleConstantDef: + """ + A constant definition within a module + """ + __slots__ = ('name', 'lineno', 'type', 'constant', 'data_block', ) + + name: str + lineno: int + type: TypeBase + constant: Constant + data_block: Optional['ModuleDataBlock'] + + def __init__(self, name: str, lineno: int, type_: TypeBase, constant: Constant, data_block: Optional['ModuleDataBlock']) -> None: + self.name = name + self.lineno = lineno + self.type = type_ + self.constant = constant + self.data_block = data_block + +class ModuleDataBlock: + """ + A single allocated block for module data + """ + __slots__ = ('data', 'address', ) + + data: List[Constant] + address: Optional[int] + + def __init__(self, data: List[Constant]) -> None: + self.data = data + self.address = None + +class ModuleData: + """ + The data for when a module is loaded into memory + """ + __slots__ = ('blocks', ) + + blocks: List[ModuleDataBlock] + + def __init__(self) -> None: + self.blocks = [] + +class Module: + """ + A module is a file and consists of functions + """ + __slots__ = ('data', 'types', 'structs', 'constant_defs', 'functions',) + + data: ModuleData + types: Dict[str, TypeBase] + structs: Dict[str, TypeStruct] + constant_defs: Dict[str, ModuleConstantDef] + functions: 
Dict[str, Function] + + def __init__(self) -> None: + self.types = { + 'None': TypeNone(), + 'u8': TypeUInt8(), + 'u32': TypeUInt32(), + 'u64': TypeUInt64(), + 'i32': TypeInt32(), + 'i64': TypeInt64(), + 'f32': TypeFloat32(), + 'f64': TypeFloat64(), + 'bytes': TypeBytes(), + } + self.data = ModuleData() + self.structs = {} + self.constant_defs = {} + self.functions = {} diff --git a/phasm/parser.py b/phasm/parser.py new file mode 100644 index 0000000..d95bfce --- /dev/null +++ b/phasm/parser.py @@ -0,0 +1,844 @@ +""" +Parses the source code from the plain text into a syntax tree +""" +from typing import Any, Dict, NoReturn, Union + +import ast + +from .typing import ( + TypeBase, + TypeUInt8, + TypeUInt32, + TypeUInt64, + TypeInt32, + TypeInt64, + TypeFloat32, + TypeFloat64, + TypeBytes, + TypeStruct, + TypeStructMember, + TypeTuple, + TypeTupleMember, + TypeStaticArray, + TypeStaticArrayMember, +) + +from . import codestyle +from .exceptions import StaticError +from .ourlang import ( + WEBASSEMBLY_BUILDIN_FLOAT_OPS, + + Module, ModuleDataBlock, + Function, + + Expression, + AccessBytesIndex, AccessStructMember, AccessTupleMember, AccessStaticArrayMember, + BinaryOp, + Constant, + ConstantFloat32, ConstantFloat64, ConstantInt32, ConstantInt64, + ConstantUInt8, ConstantUInt32, ConstantUInt64, + ConstantTuple, ConstantStaticArray, + + FunctionCall, + StructConstructor, TupleConstructor, + UnaryOp, VariableReference, + + Fold, ModuleConstantReference, + + Statement, + StatementIf, StatementPass, StatementReturn, + + ModuleConstantDef, +) + +def phasm_parse(source: str) -> Module: + """ + Public method for parsing Phasm code into a Phasm Module + """ + res = ast.parse(source, '') + + our_visitor = OurVisitor() + return our_visitor.visit_Module(res) + +OurLocals = Dict[str, TypeBase] + +class OurVisitor: + """ + Class to visit a Python syntax tree and create an ourlang syntax tree + + We're (ab)using the Python AST parser to give us a leg up + + At some point, we may 
deviate from Python syntax. If nothing else, + we probably won't keep up with the Python syntax changes. + """ + + # pylint: disable=C0103,C0116,C0301,R0201,R0912 + + def __init__(self) -> None: + pass + + def visit_Module(self, node: ast.Module) -> Module: + module = Module() + + _not_implemented(not node.type_ignores, 'Module.type_ignores') + + # Second pass for the types + + for stmt in node.body: + res = self.pre_visit_Module_stmt(module, stmt) + + if isinstance(res, ModuleConstantDef): + if res.name in module.constant_defs: + raise StaticError( + f'{res.name} already defined on line {module.constant_defs[res.name].lineno}' + ) + + module.constant_defs[res.name] = res + + if isinstance(res, TypeStruct): + if res.name in module.structs: + raise StaticError( + f'{res.name} already defined on line {module.structs[res.name].lineno}' + ) + + module.structs[res.name] = res + constructor = StructConstructor(res) + module.functions[constructor.name] = constructor + + if isinstance(res, Function): + if res.name in module.functions: + raise StaticError( + f'{res.name} already defined on line {module.functions[res.name].lineno}' + ) + + module.functions[res.name] = res + + # Second pass for the function bodies + + for stmt in node.body: + self.visit_Module_stmt(module, stmt) + + return module + + def pre_visit_Module_stmt(self, module: Module, node: ast.stmt) -> Union[Function, TypeStruct, ModuleConstantDef]: + if isinstance(node, ast.FunctionDef): + return self.pre_visit_Module_FunctionDef(module, node) + + if isinstance(node, ast.ClassDef): + return self.pre_visit_Module_ClassDef(module, node) + + if isinstance(node, ast.AnnAssign): + return self.pre_visit_Module_AnnAssign(module, node) + + raise NotImplementedError(f'{node} on Module') + + def pre_visit_Module_FunctionDef(self, module: Module, node: ast.FunctionDef) -> Function: + function = Function(node.name, node.lineno) + + _not_implemented(not node.args.posonlyargs, 'FunctionDef.args.posonlyargs') + + for arg in 
node.args.args: + if not arg.annotation: + _raise_static_error(node, 'Type is required') + + function.posonlyargs.append(( + arg.arg, + self.visit_type(module, arg.annotation), + )) + + _not_implemented(not node.args.vararg, 'FunctionDef.args.vararg') + _not_implemented(not node.args.kwonlyargs, 'FunctionDef.args.kwonlyargs') + _not_implemented(not node.args.kw_defaults, 'FunctionDef.args.kw_defaults') + _not_implemented(not node.args.kwarg, 'FunctionDef.args.kwarg') + _not_implemented(not node.args.defaults, 'FunctionDef.args.defaults') + + # Do stmts at the end so we have the return value + + for decorator in node.decorator_list: + if not isinstance(decorator, ast.Name): + _raise_static_error(decorator, 'Function decorators must be string') + if not isinstance(decorator.ctx, ast.Load): + _raise_static_error(decorator, 'Must be load context') + _not_implemented(decorator.id in ('exported', 'imported'), 'Custom decorators') + + if decorator.id == 'exported': + function.exported = True + else: + function.imported = True + + if node.returns: + function.returns = self.visit_type(module, node.returns) + + _not_implemented(not node.type_comment, 'FunctionDef.type_comment') + + return function + + def pre_visit_Module_ClassDef(self, module: Module, node: ast.ClassDef) -> TypeStruct: + struct = TypeStruct(node.name, node.lineno) + + _not_implemented(not node.bases, 'ClassDef.bases') + _not_implemented(not node.keywords, 'ClassDef.keywords') + _not_implemented(not node.decorator_list, 'ClassDef.decorator_list') + + offset = 0 + + for stmt in node.body: + if not isinstance(stmt, ast.AnnAssign): + raise NotImplementedError(f'Class with {stmt} nodes') + + if not isinstance(stmt.target, ast.Name): + raise NotImplementedError('Class with default values') + + if not stmt.value is None: + raise NotImplementedError('Class with default values') + + if stmt.simple != 1: + raise NotImplementedError('Class with non-simple arguments') + + member = TypeStructMember(stmt.target.id, 
self.visit_type(module, stmt.annotation), offset) + + struct.members.append(member) + offset += member.type.alloc_size() + + return struct + + def pre_visit_Module_AnnAssign(self, module: Module, node: ast.AnnAssign) -> ModuleConstantDef: + if not isinstance(node.target, ast.Name): + _raise_static_error(node, 'Must be name') + if not isinstance(node.target.ctx, ast.Store): + _raise_static_error(node, 'Must be load context') + + exp_type = self.visit_type(module, node.annotation) + + if isinstance(exp_type, TypeInt32): + if not isinstance(node.value, ast.Constant): + _raise_static_error(node, 'Must be constant') + + constant = ModuleConstantDef( + node.target.id, + node.lineno, + exp_type, + self.visit_Module_Constant(module, exp_type, node.value), + None, + ) + return constant + + if isinstance(exp_type, TypeTuple): + if not isinstance(node.value, ast.Tuple): + _raise_static_error(node, 'Must be tuple') + + if len(exp_type.members) != len(node.value.elts): + _raise_static_error(node, 'Invalid number of tuple values') + + tuple_data = [ + self.visit_Module_Constant(module, mem.type, arg_node) + for arg_node, mem in zip(node.value.elts, exp_type.members) + if isinstance(arg_node, ast.Constant) + ] + if len(exp_type.members) != len(tuple_data): + _raise_static_error(node, 'Tuple arguments must be constants') + + # Allocate the data + data_block = ModuleDataBlock(tuple_data) + module.data.blocks.append(data_block) + + # Then return the constant as a pointer + return ModuleConstantDef( + node.target.id, + node.lineno, + exp_type, + ConstantTuple(exp_type, tuple_data), + data_block, + ) + + if isinstance(exp_type, TypeStaticArray): + if not isinstance(node.value, ast.Tuple): + _raise_static_error(node, 'Must be static array') + + if len(exp_type.members) != len(node.value.elts): + _raise_static_error(node, 'Invalid number of static array values') + + static_array_data = [ + self.visit_Module_Constant(module, exp_type.member_type, arg_node) + for arg_node in 
node.value.elts + if isinstance(arg_node, ast.Constant) + ] + if len(exp_type.members) != len(static_array_data): + _raise_static_error(node, 'Static array arguments must be constants') + + # Allocate the data + data_block = ModuleDataBlock(static_array_data) + module.data.blocks.append(data_block) + + # Then return the constant as a pointer + return ModuleConstantDef( + node.target.id, + node.lineno, + exp_type, + ConstantStaticArray(exp_type, static_array_data), + data_block, + ) + + raise NotImplementedError(f'{node} on Module AnnAssign') + + def visit_Module_stmt(self, module: Module, node: ast.stmt) -> None: + if isinstance(node, ast.FunctionDef): + self.visit_Module_FunctionDef(module, node) + return + + if isinstance(node, ast.ClassDef): + return + + if isinstance(node, ast.AnnAssign): + return + + raise NotImplementedError(f'{node} on Module') + + def visit_Module_FunctionDef(self, module: Module, node: ast.FunctionDef) -> None: + function = module.functions[node.name] + + our_locals = dict(function.posonlyargs) + + for stmt in node.body: + function.statements.append( + self.visit_Module_FunctionDef_stmt(module, function, our_locals, stmt) + ) + + def visit_Module_FunctionDef_stmt(self, module: Module, function: Function, our_locals: OurLocals, node: ast.stmt) -> Statement: + if isinstance(node, ast.Return): + if node.value is None: + # TODO: Implement methods without return values + _raise_static_error(node, 'Return must have an argument') + + return StatementReturn( + self.visit_Module_FunctionDef_expr(module, function, our_locals, function.returns, node.value) + ) + + if isinstance(node, ast.If): + result = StatementIf( + self.visit_Module_FunctionDef_expr(module, function, our_locals, function.returns, node.test) + ) + + for stmt in node.body: + result.statements.append( + self.visit_Module_FunctionDef_stmt(module, function, our_locals, stmt) + ) + + for stmt in node.orelse: + result.else_statements.append( + self.visit_Module_FunctionDef_stmt(module, 
function, our_locals, stmt) + ) + + return result + + if isinstance(node, ast.Pass): + return StatementPass() + + raise NotImplementedError(f'{node} as stmt in FunctionDef') + + def visit_Module_FunctionDef_expr(self, module: Module, function: Function, our_locals: OurLocals, exp_type: TypeBase, node: ast.expr) -> Expression: + if isinstance(node, ast.BinOp): + if isinstance(node.op, ast.Add): + operator = '+' + elif isinstance(node.op, ast.Sub): + operator = '-' + elif isinstance(node.op, ast.Mult): + operator = '*' + elif isinstance(node.op, ast.LShift): + operator = '<<' + elif isinstance(node.op, ast.RShift): + operator = '>>' + elif isinstance(node.op, ast.BitOr): + operator = '|' + elif isinstance(node.op, ast.BitXor): + operator = '^' + elif isinstance(node.op, ast.BitAnd): + operator = '&' + else: + raise NotImplementedError(f'Operator {node.op}') + + # Assume the type doesn't change when descending into a binary operator + # e.g. you can do `"hello" * 3` with the code below (yet) + + return BinaryOp( + exp_type, + operator, + self.visit_Module_FunctionDef_expr(module, function, our_locals, exp_type, node.left), + self.visit_Module_FunctionDef_expr(module, function, our_locals, exp_type, node.right), + ) + + if isinstance(node, ast.UnaryOp): + if isinstance(node.op, ast.UAdd): + operator = '+' + elif isinstance(node.op, ast.USub): + operator = '-' + else: + raise NotImplementedError(f'Operator {node.op}') + + return UnaryOp( + exp_type, + operator, + self.visit_Module_FunctionDef_expr(module, function, our_locals, exp_type, node.operand), + ) + + if isinstance(node, ast.Compare): + if 1 < len(node.ops): + raise NotImplementedError('Multiple operators') + + if isinstance(node.ops[0], ast.Gt): + operator = '>' + elif isinstance(node.ops[0], ast.Eq): + operator = '==' + elif isinstance(node.ops[0], ast.Lt): + operator = '<' + else: + raise NotImplementedError(f'Operator {node.ops}') + + # Assume the type doesn't change when descending into a binary operator + 
# e.g. you can do `"hello" * 3` with the code below (yet) + + return BinaryOp( + exp_type, + operator, + self.visit_Module_FunctionDef_expr(module, function, our_locals, exp_type, node.left), + self.visit_Module_FunctionDef_expr(module, function, our_locals, exp_type, node.comparators[0]), + ) + + if isinstance(node, ast.Call): + return self.visit_Module_FunctionDef_Call(module, function, our_locals, exp_type, node) + + if isinstance(node, ast.Constant): + return self.visit_Module_Constant( + module, exp_type, node, + ) + + if isinstance(node, ast.Attribute): + return self.visit_Module_FunctionDef_Attribute( + module, function, our_locals, exp_type, node, + ) + + if isinstance(node, ast.Subscript): + return self.visit_Module_FunctionDef_Subscript( + module, function, our_locals, exp_type, node, + ) + + if isinstance(node, ast.Name): + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + if node.id in our_locals: + act_type = our_locals[node.id] + if exp_type != act_type: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.id} is actually {codestyle.type_(act_type)}') + + return VariableReference(act_type, node.id) + + if node.id in module.constant_defs: + cdef = module.constant_defs[node.id] + if exp_type != cdef.type: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.id} is actually {codestyle.type_(cdef.type)}') + + return ModuleConstantReference(exp_type, cdef) + + _raise_static_error(node, f'Undefined variable {node.id}') + + if isinstance(node, ast.Tuple): + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + if isinstance(exp_type, TypeTuple): + if len(exp_type.members) != len(node.elts): + _raise_static_error(node, f'Expression is expecting a tuple of size {len(exp_type.members)}, but {len(node.elts)} are given') + + tuple_constructor = TupleConstructor(exp_type) + + func = module.functions[tuple_constructor.name] + + result = 
FunctionCall(func) + result.arguments = [ + self.visit_Module_FunctionDef_expr(module, function, our_locals, mem.type, arg_node) + for arg_node, mem in zip(node.elts, exp_type.members) + ] + return result + + _raise_static_error(node, f'Expression is expecting a {codestyle.type_(exp_type)}, not a tuple') + + raise NotImplementedError(f'{node} as expr in FunctionDef') + + def visit_Module_FunctionDef_Call(self, module: Module, function: Function, our_locals: OurLocals, exp_type: TypeBase, node: ast.Call) -> Union[Fold, FunctionCall, UnaryOp]: + if node.keywords: + _raise_static_error(node, 'Keyword calling not supported') # Yet? + + if not isinstance(node.func, ast.Name): + raise NotImplementedError(f'Calling methods that are not a name {node.func}') + if not isinstance(node.func.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + if node.func.id in module.structs: + struct = module.structs[node.func.id] + struct_constructor = StructConstructor(struct) + + func = module.functions[struct_constructor.name] + elif node.func.id in WEBASSEMBLY_BUILDIN_FLOAT_OPS: + if not isinstance(exp_type, (TypeFloat32, TypeFloat64, )): + _raise_static_error(node, f'Cannot make {node.func.id} result in {codestyle.type_(exp_type)}') + + if 1 != len(node.args): + _raise_static_error(node, f'Function {node.func.id} requires 1 arguments but {len(node.args)} are given') + + return UnaryOp( + exp_type, + 'sqrt', + self.visit_Module_FunctionDef_expr(module, function, our_locals, exp_type, node.args[0]), + ) + elif node.func.id == 'u32': + if not isinstance(exp_type, TypeUInt32): + _raise_static_error(node, f'Cannot make {node.func.id} result in {exp_type}') + + if 1 != len(node.args): + _raise_static_error(node, f'Function {node.func.id} requires 1 arguments but {len(node.args)} are given') + + # FIXME: This is a stub, proper casting is todo + + return UnaryOp( + exp_type, + 'cast', + self.visit_Module_FunctionDef_expr(module, function, our_locals, module.types['u8'], 
node.args[0]), + ) + elif node.func.id == 'len': + if not isinstance(exp_type, TypeInt32): + _raise_static_error(node, f'Cannot make {node.func.id} result in {exp_type}') + + if 1 != len(node.args): + _raise_static_error(node, f'Function {node.func.id} requires 1 arguments but {len(node.args)} are given') + + return UnaryOp( + exp_type, + 'len', + self.visit_Module_FunctionDef_expr(module, function, our_locals, module.types['bytes'], node.args[0]), + ) + elif node.func.id == 'foldl': + # TODO: This should a much more generic function! + # For development purposes, we're assuming you're doing a foldl(Callable[[u8, u8], u8], u8, bytes) + # In the future, we should probably infer the type of the second argument, + # and use it as expected types for the other u8s and the Iterable[u8] (i.e. bytes) + + if 3 != len(node.args): + _raise_static_error(node, f'Function {node.func.id} requires 3 arguments but {len(node.args)} are given') + + # TODO: This is not generic + subnode = node.args[0] + if not isinstance(subnode, ast.Name): + raise NotImplementedError(f'Calling methods that are not a name {subnode}') + if not isinstance(subnode.ctx, ast.Load): + _raise_static_error(subnode, 'Must be load context') + if subnode.id not in module.functions: + _raise_static_error(subnode, 'Reference to undefined function') + func = module.functions[subnode.id] + if 2 != len(func.posonlyargs): + _raise_static_error(node, f'Function {node.func.id} requires a function with 2 arguments but a function with {len(func.posonlyargs)} args is given') + + if exp_type.__class__ != func.returns.__class__: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {func.name} actually returns {codestyle.type_(func.returns)}') + + if func.returns.__class__ != func.posonlyargs[0][1].__class__: + _raise_static_error(node, f'Expected a foldable function, {func.name} returns a {codestyle.type_(func.returns)} but expects a {codestyle.type_(func.posonlyargs[0][1])}') + + if 
module.types['u8'].__class__ != func.posonlyargs[1][1].__class__: + _raise_static_error(node, 'Only folding over bytes (u8) is supported at this time') + + return Fold( + exp_type, + Fold.Direction.LEFT, + func, + self.visit_Module_FunctionDef_expr(module, function, our_locals, func.returns, node.args[1]), + self.visit_Module_FunctionDef_expr(module, function, our_locals, module.types['bytes'], node.args[2]), + ) + else: + if node.func.id not in module.functions: + _raise_static_error(node, 'Call to undefined function') + + func = module.functions[node.func.id] + + if func.returns != exp_type: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {func.name} actually returns {codestyle.type_(func.returns)}') + + if len(func.posonlyargs) != len(node.args): + _raise_static_error(node, f'Function {node.func.id} requires {len(func.posonlyargs)} arguments but {len(node.args)} are given') + + result = FunctionCall(func) + result.arguments.extend( + self.visit_Module_FunctionDef_expr(module, function, our_locals, arg_type, arg_expr) + for arg_expr, (_, arg_type) in zip(node.args, func.posonlyargs) + ) + return result + + def visit_Module_FunctionDef_Attribute(self, module: Module, function: Function, our_locals: OurLocals, exp_type: TypeBase, node: ast.Attribute) -> Expression: + del module + del function + + if not isinstance(node.value, ast.Name): + _raise_static_error(node, 'Must reference a name') + + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + if not node.value.id in our_locals: + _raise_static_error(node, f'Undefined variable {node.value.id}') + + node_typ = our_locals[node.value.id] + if not isinstance(node_typ, TypeStruct): + _raise_static_error(node, f'Cannot take attribute of non-struct {node.value.id}') + + member = node_typ.get_member(node.attr) + if member is None: + _raise_static_error(node, f'{node_typ.name} has no attribute {node.attr}') + + if exp_type != member.type: + 
_raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.value.id}.{member.name} is actually {codestyle.type_(member.type)}') + + return AccessStructMember( + VariableReference(node_typ, node.value.id), + member, + ) + + def visit_Module_FunctionDef_Subscript(self, module: Module, function: Function, our_locals: OurLocals, exp_type: TypeBase, node: ast.Subscript) -> Expression: + if not isinstance(node.value, ast.Name): + _raise_static_error(node, 'Must reference a name') + + if not isinstance(node.slice, ast.Index): + _raise_static_error(node, 'Must subscript using an index') + + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + varref: Union[ModuleConstantReference, VariableReference] + if node.value.id in our_locals: + node_typ = our_locals[node.value.id] + varref = VariableReference(node_typ, node.value.id) + elif node.value.id in module.constant_defs: + constant_def = module.constant_defs[node.value.id] + node_typ = constant_def.type + varref = ModuleConstantReference(node_typ, constant_def) + else: + _raise_static_error(node, f'Undefined variable {node.value.id}') + + slice_expr = self.visit_Module_FunctionDef_expr( + module, function, our_locals, module.types['u32'], node.slice.value, + ) + + if isinstance(node_typ, TypeBytes): + t_u8 = module.types['u8'] + if exp_type != t_u8: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.value.id}[{codestyle.expression(slice_expr)}] is actually {codestyle.type_(t_u8)}') + + if isinstance(varref, ModuleConstantReference): + raise NotImplementedError(f'{node} from module constant') + + return AccessBytesIndex( + t_u8, + varref, + slice_expr, + ) + + if isinstance(node_typ, TypeTuple): + if not isinstance(slice_expr, ConstantUInt32): + _raise_static_error(node, 'Must subscript using a constant index') + + idx = slice_expr.value + + if len(node_typ.members) <= idx: + _raise_static_error(node, f'Index {idx} out of bounds for tuple 
{node.value.id}') + + tuple_member = node_typ.members[idx] + if exp_type != tuple_member.type: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.value.id}[{idx}] is actually {codestyle.type_(tuple_member.type)}') + + if isinstance(varref, ModuleConstantReference): + raise NotImplementedError(f'{node} from module constant') + + return AccessTupleMember( + varref, + tuple_member, + ) + + if isinstance(node_typ, TypeStaticArray): + if exp_type != node_typ.member_type: + _raise_static_error(node, f'Expected {codestyle.type_(exp_type)}, {node.value.id}[{codestyle.expression(slice_expr)}] is actually {codestyle.type_(node_typ.member_type)}') # idx is not assigned yet here + + if not isinstance(slice_expr, ConstantInt32): + return AccessStaticArrayMember( + varref, + node_typ, + slice_expr, + ) + + idx = slice_expr.value + + if len(node_typ.members) <= idx: + _raise_static_error(node, f'Index {idx} out of bounds for static array {node.value.id}') + + static_array_member = node_typ.members[idx] + + return AccessStaticArrayMember( + varref, + node_typ, + static_array_member, + ) + + _raise_static_error(node, f'Cannot take index of {node_typ} {node.value.id}') + + def visit_Module_Constant(self, module: Module, exp_type: TypeBase, node: ast.Constant) -> Constant: + del module + + _not_implemented(node.kind is None, 'Constant.kind') + + if isinstance(exp_type, TypeUInt8): + if not isinstance(node.value, int): + _raise_static_error(node, 'Expected integer value') + + if node.value < 0 or node.value > 255: + _raise_static_error(node, f'Integer value out of range; expected 0..255, actual {node.value}') + + return ConstantUInt8(exp_type, node.value) + + if isinstance(exp_type, TypeUInt32): + if not isinstance(node.value, int): + _raise_static_error(node, 'Expected integer value') + + if node.value < 0 or node.value > 4294967295: + _raise_static_error(node, 'Integer value out of range') + + return ConstantUInt32(exp_type, node.value) + + if isinstance(exp_type, TypeUInt64): + if not isinstance(node.value, int): + 
_raise_static_error(node, 'Expected integer value') + + if node.value < 0 or node.value > 18446744073709551615: + _raise_static_error(node, 'Integer value out of range') + + return ConstantUInt64(exp_type, node.value) + + if isinstance(exp_type, TypeInt32): + if not isinstance(node.value, int): + _raise_static_error(node, 'Expected integer value') + + if node.value < -2147483648 or node.value > 2147483647: + _raise_static_error(node, 'Integer value out of range') + + return ConstantInt32(exp_type, node.value) + + if isinstance(exp_type, TypeInt64): + if not isinstance(node.value, int): + _raise_static_error(node, 'Expected integer value') + + if node.value < -9223372036854775808 or node.value > 9223372036854775807: + _raise_static_error(node, 'Integer value out of range') + + return ConstantInt64(exp_type, node.value) + + if isinstance(exp_type, TypeFloat32): + if not isinstance(node.value, (float, int, )): + _raise_static_error(node, 'Expected float value') + + # FIXME: Range check + + return ConstantFloat32(exp_type, node.value) + + if isinstance(exp_type, TypeFloat64): + if not isinstance(node.value, (float, int, )): + _raise_static_error(node, 'Expected float value') + + # FIXME: Range check + + return ConstantFloat64(exp_type, node.value) + + raise NotImplementedError(f'{node} as const for type {exp_type}') + + def visit_type(self, module: Module, node: ast.expr) -> TypeBase: + if isinstance(node, ast.Constant): + if node.value is None: + return module.types['None'] + + _raise_static_error(node, f'Unrecognized type {node.value}') + + if isinstance(node, ast.Name): + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + if node.id in module.types: + return module.types[node.id] + + if node.id in module.structs: + return module.structs[node.id] + + _raise_static_error(node, f'Unrecognized type {node.id}') + + if isinstance(node, ast.Subscript): + if not isinstance(node.value, ast.Name): + _raise_static_error(node, 'Must 
be name') + if not isinstance(node.slice, ast.Index): + _raise_static_error(node, 'Must subscript using an index') + if not isinstance(node.slice.value, ast.Constant): + _raise_static_error(node, 'Must subscript using a constant index') + if not isinstance(node.slice.value.value, int): + _raise_static_error(node, 'Must subscript using a constant integer index') + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + if node.value.id in module.types: + member_type = module.types[node.value.id] + else: + _raise_static_error(node, f'Unrecognized type {node.value.id}') + + type_static_array = TypeStaticArray(member_type) + + offset = 0 + + for idx in range(node.slice.value.value): + static_array_member = TypeStaticArrayMember(idx, offset) + + type_static_array.members.append(static_array_member) + offset += member_type.alloc_size() + + key = f'{node.value.id}[{node.slice.value.value}]' + + if key not in module.types: + module.types[key] = type_static_array + + return module.types[key] + + if isinstance(node, ast.Tuple): + if not isinstance(node.ctx, ast.Load): + _raise_static_error(node, 'Must be load context') + + type_tuple = TypeTuple() + + offset = 0 + + for idx, elt in enumerate(node.elts): + tuple_member = TypeTupleMember(idx, self.visit_type(module, elt), offset) + + type_tuple.members.append(tuple_member) + offset += tuple_member.type.alloc_size() + + key = type_tuple.render_internal_name() + + if key not in module.types: + module.types[key] = type_tuple + constructor = TupleConstructor(type_tuple) + module.functions[constructor.name] = constructor + + return module.types[key] + + raise NotImplementedError(f'{node} as type') + +def _not_implemented(check: Any, msg: str) -> None: + if not check: + raise NotImplementedError(msg) + +def _raise_static_error(node: Union[ast.mod, ast.stmt, ast.expr], msg: str) -> NoReturn: + raise StaticError( + f'Static error on line {node.lineno}: {msg}' + ) diff --git 
a/phasm/stdlib/__init__.py b/phasm/stdlib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/phasm/stdlib/alloc.py b/phasm/stdlib/alloc.py new file mode 100644 index 0000000..2761bfb --- /dev/null +++ b/phasm/stdlib/alloc.py @@ -0,0 +1,86 @@ +""" +stdlib: Memory allocation +""" +from phasm.wasmgenerator import Generator, VarType_i32 as i32, func_wrapper + +IDENTIFIER = 0xA1C0 + +ADR_IDENTIFIER = 0 +ADR_RESERVED0 = ADR_IDENTIFIER + 4 +ADR_FREE_BLOCK_PTR = ADR_RESERVED0 + 4 +ADR_UNALLOC_PTR = ADR_FREE_BLOCK_PTR + 4 + +UNALLOC_PTR = ADR_UNALLOC_PTR + 4 + +# For memory initialization see phasm.compiler.module_data + +@func_wrapper(exported=False) +def __find_free_block__(g: Generator, alloc_size: i32) -> i32: + # Find out if we've freed any blocks at all so far + g.i32.const(ADR_FREE_BLOCK_PTR) + g.i32.load() + g.i32.const(0) + g.i32.eq() + + with g.if_(): + g.i32.const(0) + g.return_() + + del alloc_size # TODO + g.unreachable() + + return i32('return') # To satisfy mypy + +@func_wrapper() +def __alloc__(g: Generator, alloc_size: i32) -> i32: + result = i32('result') + + # Check if the memory is already initialized + g.i32.const(ADR_IDENTIFIER) + g.i32.load() + g.i32.const(IDENTIFIER) + g.i32.ne() + with g.if_(): + # Not yet initialized, or memory corruption + g.unreachable() + + # Try to claim a free block + g.local.get(alloc_size) + g.call(__find_free_block__) + g.local.set(result) + + # Check if there was a free block + g.local.get(result) + g.i32.const(0) + g.i32.eq() + with g.if_(): + # No free blocks, increase allocated memory usage + + # Put the address on the stack in advance so we can store to it later + g.i32.const(ADR_UNALLOC_PTR) + + # Get the current unalloc pointer value + g.i32.const(ADR_UNALLOC_PTR) + g.i32.load() + g.local.tee(result) + + # Calculate new unalloc pointer value + g.i32.const(4) # Header size + g.i32.add() + g.local.get(alloc_size) + g.i32.add() + + # Store new unalloc pointer value (address was set on stack in advance) 
+ g.i32.store() + + # Store block size in the header + g.local.get(result) + g.local.get(alloc_size) + g.i32.store() + + # Return address of the allocated memory, skipping the allocator header + g.local.get(result) + g.i32.const(4) # Header size + g.i32.add() + + return i32('return') # To satisfy mypy diff --git a/phasm/stdlib/types.py b/phasm/stdlib/types.py new file mode 100644 index 0000000..b902642 --- /dev/null +++ b/phasm/stdlib/types.py @@ -0,0 +1,66 @@ +""" +stdlib: Standard types that are not wasm primitives +""" +from phasm.wasmgenerator import Generator, VarType_i32 as i32, func_wrapper + +from phasm.stdlib import alloc + +@func_wrapper() +def __alloc_bytes__(g: Generator, length: i32) -> i32: + """ + Allocates room for a bytes instance, but does not write + anything to the allocated memory + """ + result = i32('result') + + # Allocate the length of the byte string, as well + # as 4 bytes for a length header + g.local.get(length) + g.i32.const(4) + g.i32.add() + g.call(alloc.__alloc__) + + # Store the address in a variable so we can use it up + # for writing the length header + g.local.tee(result) + g.local.get(length) + g.i32.store() + + # Get the address back from the variable as return + g.local.get(result) + + return i32('return') # To satisfy mypy + +@func_wrapper() +def __subscript_bytes__(g: Generator, adr: i32, ofs: i32) -> i32: + """ + Returns an index from a bytes value + + If ofs is more than the length of the bytes, this + function returns 0, following the 'no undefined behaviour' + philosophy. 
+ + adr i32 The pointer for the allocated bytes + ofs i32 The offset within the allocated bytes + """ + g.local.get(ofs) + g.local.get(adr) + g.i32.load() + g.i32.lt_u() + + with g.if_(): + # The offset is less than the length + + g.local.get(adr) + g.i32.const(4) # Bytes header + g.i32.add() + g.local.get(ofs) + g.i32.add() + g.i32.load8_u() + g.return_() + + # The offset is outside the allocated bytes + g.i32.const(0) + g.return_() + + return i32('return') # To satisfy mypy diff --git a/phasm/typing.py b/phasm/typing.py new file mode 100644 index 0000000..e56f7a9 --- /dev/null +++ b/phasm/typing.py @@ -0,0 +1,202 @@ +""" +The phasm type system +""" +from typing import Optional, List + +class TypeBase: + """ + TypeBase base class + """ + __slots__ = () + + def alloc_size(self) -> int: + """ + When allocating this type in memory, how many bytes do we need to reserve? + """ + raise NotImplementedError(self, 'alloc_size') + +class TypeNone(TypeBase): + """ + The None (or Void) type + """ + __slots__ = () + +class TypeBool(TypeBase): + """ + The boolean type + """ + __slots__ = () + +class TypeUInt8(TypeBase): + """ + The Integer type, unsigned and 8 bits wide + + Note that under the hood we need to use i32 to represent + these values in expressions. So we need to add some operations + to make sure the math checks out. + + So while this does save bytes in memory, it may not actually + speed up or improve your code. 
+ """ + __slots__ = () + + def alloc_size(self) -> int: + return 4 # Int32 under the hood + +class TypeUInt32(TypeBase): + """ + The Integer type, unsigned and 32 bits wide + """ + __slots__ = () + + def alloc_size(self) -> int: + return 4 + +class TypeUInt64(TypeBase): + """ + The Integer type, unsigned and 64 bits wide + """ + __slots__ = () + + def alloc_size(self) -> int: + return 8 + +class TypeInt32(TypeBase): + """ + The Integer type, signed and 32 bits wide + """ + __slots__ = () + + def alloc_size(self) -> int: + return 4 + +class TypeInt64(TypeBase): + """ + The Integer type, signed and 64 bits wide + """ + __slots__ = () + + def alloc_size(self) -> int: + return 8 + +class TypeFloat32(TypeBase): + """ + The Float type, 32 bits wide + """ + __slots__ = () + + def alloc_size(self) -> int: + return 4 + +class TypeFloat64(TypeBase): + """ + The Float type, 64 bits wide + """ + __slots__ = () + + def alloc_size(self) -> int: + return 8 + +class TypeBytes(TypeBase): + """ + The bytes type + """ + __slots__ = () + +class TypeTupleMember: + """ + Represents a tuple member + """ + def __init__(self, idx: int, type_: TypeBase, offset: int) -> None: + self.idx = idx + self.type = type_ + self.offset = offset + +class TypeTuple(TypeBase): + """ + The tuple type + """ + __slots__ = ('members', ) + + members: List[TypeTupleMember] + + def __init__(self) -> None: + self.members = [] + + def render_internal_name(self) -> str: + """ + Generates an internal name for this tuple + """ + mems = '@'.join('?' 
for x in self.members) # FIXME: Should not be a questionmark + assert ' ' not in mems, 'Not implement yet: subtuples' + return f'tuple@{mems}' + + def alloc_size(self) -> int: + return sum( + x.type.alloc_size() + for x in self.members + ) + +class TypeStaticArrayMember: + """ + Represents a static array member + """ + def __init__(self, idx: int, offset: int) -> None: + self.idx = idx + self.offset = offset + +class TypeStaticArray(TypeBase): + """ + The static array type + """ + __slots__ = ('member_type', 'members', ) + + member_type: TypeBase + members: List[TypeStaticArrayMember] + + def __init__(self, member_type: TypeBase) -> None: + self.member_type = member_type + self.members = [] + + def alloc_size(self) -> int: + return self.member_type.alloc_size() * len(self.members) + +class TypeStructMember: + """ + Represents a struct member + """ + def __init__(self, name: str, type_: TypeBase, offset: int) -> None: + self.name = name + self.type = type_ + self.offset = offset + +class TypeStruct(TypeBase): + """ + A struct has named properties + """ + __slots__ = ('name', 'lineno', 'members', ) + + name: str + lineno: int + members: List[TypeStructMember] + + def __init__(self, name: str, lineno: int) -> None: + self.name = name + self.lineno = lineno + self.members = [] + + def get_member(self, name: str) -> Optional[TypeStructMember]: + """ + Returns a member by name + """ + for mem in self.members: + if mem.name == name: + return mem + + return None + + def alloc_size(self) -> int: + return sum( + x.type.alloc_size() + for x in self.members + ) diff --git a/phasm/wasm.py b/phasm/wasm.py new file mode 100644 index 0000000..7c5a982 --- /dev/null +++ b/phasm/wasm.py @@ -0,0 +1,199 @@ +""" +Python classes for storing the representation of Web Assembly code, +and being able to conver it to Web Assembly Text Format +""" + +from typing import Iterable, List, Optional, Tuple + +class WatSerializable: + """ + Mixin for clases that can be serialized as WebAssembly Text 
+ """ + def to_wat(self) -> str: + """ + Renders this object as WebAssembly Text + """ + raise NotImplementedError(self, 'to_wat') + +class WasmType(WatSerializable): + """ + Type base class + """ + +class WasmTypeNone(WasmType): + """ + Type when there is no type + """ + def to_wat(self) -> str: + raise Exception('None type is only a placeholder') + +class WasmTypeInt32(WasmType): + """ + i32 value + + Signed or not depends on the operations, not the type + """ + def to_wat(self) -> str: + return 'i32' + +class WasmTypeInt64(WasmType): + """ + i64 value + + Signed or not depends on the operations, not the type + """ + def to_wat(self) -> str: + return 'i64' + +class WasmTypeFloat32(WasmType): + """ + f32 value + """ + def to_wat(self) -> str: + return 'f32' + +class WasmTypeFloat64(WasmType): + """ + f64 value + """ + def to_wat(self) -> str: + return 'f64' + +class WasmTypeVector(WasmType): + """ + A vector is a 128-bit value + """ + def to_wat(self) -> str: + return 'v128' + +class WasmTypeVectorInt32x4(WasmTypeVector): + """ + 4 Int32 values in a single vector + """ + +Param = Tuple[str, WasmType] + +class Import(WatSerializable): + """ + Represents a Web Assembly import + """ + def __init__( + self, + module: str, + name: str, + intname: str, + params: Iterable[Param], + result: WasmType, + ) -> None: + self.module = module + self.name = name + self.intname = intname + self.params = [*params] + self.result = result + + def to_wat(self) -> str: + return '(import "{}" "{}" (func ${}{}{}))'.format( + self.module, + self.name, + self.intname, + ''.join( + f' (param {typ.to_wat()})' + for _, typ in self.params + ), + '' if isinstance(self.result, WasmTypeNone) + else f' (result {self.result.to_wat()})' + ) + +class Statement(WatSerializable): + """ + Represents a Web Assembly statement + """ + def __init__(self, name: str, *args: str, comment: Optional[str] = None): + self.name = name + self.args = args + self.comment = comment + + def to_wat(self) -> str: + args = 
' '.join(self.args) + comment = f' ;; {self.comment}' if self.comment else '' + + return f'{self.name} {args}{comment}' + +class Function(WatSerializable): + """ + Represents a Web Assembly function + """ + def __init__( + self, + name: str, + exported_name: Optional[str], + params: Iterable[Param], + locals_: Iterable[Param], + result: WasmType, + statements: Iterable[Statement], + ) -> None: + self.name = name + self.exported_name = exported_name + self.params = [*params] + self.locals = [*locals_] + self.result = result + self.statements = [*statements] + + def to_wat(self) -> str: + header = f'${self.name}' # Name for internal use + + if self.exported_name is not None: + # Name for external use + header += f' (export "{self.exported_name}")' + + for nam, typ in self.params: + header += f' (param ${nam} {typ.to_wat()})' + + if not isinstance(self.result, WasmTypeNone): + header += f' (result {self.result.to_wat()})' + + for nam, typ in self.locals: + header += f' (local ${nam} {typ.to_wat()})' + + return '(func {}\n {}\n )'.format( + header, + '\n '.join(x.to_wat() for x in self.statements), + ) + +class ModuleMemory(WatSerializable): + """ + Represents a WebAssembly module's memory + """ + def __init__(self, data: bytes = b'') -> None: + self.data = data + + def to_wat(self) -> str: + data = ''.join( + f'\\{x:02x}' + for x in self.data + ) + + return ( + '(memory 1)\n ' + f'(data (memory 0) (i32.const 0) "{data}")\n ' + '(export "memory" (memory 0))\n' + ) + +class Module(WatSerializable): + """ + Represents a Web Assembly module + """ + def __init__(self) -> None: + self.imports: List[Import] = [] + self.functions: List[Function] = [] + self.memory = ModuleMemory() + + def to_wat(self) -> str: + """ + Generates the text version + """ + return '(module\n {}\n {}\n {})\n'.format( + '\n '.join(x.to_wat() for x in self.imports), + self.memory.to_wat(), + '\n '.join(x.to_wat() for x in self.functions), + ) diff --git a/phasm/wasmeasy.py b/phasm/wasmeasy.py new file 
mode 100644 index 0000000..d0cf358 --- /dev/null +++ b/phasm/wasmeasy.py @@ -0,0 +1,69 @@ +""" +Helper functions to quickly generate WASM code +""" +from typing import Any, Dict, List, Optional, Type + +import functools + +from . import wasm + +#pylint: disable=C0103,C0115,C0116,R0201,R0902 + +class Prefix_inn_fnn: + def __init__(self, prefix: str) -> None: + self.prefix = prefix + + # 6.5.5. Memory Instructions + self.load = functools.partial(wasm.Statement, f'{self.prefix}.load') + self.store = functools.partial(wasm.Statement, f'{self.prefix}.store') + + # 6.5.6. Numeric Instructions + self.clz = functools.partial(wasm.Statement, f'{self.prefix}.clz') + self.ctz = functools.partial(wasm.Statement, f'{self.prefix}.ctz') + self.popcnt = functools.partial(wasm.Statement, f'{self.prefix}.popcnt') + self.add = functools.partial(wasm.Statement, f'{self.prefix}.add') + self.sub = functools.partial(wasm.Statement, f'{self.prefix}.sub') + self.mul = functools.partial(wasm.Statement, f'{self.prefix}.mul') + self.div_s = functools.partial(wasm.Statement, f'{self.prefix}.div_s') + self.div_u = functools.partial(wasm.Statement, f'{self.prefix}.div_u') + self.rem_s = functools.partial(wasm.Statement, f'{self.prefix}.rem_s') + self.rem_u = functools.partial(wasm.Statement, f'{self.prefix}.rem_u') + self.and_ = functools.partial(wasm.Statement, f'{self.prefix}.and') + self.or_ = functools.partial(wasm.Statement, f'{self.prefix}.or') + self.xor = functools.partial(wasm.Statement, f'{self.prefix}.xor') + self.shl = functools.partial(wasm.Statement, f'{self.prefix}.shl') + self.shr_s = functools.partial(wasm.Statement, f'{self.prefix}.shr_s') + self.shr_u = functools.partial(wasm.Statement, f'{self.prefix}.shr_u') + self.rotl = functools.partial(wasm.Statement, f'{self.prefix}.rotl') + self.rotr = functools.partial(wasm.Statement, f'{self.prefix}.rotr') + + self.eqz = functools.partial(wasm.Statement, f'{self.prefix}.eqz') + self.eq = functools.partial(wasm.Statement, 
f'{self.prefix}.eq')
+        self.ne = functools.partial(wasm.Statement, f'{self.prefix}.ne')
+        self.lt_s = functools.partial(wasm.Statement, f'{self.prefix}.lt_s')
+        self.lt_u = functools.partial(wasm.Statement, f'{self.prefix}.lt_u')
+        self.gt_s = functools.partial(wasm.Statement, f'{self.prefix}.gt_s')
+        self.gt_u = functools.partial(wasm.Statement, f'{self.prefix}.gt_u')
+        self.le_s = functools.partial(wasm.Statement, f'{self.prefix}.le_s')
+        self.le_u = functools.partial(wasm.Statement, f'{self.prefix}.le_u')
+        self.ge_s = functools.partial(wasm.Statement, f'{self.prefix}.ge_s')
+        self.ge_u = functools.partial(wasm.Statement, f'{self.prefix}.ge_u')
+
+    def const(self, value: int, comment: Optional[str] = None) -> wasm.Statement:
+        return wasm.Statement(f'{self.prefix}.const', f'{"-" if value < 0 else ""}0x{abs(value):08x}', comment=comment)  # WAT puts the sign before 0x
+
+i32 = Prefix_inn_fnn('i32')
+i64 = Prefix_inn_fnn('i64')
+
+class Block:
+    def __init__(self, start: str) -> None:
+        self.start = start  # opening instruction emitted by __call__, e.g. 'if'
+
+    def __call__(self, *statements: wasm.Statement) -> List[wasm.Statement]:
+        return [
+            wasm.Statement(self.start),  # open with the configured instruction so non-'if' blocks work
+            *statements,
+            wasm.Statement('end'),
+        ]
+
+if_ = Block('if')
diff --git a/phasm/wasmgenerator.py b/phasm/wasmgenerator.py
new file mode 100644
index 0000000..48cca1a
--- /dev/null
+++ b/phasm/wasmgenerator.py
@@ -0,0 +1,220 @@
+"""
+Helper functions to generate WASM code by writing Python functions
+"""
+from typing import Any, Callable, Dict, List, Optional, Type
+
+import functools
+
+from . import wasm
+
+# pylint: disable=C0103,C0115,C0116,R0902
+
+class VarType_Base:
+    wasm_type: Type[wasm.WasmType]
+
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.name_ref = f'${name}'
+
+class VarType_u8(VarType_Base):
+    wasm_type = wasm.WasmTypeInt32
+
+class VarType_i32(VarType_Base):
+    wasm_type = wasm.WasmTypeInt32
+
+class Generator_i32i64:
+    def __init__(self, prefix: str, generator: 'Generator') -> None:
+        self.prefix = prefix
+        self.generator = generator
+
+        # 2.4.1.
Numeric Instructions
+        # ibinop
+        self.add = functools.partial(self.generator.add_statement, f'{prefix}.add')
+        self.sub = functools.partial(self.generator.add_statement, f'{prefix}.sub')
+        self.mul = functools.partial(self.generator.add_statement, f'{prefix}.mul')
+
+        # irelop
+        self.eq = functools.partial(self.generator.add_statement, f'{prefix}.eq')
+        self.ne = functools.partial(self.generator.add_statement, f'{prefix}.ne')
+        self.lt_u = functools.partial(self.generator.add_statement, f'{prefix}.lt_u')
+        self.ge_u = functools.partial(self.generator.add_statement, f'{prefix}.ge_u')
+
+        # 2.4.4. Memory Instructions
+        self.load = functools.partial(self.generator.add_statement, f'{prefix}.load')
+        self.load8_u = functools.partial(self.generator.add_statement, f'{prefix}.load8_u')
+        self.store = functools.partial(self.generator.add_statement, f'{prefix}.store')
+
+    def const(self, value: int, comment: Optional[str] = None) -> None:
+        self.generator.add_statement(f'{self.prefix}.const', f'{"-" if value < 0 else ""}0x{abs(value):08x}', comment=comment)  # WAT puts the sign before 0x
+
+class Generator_i32(Generator_i32i64):
+    def __init__(self, generator: 'Generator') -> None:
+        super().__init__('i32', generator)
+
+class Generator_i64(Generator_i32i64):
+    def __init__(self, generator: 'Generator') -> None:
+        super().__init__('i64', generator)
+
+class Generator_f32f64:
+    def __init__(self, prefix: str, generator: 'Generator') -> None:
+        self.prefix = prefix
+        self.generator = generator
+
+        # 2.4.1. Numeric Instructions
+        # fbinop
+        self.add = functools.partial(self.generator.add_statement, f'{prefix}.add')
+
+        # frelop
+        self.eq = functools.partial(self.generator.add_statement, f'{prefix}.eq')
+        self.ne = functools.partial(self.generator.add_statement, f'{prefix}.ne')
+
+        # 2.4.4.
Memory Instructions + self.load = functools.partial(self.generator.add_statement, f'{prefix}.load') + self.store = functools.partial(self.generator.add_statement, f'{prefix}.store') + + def const(self, value: float, comment: Optional[str] = None) -> None: + # FIXME: Is this sufficient to guarantee the float comes across properly? + self.generator.add_statement(f'{self.prefix}.const', f'{value}', comment=comment) + +class Generator_f32(Generator_f32f64): + def __init__(self, generator: 'Generator') -> None: + super().__init__('f32', generator) + +class Generator_f64(Generator_f32f64): + def __init__(self, generator: 'Generator') -> None: + super().__init__('f64', generator) + +class Generator_Local: + def __init__(self, generator: 'Generator') -> None: + self.generator = generator + + # 2.4.3. Variable Instructions + def get(self, variable: VarType_Base, comment: Optional[str] = None) -> None: + self.generator.add_statement('local.get', variable.name_ref, comment=comment) + + def set(self, variable: VarType_Base, comment: Optional[str] = None) -> None: + self.generator.locals.setdefault(variable.name, variable) + + self.generator.add_statement('local.set', variable.name_ref, comment=comment) + + def tee(self, variable: VarType_Base, comment: Optional[str] = None) -> None: + self.generator.locals.setdefault(variable.name, variable) + + self.generator.add_statement('local.tee', variable.name_ref, comment=comment) + +class GeneratorBlock: + def __init__(self, generator: 'Generator', name: str) -> None: + self.generator = generator + self.name = name + + def __enter__(self) -> None: + self.generator.add_statement(self.name) + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + if not exc_type: + self.generator.add_statement('end') + +class Generator: + def __init__(self) -> None: + self.statements: List[wasm.Statement] = [] + self.locals: Dict[str, VarType_Base] = {} + + self.i32 = Generator_i32(self) + self.i64 = Generator_i64(self) + 
self.f32 = Generator_f32(self) + self.f64 = Generator_f64(self) + + # 2.4.3 Variable Instructions + self.local = Generator_Local(self) + + # 2.4.5 Control Instructions + self.nop = functools.partial(self.add_statement, 'nop') + self.unreachable = functools.partial(self.add_statement, 'unreachable') + # block + self.loop = functools.partial(GeneratorBlock, self, 'loop') + self.if_ = functools.partial(GeneratorBlock, self, 'if') + # br + # br_if - see below + # br_table + self.return_ = functools.partial(self.add_statement, 'return') + # call - see below + # call_indirect + + def br_if(self, idx: int) -> None: + self.add_statement('br_if', f'{idx}') + + def call(self, function: wasm.Function) -> None: + self.add_statement('call', f'${function.name}') + + def add_statement(self, name: str, *args: str, comment: Optional[str] = None) -> None: + self.statements.append(wasm.Statement(name, *args, comment=comment)) + + def temp_var_i32(self, infix: str) -> VarType_i32: + idx = 0 + while (varname := f'__{infix}_tmp_var_{idx}__') in self.locals: + idx += 1 + + return VarType_i32(varname) + + def temp_var_u8(self, infix: str) -> VarType_u8: + idx = 0 + while (varname := f'__{infix}_tmp_var_{idx}__') in self.locals: + idx += 1 + + return VarType_u8(varname) + +def func_wrapper(exported: bool = True) -> Callable[[Any], wasm.Function]: + """ + This wrapper will execute the function and return + a wasm Function with the generated Statements + """ + def inner(func: Any) -> wasm.Function: + func_name_parts = func.__module__.split('.') + [func.__name__] + if 'phasm' == func_name_parts[0]: + func_name_parts.pop(0) + func_name = '.'.join(func_name_parts) + + annot = dict(func.__annotations__) + + # Check if we can pass the generator + assert Generator is annot.pop('g') + + # Convert return type to WasmType + annot_return = annot.pop('return') + if annot_return is None: + return_type = wasm.WasmTypeNone() + else: + assert issubclass(annot_return, VarType_Base) + return_type = 
annot_return.wasm_type()
+
+        # Load the argument types, and generate instances
+        args: Dict[str, VarType_Base] = {}
+        params: List[wasm.Param] = []
+        for param_name, param_type in annot.items():
+            assert issubclass(param_type, VarType_Base)
+
+            params.append((param_name, param_type.wasm_type(), ))
+
+            args[param_name] = param_type(param_name)  # instantiate the annotated subclass so wasm_type is defined
+
+        # Make a generator, and run the function on that generator,
+        # so the Statements get added
+        generator = Generator()
+        func(g=generator, **args)
+
+        # Check what locals were used, and define them (params are already declared, so skip those)
+        locals_: List[wasm.Param] = [
+            (local_name, local_type.wasm_type(), ) for local_name, local_type in generator.locals.items() if local_name not in args
+        ]
+
+        # Complete function definition
+        return wasm.Function(
+            func_name,
+            func_name if exported else None,
+            params,
+            locals_,
+            return_type,
+            generator.statements,
+        )
+
+    return inner
diff --git a/pylintrc b/pylintrc
new file mode 100644
index 0000000..0591be3
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,10 @@
+[MASTER]
+disable=C0122,R0903,R0911,R0912,R0913,R0915,R1710,W0223
+
+max-line-length=180
+
+[stdlib]
+good-names=g
+
+[tests]
+disable=C0116,
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d29e53c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+mypy==0.812
+pygments==2.12.0
+pylint==2.7.4
+pytest==6.2.2
+pytest-integration==0.2.2
+pywasm==1.0.7
+pywasm3==0.5.0
+wasmer==1.1.0
+wasmer_compiler_cranelift==1.1.0
+wasmtime==0.36.0
diff --git a/stubs/pywasm/__init__.pyi b/stubs/pywasm/__init__.pyi
new file mode 100644
index 0000000..4d94109
--- /dev/null
+++ b/stubs/pywasm/__init__.pyi
@@ -0,0 +1,14 @@
+from typing import Any, Dict, List, Optional, Union
+
+from . import binary
+from . import option
+from . import execution
+
+class Runtime:
+    store: execution.Store
+
+    def __init__(self, module: binary.Module, imps: Optional[Dict[str, Any]] = None, opts: Optional[option.Option] = None):
+        ...
+ + def exec(self, name: str, args: List[Union[int, float]]) -> Any: + ... diff --git a/stubs/pywasm/binary.pyi b/stubs/pywasm/binary.pyi new file mode 100644 index 0000000..3af65a0 --- /dev/null +++ b/stubs/pywasm/binary.pyi @@ -0,0 +1,6 @@ +from typing import BinaryIO + +class Module: + @classmethod + def from_reader(cls, reader: BinaryIO) -> 'Module': + ... diff --git a/stubs/pywasm/execution.pyi b/stubs/pywasm/execution.pyi new file mode 100644 index 0000000..54db4fb --- /dev/null +++ b/stubs/pywasm/execution.pyi @@ -0,0 +1,10 @@ +from typing import List + +class Result: + ... + +class MemoryInstance: + data: bytearray + +class Store: + memory_list: List[MemoryInstance] diff --git a/stubs/pywasm/option.pyi b/stubs/pywasm/option.pyi new file mode 100644 index 0000000..fd461ee --- /dev/null +++ b/stubs/pywasm/option.pyi @@ -0,0 +1,2 @@ +class Option: + ... diff --git a/stubs/wasm3.pyi b/stubs/wasm3.pyi new file mode 100644 index 0000000..216412c --- /dev/null +++ b/stubs/wasm3.pyi @@ -0,0 +1,23 @@ +from typing import Any, Callable + +class Module: + ... + +class Runtime: + ... + + def load(self, wasm_bin: Module) -> None: + ... + + def get_memory(self, memid: int) -> memoryview: + ... + + def find_function(self, name: str) -> Callable[[Any], Any]: + ... + +class Environment: + def new_runtime(self, mem_size: int) -> Runtime: + ... + + def parse_module(self, wasm_bin: bytes) -> Module: + ... diff --git a/stubs/wasmer.pyi b/stubs/wasmer.pyi new file mode 100644 index 0000000..535fa99 --- /dev/null +++ b/stubs/wasmer.pyi @@ -0,0 +1,39 @@ +from typing import Any, Dict, Callable, Union + +def wat2wasm(inp: str) -> bytes: + ... + +class Store: + ... + +class Function: + def __init__(self, store: Store, func: Callable[[Any], Any]) -> None: + ... + +class Module: + def __init__(self, store: Store, wasm: bytes) -> None: + ... + +class Uint8Array: + def __getitem__(self, index: Union[int, slice]) -> int: + ... + + def __setitem__(self, idx: int, value: int) -> None: + ... 
+ +class Memory: + def uint8_view(self, offset: int = 0) -> Uint8Array: + ... + +class Exports: + ... + +class ImportObject: + def register(self, region: str, values: Dict[str, Function]) -> None: + ... + +class Instance: + exports: Exports + + def __init__(self, module: Module, imports: ImportObject) -> None: + ... diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e6b179c --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.register_assert_rewrite('tests.integration.helpers') diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py new file mode 100644 index 0000000..ca4c8a7 --- /dev/null +++ b/tests/integration/helpers.py @@ -0,0 +1,85 @@ +import sys + +from phasm.codestyle import phasm_render + +from . import runners + +DASHES = '-' * 16 + +class SuiteResult: + def __init__(self): + self.returned_value = None + +RUNNER_CLASS_MAP = { + 'pywasm': runners.RunnerPywasm, + 'pywasm3': runners.RunnerPywasm3, + 'wasmtime': runners.RunnerWasmtime, + 'wasmer': runners.RunnerWasmer, +} + +class Suite: + """ + WebAssembly test suite + """ + def __init__(self, code_py): + self.code_py = code_py + + def run_code(self, *args, runtime='pywasm3', imports=None): + """ + Compiles the given python code into wasm and + then runs it + + Returned is an object with the results set + """ + class_ = RUNNER_CLASS_MAP[runtime] + + runner = class_(self.code_py) + + runner.parse() + runner.compile_ast() + runner.compile_wat() + runner.compile_wasm() + runner.interpreter_setup() + runner.interpreter_load(imports) + + write_header(sys.stderr, 'Phasm') + runner.dump_phasm_code(sys.stderr) + write_header(sys.stderr, 'Assembly') + runner.dump_wasm_wat(sys.stderr) + + # Check if code formatting works + assert self.code_py == '\n' + phasm_render(runner.phasm_ast) # \n for formatting in tests 
+ + wasm_args = [] + if args: + write_header(sys.stderr, 'Memory (pre alloc)') + runner.interpreter_dump_memory(sys.stderr) + + for arg in args: + if isinstance(arg, (int, float, )): + wasm_args.append(arg) + continue + + if isinstance(arg, bytes): + adr = runner.call('stdlib.types.__alloc_bytes__', len(arg)) + sys.stderr.write(f'Allocation 0x{adr:08x} {repr(arg)}\n') + + runner.interpreter_write_memory(adr + 4, arg) + wasm_args.append(adr) + continue + + raise NotImplementedError(arg) + + write_header(sys.stderr, 'Memory (pre run)') + runner.interpreter_dump_memory(sys.stderr) + + result = SuiteResult() + result.returned_value = runner.call('testEntry', *wasm_args) + + write_header(sys.stderr, 'Memory (post run)') + runner.interpreter_dump_memory(sys.stderr) + + return result + +def write_header(textio, msg: str) -> None: + textio.write(f'{DASHES} {msg.ljust(16)} {DASHES}\n') diff --git a/tests/integration/runners.py b/tests/integration/runners.py new file mode 100644 index 0000000..fd3a53e --- /dev/null +++ b/tests/integration/runners.py @@ -0,0 +1,323 @@ +""" +Runners to help run WebAssembly code on various interpreters +""" +from typing import Any, Callable, Dict, Iterable, Optional, TextIO + +import ctypes +import io + +import pywasm.binary +import wasm3 +import wasmer +import wasmtime + +from phasm.compiler import phasm_compile +from phasm.parser import phasm_parse +from phasm import ourlang +from phasm import wasm + +class RunnerBase: + """ + Base class + """ + phasm_code: str + phasm_ast: ourlang.Module + wasm_ast: wasm.Module + wasm_asm: str + wasm_bin: bytes + + def __init__(self, phasm_code: str) -> None: + self.phasm_code = phasm_code + + def dump_phasm_code(self, textio: TextIO) -> None: + """ + Dumps the input Phasm code for debugging + """ + _dump_code(textio, self.phasm_code) + + def parse(self) -> None: + """ + Parses the Phasm code into an AST + """ + self.phasm_ast = phasm_parse(self.phasm_code) + + def compile_ast(self) -> None: + """ + Compiles 
the Phasm AST into an WebAssembly AST + """ + self.wasm_ast = phasm_compile(self.phasm_ast) + + def compile_wat(self) -> None: + """ + Compiles the WebAssembly AST into WebAssembly Assembly code + """ + self.wasm_asm = self.wasm_ast.to_wat() + + def dump_wasm_wat(self, textio: TextIO) -> None: + """ + Dumps the intermediate WebAssembly Assembly code for debugging + """ + _dump_code(textio, self.wasm_asm) + + def compile_wasm(self) -> None: + """ + Compiles the WebAssembly AST into WebAssembly Binary + """ + self.wasm_bin = wasmer.wat2wasm(self.wasm_asm) + + def interpreter_setup(self) -> None: + """ + Sets up the interpreter + """ + raise NotImplementedError + + def interpreter_load(self, imports: Optional[Dict[str, Callable[[Any], Any]]] = None) -> None: + """ + Loads the code into the interpreter + """ + raise NotImplementedError + + def interpreter_write_memory(self, offset: int, data: Iterable[int]) -> None: + """ + Writes into the interpreters memory + """ + raise NotImplementedError + + def interpreter_read_memory(self, offset: int, length: int) -> bytes: + """ + Reads from the interpreters memory + """ + raise NotImplementedError + + def interpreter_dump_memory(self, textio: TextIO) -> None: + """ + Dumps the interpreters memory for debugging + """ + raise NotImplementedError + + def call(self, function: str, *args: Any) -> Any: + """ + Calls the given function with the given arguments, returning the result + """ + raise NotImplementedError + +class RunnerPywasm(RunnerBase): + """ + Implements a runner for pywasm + + See https://pypi.org/project/pywasm/ + """ + module: pywasm.binary.Module + runtime: pywasm.Runtime + + def interpreter_setup(self) -> None: + # Nothing to set up + pass + + def interpreter_load(self, imports: Optional[Dict[str, Callable[[Any], Any]]] = None) -> None: + if imports is not None: + raise NotImplementedError + + bytesio = io.BytesIO(self.wasm_bin) + self.module = pywasm.binary.Module.from_reader(bytesio) + self.runtime = 
pywasm.Runtime(self.module, {}, None)
+
+    def interpreter_write_memory(self, offset: int, data: Iterable[int]) -> None:
+        for idx, byt in enumerate(data):
+            self.runtime.store.memory_list[0].data[offset + idx] = byt
+
+    def interpreter_read_memory(self, offset: int, length: int) -> bytes:
+        return bytes(self.runtime.store.memory_list[0].data[offset:offset + length])  # length is a byte count, not an end index
+
+    def interpreter_dump_memory(self, textio: TextIO) -> None:
+        _dump_memory(textio, self.runtime.store.memory_list[0].data)
+
+    def call(self, function: str, *args: Any) -> Any:
+        return self.runtime.exec(function, [*args])
+
+class RunnerPywasm3(RunnerBase):
+    """
+    Implements a runner for pywasm3
+
+    See https://pypi.org/project/pywasm3/
+    """
+    env: wasm3.Environment
+    rtime: wasm3.Runtime
+    mod: wasm3.Module
+
+    def interpreter_setup(self) -> None:
+        self.env = wasm3.Environment()
+        self.rtime = self.env.new_runtime(1024 * 1024)
+
+    def interpreter_load(self, imports: Optional[Dict[str, Callable[[Any], Any]]] = None) -> None:
+        if imports is not None:
+            raise NotImplementedError
+
+        self.mod = self.env.parse_module(self.wasm_bin)
+        self.rtime.load(self.mod)
+
+    def interpreter_write_memory(self, offset: int, data: Iterable[int]) -> None:
+        memory = self.rtime.get_memory(0)
+
+        for idx, byt in enumerate(data):
+            memory[offset + idx] = byt  # type: ignore
+
+    def interpreter_read_memory(self, offset: int, length: int) -> bytes:
+        memory = self.rtime.get_memory(0)
+        return memory[offset:offset + length].tobytes()  # length is a byte count, not an end index
+
+    def interpreter_dump_memory(self, textio: TextIO) -> None:
+        _dump_memory(textio, self.rtime.get_memory(0))
+
+    def call(self, function: str, *args: Any) -> Any:
+        return self.rtime.find_function(function)(*args)
+
+class RunnerWasmtime(RunnerBase):
+    """
+    Implements a runner for wasmtime
+
+    See https://pypi.org/project/wasmtime/
+    """
+    store: wasmtime.Store
+    module: wasmtime.Module
+    instance: wasmtime.Instance
+
+    def interpreter_setup(self) -> None:
+        self.store = wasmtime.Store()
+
+    def
interpreter_load(self, imports: Optional[Dict[str, Callable[[Any], Any]]] = None) -> None:
+        if imports is not None:
+            raise NotImplementedError
+
+        self.module = wasmtime.Module(self.store.engine, self.wasm_bin)
+        self.instance = wasmtime.Instance(self.store, self.module, [])
+
+    def interpreter_write_memory(self, offset: int, data: Iterable[int]) -> None:
+        exports = self.instance.exports(self.store)
+        memory = exports['memory']
+        assert isinstance(memory, wasmtime.Memory)  # type hint
+
+        data_ptr = memory.data_ptr(self.store)
+        data_len = memory.data_len(self.store)
+
+        idx = offset
+        for byt in data:
+            assert idx < data_len
+            data_ptr[idx] = ctypes.c_ubyte(byt)
+            idx += 1
+
+    def interpreter_read_memory(self, offset: int, length: int) -> bytes:
+        exports = self.instance.exports(self.store)
+        memory = exports['memory']
+        assert isinstance(memory, wasmtime.Memory)  # type hint
+
+        data_ptr = memory.data_ptr(self.store)
+        data_len = memory.data_len(self.store)
+
+        raw = ctypes.string_at(data_ptr, data_len)
+
+        return raw[offset:offset + length]  # length is a byte count, not an end index
+
+    def interpreter_dump_memory(self, textio: TextIO) -> None:
+        exports = self.instance.exports(self.store)
+        memory = exports['memory']
+        assert isinstance(memory, wasmtime.Memory)  # type hint
+
+        data_ptr = memory.data_ptr(self.store)
+        data_len = memory.data_len(self.store)
+
+        _dump_memory(textio, ctypes.string_at(data_ptr, data_len))
+
+    def call(self, function: str, *args: Any) -> Any:
+        exports = self.instance.exports(self.store)
+        func = exports[function]
+        assert isinstance(func, wasmtime.Func)
+
+        return func(self.store, *args)
+
+class RunnerWasmer(RunnerBase):
+    """
+    Implements a runner for wasmer
+
+    See https://pypi.org/project/wasmer/
+    """
+
+    # pylint: disable=E1101
+
+    store: wasmer.Store
+    module: wasmer.Module
+    instance: wasmer.Instance
+
+    def interpreter_setup(self) -> None:
+        self.store = wasmer.Store()
+
+    def interpreter_load(self, imports: Optional[Dict[str, Callable[[Any], Any]]] = None) -> None:
+
import_object = wasmer.ImportObject()
+        if imports:
+            import_object.register('imports', {
+                k: wasmer.Function(self.store, v)
+                for k, v in (imports or {}).items()
+            })
+
+        self.module = wasmer.Module(self.store, self.wasm_bin)
+        self.instance = wasmer.Instance(self.module, import_object)
+
+    def interpreter_write_memory(self, offset: int, data: Iterable[int]) -> None:
+        exports = self.instance.exports
+        memory = getattr(exports, 'memory')
+        assert isinstance(memory, wasmer.Memory)
+        view = memory.uint8_view(offset)
+
+        for idx, byt in enumerate(data):
+            view[idx] = byt
+
+    def interpreter_read_memory(self, offset: int, length: int) -> bytes:
+        exports = self.instance.exports
+        memory = getattr(exports, 'memory')
+        assert isinstance(memory, wasmer.Memory)
+        view = memory.uint8_view(offset)
+        return bytes(view[0:length])  # the view already starts at offset, so slice relative to it
+
+    def interpreter_dump_memory(self, textio: TextIO) -> None:
+        exports = self.instance.exports
+        memory = getattr(exports, 'memory')
+        assert isinstance(memory, wasmer.Memory)
+        view = memory.uint8_view()
+
+        _dump_memory(textio, view)  # type: ignore
+
+    def call(self, function: str, *args: Any) -> Any:
+        exports = self.instance.exports
+        func = getattr(exports, function)
+
+        return func(*args)
+
+def _dump_memory(textio: TextIO, mem: bytes) -> None:
+    line_width = 16
+
+    prev_line = None
+    skip = False
+    for idx in range(0, len(mem), line_width):
+        line = ''
+        for idx2 in range(0, min(line_width, len(mem) - idx)):  # the last row may be shorter than line_width
+            line += f'{mem[idx + idx2]:02X}'
+            if idx2 % 2 == 1:
+                line += ' '
+
+        # Print each distinct row; collapse every run of repeated rows into one '**' marker
+        if prev_line != line:
+            textio.write(f'{idx:08x} {line}\n')
+        elif not skip:
+            textio.write('**\n')
+        skip = prev_line == line  # re-arm the marker once a differing row shows up
+
+        prev_line = line
+
+def _dump_code(textio: TextIO, text: str) -> None:
+    line_list = text.split('\n')
+    line_no_width = len(str(len(line_list)))
+    for line_no, line_txt in enumerate(line_list):
+        textio.write('{} {}\n'.format(
+            str(line_no + 1).zfill(line_no_width),
+            line_txt,
+        ))
diff --git a/tests/integration/test_builtins.py
b/tests/integration/test_builtins.py new file mode 100644 index 0000000..4b84197 --- /dev/null +++ b/tests/integration/test_builtins.py @@ -0,0 +1,80 @@ +import sys + +import pytest + +from .helpers import Suite, write_header +from .runners import RunnerPywasm + +def setup_interpreter(phash_code: str) -> RunnerPywasm: + runner = RunnerPywasm(phash_code) + + runner.parse() + runner.compile_ast() + runner.compile_wat() + runner.compile_wasm() + runner.interpreter_setup() + runner.interpreter_load() + + write_header(sys.stderr, 'Phasm') + runner.dump_phasm_code(sys.stderr) + write_header(sys.stderr, 'Assembly') + runner.dump_wasm_wat(sys.stderr) + + return runner + +@pytest.mark.integration_test +def test_foldl_1(): + code_py = """ +def u8_or(l: u8, r: u8) -> u8: + return l | r + +@exported +def testEntry(b: bytes) -> u8: + return foldl(u8_or, 128, b) +""" + suite = Suite(code_py) + + result = suite.run_code(b'') + assert 128 == result.returned_value + + result = suite.run_code(b'\x80', runtime='pywasm') + assert 128 == result.returned_value + + result = suite.run_code(b'\x80\x40', runtime='pywasm') + assert 192 == result.returned_value + + result = suite.run_code(b'\x80\x40\x20\x10', runtime='pywasm') + assert 240 == result.returned_value + + result = suite.run_code(b'\x80\x40\x20\x10\x08\x04\x02\x01', runtime='pywasm') + assert 255 == result.returned_value + +@pytest.mark.integration_test +def test_foldl_2(): + code_py = """ +def xor(l: u8, r: u8) -> u8: + return l ^ r + +@exported +def testEntry(a: bytes, b: bytes) -> u8: + return foldl(xor, 0, a) ^ foldl(xor, 0, b) +""" + suite = Suite(code_py) + + result = suite.run_code(b'\x55\x0F', b'\x33\x80') + assert 233 == result.returned_value + +@pytest.mark.integration_test +def test_foldl_3(): + code_py = """ +def xor(l: u32, r: u8) -> u32: + return l ^ u32(r) + +@exported +def testEntry(a: bytes) -> u32: + return foldl(xor, 0, a) +""" + suite = Suite(code_py) + + result = suite.run_code(b'\x55\x0F\x33\x80') + assert 
233 == result.returned_value diff --git a/tests/integration/test_constants.py b/tests/integration/test_constants.py new file mode 100644 index 0000000..19f0203 --- /dev/null +++ b/tests/integration/test_constants.py @@ -0,0 +1,87 @@ +import pytest + +from .helpers import Suite + +@pytest.mark.integration_test +def test_i32(): + code_py = """ +CONSTANT: i32 = 13 + +@exported +def testEntry() -> i32: + return CONSTANT * 5 +""" + + result = Suite(code_py).run_code() + + assert 65 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64', ]) +def test_tuple_1(type_): + code_py = f""" +CONSTANT: ({type_}, ) = (65, ) + +@exported +def testEntry() -> {type_}: + return helper(CONSTANT) + +def helper(vector: ({type_}, )) -> {type_}: + return vector[0] +""" + + result = Suite(code_py).run_code() + + assert 65 == result.returned_value + +@pytest.mark.integration_test +def test_tuple_6(): + code_py = """ +CONSTANT: (u8, u8, u32, u32, u64, u64, ) = (11, 22, 3333, 4444, 555555, 666666, ) + +@exported +def testEntry() -> u32: + return helper(CONSTANT) + +def helper(vector: (u8, u8, u32, u32, u64, u64, )) -> u32: + return vector[2] +""" + + result = Suite(code_py).run_code() + + assert 3333 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64', ]) +def test_static_array_1(type_): + code_py = f""" +CONSTANT: {type_}[1] = (65, ) + +@exported +def testEntry() -> {type_}: + return helper(CONSTANT) + +def helper(vector: {type_}[1]) -> {type_}: + return vector[0] +""" + + result = Suite(code_py).run_code() + + assert 65 == result.returned_value + +@pytest.mark.integration_test +def test_static_array_6(): + code_py = """ +CONSTANT: u32[6] = (11, 22, 3333, 4444, 555555, 666666, ) + +@exported +def testEntry() -> u32: + return helper(CONSTANT) + +def helper(vector: u32[6]) -> u32: + return vector[2] +""" + + result = Suite(code_py).run_code() + + assert 3333 == 
result.returned_value diff --git a/tests/integration/test_examples.py b/tests/integration/test_examples.py new file mode 100644 index 0000000..b3b278d --- /dev/null +++ b/tests/integration/test_examples.py @@ -0,0 +1,39 @@ +import binascii +import struct + +import pytest + +from .helpers import Suite + +@pytest.mark.integration_test +def test_crc32(): + # FIXME: Stub + # crc = 0xFFFFFFFF + # byt = 0x61 + # => (crc >> 8) ^ _CRC32_Table[(crc & 0xFF) ^ byt] + # (crc >> 8) = 0x00FFFFFF + # => 0x00FFFFFF ^ _CRC32_Table[(crc & 0xFF) ^ byt] + # (crc & 0xFF) = 0xFF + # => 0x00FFFFFF ^ _CRC32_Table[0xFF ^ byt] + # 0xFF ^ 0x61 = 0x9E + # => 0x00FFFFFF ^ _CRC32_Table[0x9E] + # _CRC32_Table[0x9E] = 0x17b7be43 + # => 0x00FFFFFF ^ 0x17b7be43 + + code_py = """ +def _crc32_f(crc: u32, byt: u8) -> u32: + return 16777215 ^ 397917763 + +def testEntry(data: bytes) -> u32: + return 4294967295 ^ _crc32_f(4294967295, data[0]) +""" + exp_result = binascii.crc32(b'a') + + result = Suite(code_py).run_code(b'a') + + # exp_result returns a unsigned integer, as is proper + exp_data = struct.pack('I', exp_result) + # ints extracted from WebAssembly are always signed + data = struct.pack('i', result.returned_value) + + assert exp_data == data diff --git a/tests/integration/test_fib.py b/tests/integration/test_fib.py new file mode 100644 index 0000000..20e7e63 --- /dev/null +++ b/tests/integration/test_fib.py @@ -0,0 +1,30 @@ +import pytest + +from .helpers import Suite + +@pytest.mark.slow_integration_test +def test_fib(): + code_py = """ +def helper(n: i32, a: i32, b: i32) -> i32: + if n < 1: + return a + b + + return helper(n - 1, a + b, a) + +def fib(n: i32) -> i32: + if n == 0: + return 0 + + if n == 1: + return 1 + + return helper(n - 1, 0, 1) + +@exported +def testEntry() -> i32: + return fib(40) +""" + + result = Suite(code_py).run_code() + + assert 102334155 == result.returned_value diff --git a/tests/integration/test_helper.py b/tests/integration/test_helper.py new file mode 100644 
index 0000000..cb44021
--- /dev/null
+++ b/tests/integration/test_helper.py
@@ -0,0 +1,70 @@
+import io
+
+import pytest
+
+from pywasm import binary
+from pywasm import Runtime
+
+from wasmer import wat2wasm
+
+def run(code_wat):
+    code_wasm = wat2wasm(code_wat)
+    module = binary.Module.from_reader(io.BytesIO(code_wasm))
+
+    runtime = Runtime(module, {}, None)  # no options; same call shape as RunnerPywasm
+
+    out_put = runtime.exec('testEntry', [])
+    return (runtime, out_put)
+
+@pytest.mark.parametrize('size,offset,exp_out_put', [
+    ('32', 0, 0x3020100),
+    ('32', 1, 0x4030201),
+    ('64', 0, 0x706050403020100),
+    ('64', 2, 0x908070605040302),
+])
+def test_i32_64_load(size, offset, exp_out_put):
+    code_wat = f"""
+    (module
+        (memory 1)
+        (data (memory 0) (i32.const 0) "\\00\\01\\02\\03\\04\\05\\06\\07\\08\\09\\10")
+
+        (func (export "testEntry") (result i{size})
+            i32.const {offset}
+            i{size}.load
+            return ))
+"""
+
+    (_, out_put) = run(code_wat)
+    assert exp_out_put == out_put
+
+def test_load_then_store():
+    code_wat = """
+    (module
+        (memory 1)
+        (data (memory 0) (i32.const 0) "\\04\\00\\00\\00")
+
+        (func (export "testEntry") (result i32) (local $my_memory_value i32)
+            ;; Load i32 from address 0
+            i32.const 0
+            i32.load
+
+            ;; Add 8 to the loaded value
+            i32.const 8
+            i32.add
+
+            local.set $my_memory_value
+
+            ;; Store back to the memory
+            i32.const 0
+            local.get $my_memory_value
+            i32.store
+
+            ;; Return something
+            i32.const 9
+            return ))
+"""
+    (runtime, out_put) = run(code_wat)
+
+    assert 9 == out_put
+
+    assert (b'\x0c'+ b'\00' * 23) == runtime.store.memory_list[0].data[:24]  # pywasm 1.x exposes memories as memory_list
diff --git a/tests/integration/test_runtime_checks.py b/tests/integration/test_runtime_checks.py
new file mode 100644
index 0000000..97d6542
--- /dev/null
+++ b/tests/integration/test_runtime_checks.py
@@ -0,0 +1,31 @@
+import pytest
+
+from .helpers import Suite
+
+@pytest.mark.integration_test
+def test_bytes_index_out_of_bounds():
+    code_py = """
+@exported
+def testEntry(f: bytes) -> u8:
+    return f[50]
+"""
+
+    result =
Suite(code_py).run_code(b'Short', b'Long' * 100) + + assert 0 == result.returned_value + +@pytest.mark.integration_test +def test_static_array_index_out_of_bounds(): + code_py = """ +CONSTANT0: u32[3] = (24, 57, 80, ) + +CONSTANT1: u32[16] = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ) + +@exported +def testEntry() -> u32: + return CONSTANT0[16] +""" + + result = Suite(code_py).run_code() + + assert 0 == result.returned_value diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py new file mode 100644 index 0000000..f0c2993 --- /dev/null +++ b/tests/integration/test_simple.py @@ -0,0 +1,571 @@ +import pytest + +from .helpers import Suite + +TYPE_MAP = { + 'u8': int, + 'u32': int, + 'u64': int, + 'i32': int, + 'i64': int, + 'f32': float, + 'f64': float, +} + +COMPLETE_SIMPLE_TYPES = [ + 'u32', 'u64', + 'i32', 'i64', + 'f32', 'f64', +] + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', TYPE_MAP.keys()) +def test_return(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 13 +""" + + result = Suite(code_py).run_code() + + assert 13 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', COMPLETE_SIMPLE_TYPES) +def test_addition(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 + 3 +""" + + result = Suite(code_py).run_code() + + assert 13 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', COMPLETE_SIMPLE_TYPES) +def test_subtraction(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 - 3 +""" + + result = Suite(code_py).run_code() + + assert 7 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u32', 'u64']) # FIXME: Support u8, requires an extra AND 
operation +def test_logical_left_shift(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 << 3 +""" + + result = Suite(code_py).run_code() + + assert 80 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64']) +def test_logical_right_shift(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 >> 3 +""" + + result = Suite(code_py).run_code() + + assert 1 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64']) +def test_bitwise_or(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 | 3 +""" + + result = Suite(code_py).run_code() + + assert 11 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64']) +def test_bitwise_xor(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 ^ 3 +""" + + result = Suite(code_py).run_code() + + assert 9 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['u8', 'u32', 'u64']) +def test_bitwise_and(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return 10 & 3 +""" + + result = Suite(code_py).run_code() + + assert 2 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['f32', 'f64']) +def test_buildins_sqrt(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return sqrt(25) +""" + + result = Suite(code_py).run_code() + + assert 5 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', 
TYPE_MAP.keys()) +def test_arg(type_): + code_py = f""" +@exported +def testEntry(a: {type_}) -> {type_}: + return a +""" + + result = Suite(code_py).run_code(125) + + assert 125 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.skip('Do we want it to work like this?') +def test_i32_to_i64(): + code_py = """ +@exported +def testEntry(a: i32) -> i64: + return a +""" + + result = Suite(code_py).run_code(125) + + assert 125 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('Do we want it to work like this?') +def test_i32_plus_i64(): + code_py = """ +@exported +def testEntry(a: i32, b: i64) -> i64: + return a + b +""" + + result = Suite(code_py).run_code(125, 100) + + assert 225 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('Do we want it to work like this?') +def test_f32_to_f64(): + code_py = """ +@exported +def testEntry(a: f32) -> f64: + return a +""" + + result = Suite(code_py).run_code(125.5) + + assert 125.5 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('Do we want it to work like this?') +def test_f32_plus_f64(): + code_py = """ +@exported +def testEntry(a: f32, b: f64) -> f64: + return a + b +""" + + result = Suite(code_py).run_code(125.5, 100.25) + + assert 225.75 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('TODO') +def test_uadd(): + code_py = """ +@exported +def testEntry() -> i32: + return +523 +""" + + result = Suite(code_py).run_code() + + assert 523 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('TODO') +def test_usub(): + code_py = """ +@exported +def testEntry() -> i32: + return -19 +""" + + result = Suite(code_py).run_code() + + assert -19 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('inp', [9, 10, 11, 12]) +def test_if_simple(inp): + code_py = """ +@exported +def testEntry(a: i32) -> 
i32: + if a > 10: + return 15 + + return 3 +""" + exp_result = 15 if inp > 10 else 3 + + suite = Suite(code_py) + + result = suite.run_code(inp) + assert exp_result == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('Such a return is not how things should be') +def test_if_complex(): + code_py = """ +@exported +def testEntry(a: i32) -> i32: + if a > 10: + return 10 + elif a > 0: + return a + else: + return 0 + + return -1 # Required due to function type +""" + + suite = Suite(code_py) + + assert 10 == suite.run_code(20).returned_value + assert 10 == suite.run_code(10).returned_value + + assert 8 == suite.run_code(8).returned_value + + assert 0 == suite.run_code(0).returned_value + assert 0 == suite.run_code(-1).returned_value + +@pytest.mark.integration_test +def test_if_nested(): + code_py = """ +@exported +def testEntry(a: i32, b: i32) -> i32: + if a > 11: + if b > 11: + return 3 + + return 2 + + if b > 11: + return 1 + + return 0 +""" + + suite = Suite(code_py) + + assert 3 == suite.run_code(20, 20).returned_value + assert 2 == suite.run_code(20, 10).returned_value + assert 1 == suite.run_code(10, 20).returned_value + assert 0 == suite.run_code(10, 10).returned_value + +@pytest.mark.integration_test +def test_call_pre_defined(): + code_py = """ +def helper(left: i32, right: i32) -> i32: + return left + right + +@exported +def testEntry() -> i32: + return helper(10, 3) +""" + + result = Suite(code_py).run_code() + + assert 13 == result.returned_value + +@pytest.mark.integration_test +def test_call_post_defined(): + code_py = """ +@exported +def testEntry() -> i32: + return helper(10, 3) + +def helper(left: i32, right: i32) -> i32: + return left - right +""" + + result = Suite(code_py).run_code() + + assert 7 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', COMPLETE_SIMPLE_TYPES) +def test_call_with_expression(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return helper(10 + 
20, 3 + 5) + +def helper(left: {type_}, right: {type_}) -> {type_}: + return left - right +""" + + result = Suite(code_py).run_code() + + assert 22 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.skip('Not yet implemented') +def test_assign(): + code_py = """ + +@exported +def testEntry() -> i32: + a: i32 = 8947 + return a +""" + + result = Suite(code_py).run_code() + + assert 8947 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', TYPE_MAP.keys()) +def test_struct_0(type_): + code_py = f""" +class CheckedValue: + value: {type_} + +@exported +def testEntry() -> {type_}: + return helper(CheckedValue(23)) + +def helper(cv: CheckedValue) -> {type_}: + return cv.value +""" + + result = Suite(code_py).run_code() + + assert 23 == result.returned_value + +@pytest.mark.integration_test +def test_struct_1(): + code_py = """ +class Rectangle: + height: i32 + width: i32 + border: i32 + +@exported +def testEntry() -> i32: + return helper(Rectangle(100, 150, 2)) + +def helper(shape: Rectangle) -> i32: + return shape.height + shape.width + shape.border +""" + + result = Suite(code_py).run_code() + + assert 252 == result.returned_value + +@pytest.mark.integration_test +def test_struct_2(): + code_py = """ +class Rectangle: + height: i32 + width: i32 + border: i32 + +@exported +def testEntry() -> i32: + return helper(Rectangle(100, 150, 2), Rectangle(200, 90, 3)) + +def helper(shape1: Rectangle, shape2: Rectangle) -> i32: + return shape1.height + shape1.width + shape1.border + shape2.height + shape2.width + shape2.border +""" + + result = Suite(code_py).run_code() + + assert 545 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', COMPLETE_SIMPLE_TYPES) +def test_tuple_simple_constructor(type_): + code_py = f""" +@exported +def testEntry() -> {type_}: + return helper((24, 57, 80, )) + +def helper(vector: ({type_}, 
{type_}, {type_}, )) -> {type_}: + return vector[0] + vector[1] + vector[2] +""" + + result = Suite(code_py).run_code() + + assert 161 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +def test_tuple_float(): + code_py = """ +@exported +def testEntry() -> f32: + return helper((1.0, 2.0, 3.0, )) + +def helper(v: (f32, f32, f32, )) -> f32: + return sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) +""" + + result = Suite(code_py).run_code() + + assert 3.74 < result.returned_value < 3.75 + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', COMPLETE_SIMPLE_TYPES) +def test_static_array_module_constant(type_): + code_py = f""" +CONSTANT: {type_}[3] = (24, 57, 80, ) + +@exported +def testEntry() -> {type_}: + return helper(CONSTANT) + +def helper(array: {type_}[3]) -> {type_}: + return array[0] + array[1] + array[2] +""" + + result = Suite(code_py).run_code() + + assert 161 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', COMPLETE_SIMPLE_TYPES) +def test_static_array_indexed(type_): + code_py = f""" +CONSTANT: {type_}[3] = (24, 57, 80, ) + +@exported +def testEntry() -> {type_}: + return helper(CONSTANT, 0, 1, 2) + +def helper(array: {type_}[3], i0: u32, i1: u32, i2: u32) -> {type_}: + return array[i0] + array[i1] + array[i2] +""" + + result = Suite(code_py).run_code() + + assert 161 == result.returned_value + assert TYPE_MAP[type_] == type(result.returned_value) + +@pytest.mark.integration_test +def test_bytes_address(): + code_py = """ +@exported +def testEntry(f: bytes) -> bytes: + return f +""" + + result = Suite(code_py).run_code(b'This is a test') + + # THIS DEPENDS ON THE ALLOCATOR + # A different allocator will return a different value + assert 20 == result.returned_value + +@pytest.mark.integration_test +def test_bytes_length(): + code_py = """ +@exported +def testEntry(f: bytes) -> i32: + 
return len(f) +""" + + result = Suite(code_py).run_code(b'This is another test') + + assert 20 == result.returned_value + +@pytest.mark.integration_test +def test_bytes_index(): + code_py = """ +@exported +def testEntry(f: bytes) -> u8: + return f[8] +""" + + result = Suite(code_py).run_code(b'This is another test') + + assert 0x61 == result.returned_value + +@pytest.mark.integration_test +@pytest.mark.skip('SIMD support is but a dream') +def test_tuple_i32x4(): + code_py = """ +@exported +def testEntry() -> i32x4: + return (51, 153, 204, 0, ) +""" + + result = Suite(code_py).run_code() + + assert (1, 2, 3, 0) == result.returned_value + +@pytest.mark.integration_test +def test_imported(): + code_py = """ +@imported +def helper(mul: i32) -> i32: + pass + +@exported +def testEntry() -> i32: + return helper(2) +""" + + def helper(mul: int) -> int: + return 4238 * mul + + result = Suite(code_py).run_code( + runtime='wasmer', + imports={ + 'helper': helper, + } + ) + + assert 8476 == result.returned_value diff --git a/tests/integration/test_static_checking.py b/tests/integration/test_static_checking.py new file mode 100644 index 0000000..1544537 --- /dev/null +++ b/tests/integration/test_static_checking.py @@ -0,0 +1,109 @@ +import pytest + +from phasm.parser import phasm_parse +from phasm.exceptions import StaticError + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['i32', 'i64', 'f32', 'f64']) +def test_type_mismatch_function_argument(type_): + code_py = f""" +def helper(a: {type_}) -> (i32, i32, ): + return a +""" + + with pytest.raises(StaticError, match=f'Static error on line 3: Expected \\(i32, i32, \\), a is actually {type_}'): + phasm_parse(code_py) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['i32', 'i64', 'f32', 'f64']) +def test_type_mismatch_struct_member(type_): + code_py = f""" +class Struct: + param: {type_} + +def testEntry(arg: Struct) -> (i32, i32, ): + return arg.param +""" + + with 
pytest.raises(StaticError, match=f'Static error on line 6: Expected \\(i32, i32, \\), arg.param is actually {type_}'): + phasm_parse(code_py) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['i32', 'i64', 'f32', 'f64']) +def test_type_mismatch_tuple_member(type_): + code_py = f""" +def testEntry(arg: ({type_}, )) -> (i32, i32, ): + return arg[0] +""" + + with pytest.raises(StaticError, match=f'Static error on line 3: Expected \\(i32, i32, \\), arg\\[0\\] is actually {type_}'): + phasm_parse(code_py) + +@pytest.mark.integration_test +@pytest.mark.parametrize('type_', ['i32', 'i64', 'f32', 'f64']) +def test_type_mismatch_function_result(type_): + code_py = f""" +def helper() -> {type_}: + return 1 + +@exported +def testEntry() -> (i32, i32, ): + return helper() +""" + + with pytest.raises(StaticError, match=f'Static error on line 7: Expected \\(i32, i32, \\), helper actually returns {type_}'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_tuple_constant_too_few_values(): + code_py = """ +CONSTANT: (u32, u8, u8, ) = (24, 57, ) +""" + + with pytest.raises(StaticError, match='Static error on line 2: Invalid number of tuple values'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_tuple_constant_too_many_values(): + code_py = """ +CONSTANT: (u32, u8, u8, ) = (24, 57, 1, 1, ) +""" + + with pytest.raises(StaticError, match='Static error on line 2: Invalid number of tuple values'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_tuple_constant_type_mismatch(): + code_py = """ +CONSTANT: (u32, u8, u8, ) = (24, 4000, 1, ) +""" + + with pytest.raises(StaticError, match='Static error on line 2: Integer value out of range; expected 0..255, actual 4000'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_static_array_constant_too_few_values(): + code_py = """ +CONSTANT: u8[3] = (24, 57, ) +""" + + with pytest.raises(StaticError, match='Static error on line 2: Invalid number of static array 
values'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_static_array_constant_too_many_values(): + code_py = """ +CONSTANT: u8[3] = (24, 57, 1, 1, ) +""" + + with pytest.raises(StaticError, match='Static error on line 2: Invalid number of static array values'): + phasm_parse(code_py) + +@pytest.mark.integration_test +def test_static_array_constant_type_mismatch(): + code_py = """ +CONSTANT: u8[3] = (24, 4000, 1, ) +""" + + with pytest.raises(StaticError, match='Static error on line 2: Integer value out of range; expected 0..255, actual 4000'): + phasm_parse(code_py) diff --git a/tests/integration/test_stdlib_alloc.py b/tests/integration/test_stdlib_alloc.py new file mode 100644 index 0000000..da8ccea --- /dev/null +++ b/tests/integration/test_stdlib_alloc.py @@ -0,0 +1,56 @@ +import sys + +import pytest + +from .helpers import write_header +from .runners import RunnerPywasm3 as Runner + +def setup_interpreter(phash_code: str) -> Runner: + runner = Runner(phash_code) + + runner.parse() + runner.compile_ast() + runner.compile_wat() + runner.compile_wasm() + runner.interpreter_setup() + runner.interpreter_load() + + write_header(sys.stderr, 'Phasm') + runner.dump_phasm_code(sys.stderr) + write_header(sys.stderr, 'Assembly') + runner.dump_wasm_wat(sys.stderr) + + return runner + +@pytest.mark.integration_test +def test___alloc___ok(): + code_py = """ +@exported +def testEntry() -> u8: + return 13 +""" + + runner = setup_interpreter(code_py) + + write_header(sys.stderr, 'Memory (pre run)') + runner.interpreter_dump_memory(sys.stderr) + + offset0 = runner.call('stdlib.alloc.__alloc__', 32) + offset1 = runner.call('stdlib.alloc.__alloc__', 32) + offset2 = runner.call('stdlib.alloc.__alloc__', 32) + + write_header(sys.stderr, 'Memory (post run)') + runner.interpreter_dump_memory(sys.stderr) + + assert ( + b'\xC0\xA1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x7C\x00\x00\x00' + b'\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00' + ) == runner.interpreter_read_memory(0, 0x60) + + assert 0x14 == offset0 + assert 0x38 == offset1 + assert 0x5C == offset2