diff --git a/examples/crc32.html b/examples/crc32.html index 949dadf..e78505c 100644 --- a/examples/crc32.html +++ b/examples/crc32.html @@ -13,6 +13,45 @@ Note: This tests performs some timing comparison, please wait a few seconds for

Measurement log

AMD Ryzen 7 3700X 8-Core, Ubuntu 20.04, Linux 5.4.0-124-generic

+

After optimizing fold over bytes by inlining __subscript_bytes__

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Test | Interpreter | Setup | WebAssembly | Javascript
Lynx * 65536 | Chromium 104.0.5112.101 | DevTools closed | 5.70 | 12.45
Lynx * 65536 | Firefox 103 | DevTools closed | 5.16 | 5.72
Lynx * 1048576 | Chromium 104.0.5112.101 | DevTools closed | 95.65 | 203.60
Lynx * 1048576 | Firefox 103 | DevTools closed | 83.34 | 92.38
+

Before optimizing fold over bytes by inlining __subscript_bytes__

@@ -80,7 +119,7 @@ Note: This tests performs some timing comparison, please wait a few seconds for
Test
-Notes:
+

Notes

- Firefox seems faster than Chromium in my setup for Javascript, WebAssembly seems about the same.
- Having DevTools open in Chromium seems to slow down the WebAssembly by about 30%, but not when doing a recording of the page load.
- WebAssembly in Firefox seems to slow down when doing a recording of the page load, which makes sense, but the Javascript does not.
@@ -168,7 +207,9 @@ function run_test(app, str, str_repeat) }); // Don't test speedup for small strings, it varies a lot - let speedup = str.length < 16 ? 1 : (js_time == wasm_time ? 1 : js_time / wasm_time); + let speedup = (wasm_timing.min == 0 || js_timing.min == 0) + ? 1 + : js_time / wasm_time; test_result(check && 0.999 < speedup, { // At least as fast as Javascript 'summary': 'crc32(' + (str @@ -197,7 +238,7 @@ WebAssembly.instantiateStreaming(fetch('crc32.wasm'), importObject) run_test(app, "abcdefghijklmnopqrstuvwxyz"); run_test(app, "The quick brown fox jumps over the lazy dog"); run_test(app, "The quick brown fox jumps over the lazy dog", 1024); - run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 1048576); + run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 65536); }); diff --git a/phasm/compiler.py b/phasm/compiler.py index 3b62065..4ba5d5a 100644 --- a/phasm/compiler.py +++ b/phasm/compiler.py @@ -368,13 +368,27 @@ def expression_fold(wgn: WasmGenerator, inp: ourlang.Fold) -> None: wgn.local.get(len_var) wgn.i32.lt_u() with wgn.if_(): + # From here on, adr_var is the address of byte we're referencing + # This is akin to calling stdlib_types.__subscript_bytes__ + # But since we already know we are inside of bounds, + # can just bypass it and load the memory directly. + wgn.local.get(adr_var) + wgn.i32.const(3) # Bytes header -1, since we do a +1 every loop + wgn.i32.add() + wgn.local.set(adr_var) + wgn.add_statement('nop', comment='while True') with wgn.loop(): wgn.add_statement('nop', comment='acu = func(acu, iter[i])') wgn.local.get(acu_var) + + # Get the next byte, write back the address wgn.local.get(adr_var) - wgn.local.get(idx_var) - wgn.call(stdlib_types.__subscript_bytes__) + wgn.i32.const(1) + wgn.i32.add() + wgn.local.tee(adr_var) + wgn.i32.load8_u() + wgn.add_statement('call', f'${inp.func.name}') wgn.local.set(acu_var)