Compare commits

..

No commits in common. "c02afb05f489506f8bc56eaf13091ac8362a2efb" and "2970093c8f7d93b03ac03a8b8e922bbdcd4cd945" have entirely different histories.

2 changed files with 5 additions and 60 deletions

View File

@ -13,45 +13,6 @@ Note: This tests performs some timing comparison, please wait a few seconds for
<h2>Measurement log</h2> <h2>Measurement log</h2>
<h3>AMD Ryzen 7 3700X 8-Core, Ubuntu 20.04, Linux 5.4.0-124-generic</h3> <h3>AMD Ryzen 7 3700X 8-Core, Ubuntu 20.04, Linux 5.4.0-124-generic</h3>
<h4>After optimizing fold over bytes by inlining __subscript_bytes__</h4>
<table>
<tr>
<td>Test</td>
<td>Interpreter</td>
<td>Setup</td>
<td>WebAssembly</td>
<td>Javascript</td>
</tr>
<tr>
<td>Lynx * 65536</td>
<td>Chromium 104.0.5112.101</td>
<td>DevTools closed</td>
<td>5.70</td>
<td>12.45</td>
</tr>
<tr>
<td>Lynx * 65536</td>
<td>Firefox 103</td>
<td>DevTools closed</td>
<td>5.16</td>
<td>5.72</td>
</tr>
<tr>
<td>Lynx * 1048576</td>
<td>Chromium 104.0.5112.101</td>
<td>DevTools closed</td>
<td>95.65</td>
<td>203.60</td>
</tr>
<tr>
<td>Lynx * 1048576</td>
<td>Firefox 103</td>
<td>DevTools closed</td>
<td>83.34</td>
<td>92.38</td>
</tr>
</table>
<h4>Before optimizing fold over bytes by inlining __subscript_bytes__</h4>
<table> <table>
<tr> <tr>
<td>Test</td> <td>Test</td>
@ -119,7 +80,7 @@ Note: This tests performs some timing comparison, please wait a few seconds for
</tr> </tr>
</table> </table>
<h4>Notes</h4> Notes:<br />
- Firefox seems faster than Chromium in my setup for Javascript, WebAssembly seems about the same.<br /> - Firefox seems faster than Chromium in my setup for Javascript, WebAssembly seems about the same.<br />
- Having DevTools open in Chromium seems to slow down the WebAssembly by about 30%, but not when doing a recording of the page load.<br /> - Having DevTools open in Chromium seems to slow down the WebAssembly by about 30%, but not when doing a recording of the page load.<br />
- WebAssembly in Firefox seems to slow down when doing a recording of the page load, which makes sense, but the Javascript does not.<br /> - WebAssembly in Firefox seems to slow down when doing a recording of the page load, which makes sense, but the Javascript does not.<br />
@ -207,9 +168,7 @@ function run_test(app, str, str_repeat)
}); });
// Don't test speedup for small strings, it varies a lot // Don't test speedup for small strings, it varies a lot
let speedup = (wasm_timing.min == 0 || js_timing.min == 0) let speedup = str.length < 16 ? 1 : (js_time == wasm_time ? 1 : js_time / wasm_time);
? 1
: js_time / wasm_time;
test_result(check && 0.999 < speedup, { // At least as fast as Javascript test_result(check && 0.999 < speedup, { // At least as fast as Javascript
'summary': 'crc32(' + (str 'summary': 'crc32(' + (str
@ -238,7 +197,7 @@ WebAssembly.instantiateStreaming(fetch('crc32.wasm'), importObject)
run_test(app, "abcdefghijklmnopqrstuvwxyz"); run_test(app, "abcdefghijklmnopqrstuvwxyz");
run_test(app, "The quick brown fox jumps over the lazy dog"); run_test(app, "The quick brown fox jumps over the lazy dog");
run_test(app, "The quick brown fox jumps over the lazy dog", 1024); run_test(app, "The quick brown fox jumps over the lazy dog", 1024);
run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 65536); run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 1048576);
}); });
</script> </script>

View File

@ -368,27 +368,13 @@ def expression_fold(wgn: WasmGenerator, inp: ourlang.Fold) -> None:
wgn.local.get(len_var) wgn.local.get(len_var)
wgn.i32.lt_u() wgn.i32.lt_u()
with wgn.if_(): with wgn.if_():
# From here on, adr_var is the address of the byte we're referencing.
# This is akin to calling stdlib_types.__subscript_bytes__,
# but since we already know we are within bounds,
# we can bypass it and load the memory directly.
wgn.local.get(adr_var)
wgn.i32.const(3) # Bytes header -1, since we do a +1 every loop
wgn.i32.add()
wgn.local.set(adr_var)
wgn.add_statement('nop', comment='while True') wgn.add_statement('nop', comment='while True')
with wgn.loop(): with wgn.loop():
wgn.add_statement('nop', comment='acu = func(acu, iter[i])') wgn.add_statement('nop', comment='acu = func(acu, iter[i])')
wgn.local.get(acu_var) wgn.local.get(acu_var)
# Get the next byte, write back the address
wgn.local.get(adr_var) wgn.local.get(adr_var)
wgn.i32.const(1) wgn.local.get(idx_var)
wgn.i32.add() wgn.call(stdlib_types.__subscript_bytes__)
wgn.local.tee(adr_var)
wgn.i32.load8_u()
wgn.add_statement('call', f'${inp.func.name}') wgn.add_statement('call', f'${inp.func.name}')
wgn.local.set(acu_var) wgn.local.set(acu_var)