Speedup foldl over bytes #2
@ -13,6 +13,45 @@ Note: This tests performs some timing comparison, please wait a few seconds for
|
|||||||
|
|
||||||
<h2>Measurement log</h2>
|
<h2>Measurement log</h2>
|
||||||
<h3>AMD Ryzen 7 3700X 8-Core, Ubuntu 20.04, Linux 5.4.0-124-generic</h3>
|
<h3>AMD Ryzen 7 3700X 8-Core, Ubuntu 20.04, Linux 5.4.0-124-generic</h3>
|
||||||
|
<h4>After optimizing fold over bytes by inlining __subscript_bytes__</h4>
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td>Test</td>
|
||||||
|
<td>Interpreter</td>
|
||||||
|
<td>Setup</td>
|
||||||
|
<td>WebAssembly</td>
|
||||||
|
<td>Javascript</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Lynx * 65536</td>
|
||||||
|
<td>Chromium 104.0.5112.101</td>
|
||||||
|
<td>DevTools closed</td>
|
||||||
|
<td>5.70</td>
|
||||||
|
<td>12.45</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Lynx * 65536</td>
|
||||||
|
<td>Firefox 103</td>
|
||||||
|
<td>DevTools closed</td>
|
||||||
|
<td>5.16</td>
|
||||||
|
<td>5.72</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Lynx * 1048576</td>
|
||||||
|
<td>Chromium 104.0.5112.101</td>
|
||||||
|
<td>DevTools closed</td>
|
||||||
|
<td>95.65</td>
|
||||||
|
<td>203.60</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Lynx * 1048576</td>
|
||||||
|
<td>Firefox 103</td>
|
||||||
|
<td>DevTools closed</td>
|
||||||
|
<td>83.34</td>
|
||||||
|
<td>92.38</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
<h4>Before optimizing fold over bytes by inlining __subscript_bytes__</h4>
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Test</td>
|
<td>Test</td>
|
||||||
@ -80,7 +119,7 @@ Note: This tests performs some timing comparison, please wait a few seconds for
|
|||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
Notes:<br />
|
<h4>Notes</h4>
|
||||||
- Firefox seems faster than Chromium in my setup for Javascript, WebAssembly seems about the same.<br />
|
- Firefox seems faster than Chromium in my setup for Javascript, WebAssembly seems about the same.<br />
|
||||||
- Having DevTools open in Chromium seems to slow down the WebAssembly by about 30%, but not when doing a recording of the page load.<br />
|
- Having DevTools open in Chromium seems to slow down the WebAssembly by about 30%, but not when doing a recording of the page load.<br />
|
||||||
- WebAssembly in Firefox seems to slow down when doing a recording of the page load, which makes sense, but the Javascript does not.<br />
|
- WebAssembly in Firefox seems to slow down when doing a recording of the page load, which makes sense, but the Javascript does not.<br />
|
||||||
@ -168,7 +207,9 @@ function run_test(app, str, str_repeat)
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Don't test speedup for small strings, it varies a lot
|
// Don't test speedup for small strings, it varies a lot
|
||||||
let speedup = str.length < 16 ? 1 : (js_time == wasm_time ? 1 : js_time / wasm_time);
|
let speedup = (wasm_timing.min == 0 || js_timing.min == 0)
|
||||||
|
? 1
|
||||||
|
: js_time / wasm_time;
|
||||||
|
|
||||||
test_result(check && 0.999 < speedup, { // At least as fast as Javascript
|
test_result(check && 0.999 < speedup, { // At least as fast as Javascript
|
||||||
'summary': 'crc32(' + (str
|
'summary': 'crc32(' + (str
|
||||||
@ -197,7 +238,7 @@ WebAssembly.instantiateStreaming(fetch('crc32.wasm'), importObject)
|
|||||||
run_test(app, "abcdefghijklmnopqrstuvwxyz");
|
run_test(app, "abcdefghijklmnopqrstuvwxyz");
|
||||||
run_test(app, "The quick brown fox jumps over the lazy dog");
|
run_test(app, "The quick brown fox jumps over the lazy dog");
|
||||||
run_test(app, "The quick brown fox jumps over the lazy dog", 1024);
|
run_test(app, "The quick brown fox jumps over the lazy dog", 1024);
|
||||||
run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 1048576);
|
run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 65536);
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
|||||||
@ -368,13 +368,27 @@ def expression_fold(wgn: WasmGenerator, inp: ourlang.Fold) -> None:
|
|||||||
wgn.local.get(len_var)
|
wgn.local.get(len_var)
|
||||||
wgn.i32.lt_u()
|
wgn.i32.lt_u()
|
||||||
with wgn.if_():
|
with wgn.if_():
|
||||||
|
# From here on, adr_var is the address of the byte we're referencing
|
||||||
|
# This is akin to calling stdlib_types.__subscript_bytes__
|
||||||
|
# But since we already know we are within bounds,
|
||||||
|
# we can just bypass it and load the memory directly.
|
||||||
|
wgn.local.get(adr_var)
|
||||||
|
wgn.i32.const(3) # Bytes header -1, since we do a +1 every loop
|
||||||
|
wgn.i32.add()
|
||||||
|
wgn.local.set(adr_var)
|
||||||
|
|
||||||
wgn.add_statement('nop', comment='while True')
|
wgn.add_statement('nop', comment='while True')
|
||||||
with wgn.loop():
|
with wgn.loop():
|
||||||
wgn.add_statement('nop', comment='acu = func(acu, iter[i])')
|
wgn.add_statement('nop', comment='acu = func(acu, iter[i])')
|
||||||
wgn.local.get(acu_var)
|
wgn.local.get(acu_var)
|
||||||
|
|
||||||
|
# Get the next byte, write back the address
|
||||||
wgn.local.get(adr_var)
|
wgn.local.get(adr_var)
|
||||||
wgn.local.get(idx_var)
|
wgn.i32.const(1)
|
||||||
wgn.call(stdlib_types.__subscript_bytes__)
|
wgn.i32.add()
|
||||||
|
wgn.local.tee(adr_var)
|
||||||
|
wgn.i32.load8_u()
|
||||||
|
|
||||||
wgn.add_statement('call', f'${inp.func.name}')
|
wgn.add_statement('call', f'${inp.func.name}')
|
||||||
wgn.local.set(acu_var)
|
wgn.local.set(acu_var)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user