Prior to this MR, the compiler would call stdlib.types's __subscript_bytes__ when folding over bytes. However, that function performs some checks we do not need. After this MR, folding iterates directly over the bytes memory, saving the memory access checks and the function calls. This reduces CPU time by about 43% on Firefox. Also, by default, the CRC32 page now runs a shorter timing test.
247 lines
6.0 KiB
HTML
247 lines
6.0 KiB
HTML
<!DOCTYPE html>
|
|
<html>
|
|
<head>
|
|
<title>Examples - CRC32</title>
|
|
</head>
|
|
<body>
|
|
<h1>CRC32</h1>
|
|
|
|
<a href="index.html">List</a> - <a href="crc32.py.html">Source</a> - <a href="crc32.wat.html">WebAssembly</a><br />
|
|
<br />
|
|
Note: This test performs some timing comparisons; please wait a few seconds for the results.<br />
|
|
<div style="white-space: pre;" id="results"></div>
|
|
|
|
<h2>Measurement log</h2>
|
|
<h3>AMD Ryzen 7 3700X 8-Core, Ubuntu 20.04, Linux 5.4.0-124-generic</h3>
|
|
<h4>After optimizing fold over bytes by inlining __subscript_bytes__</h4>
|
|
<table>
|
|
<tr>
|
|
<td>Test</td>
|
|
<td>Interpreter</td>
|
|
<td>Setup</td>
|
|
<td>WebAssembly</td>
|
|
<td>Javascript</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Chromium 104.0.5112.101</td>
|
|
<td>DevTools closed</td>
|
|
<td>5.70</td>
|
|
<td>12.45</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Firefox 103</td>
|
|
<td>DevTools closed</td>
|
|
<td>5.16</td>
|
|
<td>5.72</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 1048576</td>
|
|
<td>Chromium 104.0.5112.101</td>
|
|
<td>DevTools closed</td>
|
|
<td>95.65</td>
|
|
<td>203.60</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 1048576</td>
|
|
<td>Firefox 103</td>
|
|
<td>DevTools closed</td>
|
|
<td>83.34</td>
|
|
<td>92.38</td>
|
|
</tr>
|
|
</table>
|
|
<h4>Before optimizing fold over bytes by inlining __subscript_bytes__</h4>
|
|
<table>
|
|
<tr>
|
|
<td>Test</td>
|
|
<td>Interpreter</td>
|
|
<td>Setup</td>
|
|
<td>WebAssembly</td>
|
|
<td>Javascript</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Chromium 104.0.5112.101</td>
|
|
<td>DevTools closed</td>
|
|
<td>9.35</td>
|
|
<td>12.56</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Chromium 104.0.5112.101</td>
|
|
<td>DevTools open</td>
|
|
<td>14.71</td>
|
|
<td>12.72</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Chromium 104.0.5112.101</td>
|
|
<td>Record page load</td>
|
|
<td>9.44</td>
|
|
<td>12.69</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Firefox 103</td>
|
|
<td>DevTools closed</td>
|
|
<td>9.02</td>
|
|
<td>5.86</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Firefox 103</td>
|
|
<td>DevTools open</td>
|
|
<td>9.01</td>
|
|
<td>5.83</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 65536</td>
|
|
<td>Firefox 103</td>
|
|
<td>Record page load</td>
|
|
<td>72.41</td>
|
|
<td>5.85</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td>Lynx * 1048576</td>
|
|
<td>Chromium 104.0.5112.101</td>
|
|
<td>DevTools closed</td>
|
|
<td>149.24</td>
|
|
<td>202.36</td>
|
|
</tr>
|
|
<tr>
|
|
<td>Lynx * 1048576</td>
|
|
<td>Firefox 103</td>
|
|
<td>DevTools closed</td>
|
|
<td>145.01</td>
|
|
<td>91.44</td>
|
|
</tr>
|
|
</table>
|
|
|
|
<h4>Notes</h4>
|
|
- Firefox seems faster than Chromium in my setup for Javascript; WebAssembly seems about the same.<br />
|
|
- Having DevTools open in Chromium seems to slow down the WebAssembly by about 30%, but not when doing a recording of the page load.<br />
|
|
- WebAssembly in Firefox seems to slow down when doing a recording of the page load, which makes sense, but the Javascript does not.<br />
|
|
|
|
<script type="text/javascript" src="./include.js"></script>
|
|
<script type="text/javascript">
|
|
// Import object handed to WebAssembly.instantiateStreaming further down; it is left empty, so the module is given no imports.
let importObject = {};
|
|
|
|
// Build up a JS version
|
|
/**
 * Build the 256-entry lookup table used by the byte-at-a-time CRC32 loop.
 *
 * Each entry is the result of pushing its index through eight shift/xor
 * rounds with the constant 0xEDB88320. Entries whose top bit is set come out
 * as negative numbers, because `^` yields a signed 32-bit result.
 *
 * @returns {number[]} array of 256 32-bit integers
 */
function makeCRCTable() {
    const POLYNOMIAL = 0xEDB88320;

    return Array.from({ length: 256 }, (_, index) => {
        let remainder = index;
        for (let round = 0; round < 8; round += 1) {
            const shifted = remainder >>> 1;
            // Low bit set: fold the constant in; otherwise just shift
            remainder = (remainder & 1) ? (POLYNOMIAL ^ shifted) : shifted;
        }
        return remainder;
    });
}
|
|
|
|
// Compute the lookup table once at page load and keep it on window, where crc32_js reads it on every call.
window.crcTable = makeCRCTable();
|
|
|
|
/**
 * Plain-JavaScript CRC32 over a sequence of bytes; run_test uses it as the
 * baseline the WebAssembly export is compared against.
 *
 * Reads the lookup table from window.crcTable.
 *
 * @param i8arr indexable sequence of byte values with a length (the page passes a Uint8Array)
 * @returns {number} the checksum as an unsigned 32-bit number
 */
function crc32_js(i8arr) {
    const table = window.crcTable;

    // Start with all 32 bits set
    let acc = ~0;

    // Plain indexed loop on purpose: this function is what gets timed
    let pos = 0;
    while (pos < i8arr.length) {
        const slot = (acc ^ i8arr[pos]) & 0xFF;
        acc = table[slot] ^ (acc >>> 8);
        pos += 1;
    }

    // Invert the bits, then reinterpret the signed result as unsigned
    return ~acc >>> 0;
}
|
|
|
|
/**
 * Run a single CRC32 test case.
 *
 * The input text is turned into bytes, copied into the WebAssembly instance
 * with alloc_bytes, and then checksummed by both the WebAssembly export and
 * crc32_js through run_times. The outcome is reported via test_result: the
 * test passes when every run produced the same checksum in both
 * implementations and WebAssembly was not slower than Javascript.
 *
 * @param app        instantiated module; app.instance.exports.crc32 is called
 * @param str        input text; each UTF-16 code unit is stored as one byte
 * @param str_repeat optional number of times str is repeated; missing or 0 means once
 */
function run_test(app, str, str_repeat)
{
    // Work on locals instead of reassigning the parameters
    const repeat_count = str_repeat || 1;
    const text = str.repeat(repeat_count);

    // Fill the byte array directly; avoids building two large temporary arrays
    // for the long inputs. Values above 255 wrap on store, as before.
    const data = new Uint8Array(text.length);
    for (let i = 0; i < text.length; i++) {
        data[i] = text.charCodeAt(i);
    }

    // Declared locally: this used to be an assignment to an undeclared name,
    // which leaked a global (and throws in strict mode)
    const offset = alloc_bytes(app, data);

    // Cast to Uint32 in Javascript, so the value is comparable with crc32_js
    const crc32_wasm = function(offset) {
        return app.instance.exports.crc32(offset) >>> 0;
    };

    const wasm_timing = run_times(100, () => crc32_wasm(offset));
    const js_timing = run_times(100, () => crc32_js(data));

    const wasm_time = wasm_timing.avg;
    const js_time = js_timing.avg;

    // Every recorded WebAssembly result must equal the Javascript result of the same run
    const check = wasm_timing.values.every(function(value, index) {
        return value.result === js_timing.values[index].result;
    });

    // Don't test speedup for small strings, it varies a lot: when the fastest
    // run measured as 0 the ratio is meaningless, so report parity instead.
    // NOTE(review): loose == kept on purpose; run_times is defined outside this
    // file and the type of `min` is not visible here.
    const speedup = (wasm_timing.min == 0 || js_timing.min == 0)
        ? 1
        : js_time / wasm_time;

    // Show short inputs in full, long ones as their first 64 characters plus the total length
    const shown = text.length < 64
        ? '"' + text + '"'
        : '"' + text.substring(0, 64) + '..." (' + text.length + ')';

    test_result(check && 0.999 < speedup, { // At least as fast as Javascript
        'summary': 'crc32(' + shown + ')',
        'attributes': {
            'str': str,
            'str_repeat': repeat_count,
            'wasm_timing': wasm_timing,
            'js_timing': js_timing,
            'check': check,
            'speedup': speedup,
        },
    });
}
|
|
|
|
// Load WebAssembly, and run all tests
WebAssembly.instantiateStreaming(fetch('crc32.wasm'), importObject)
    .then(app => {
        // Grow linear memory by 640 pages (64 KiB each, 40 MiB in total)
        // before any test allocates its input.
        // NOTE(review): presumably sized for the largest repeated string below - confirm.
        app.instance.exports.memory.grow(640);

        run_test(app, "");
        run_test(app, "a");
        run_test(app, "Z");
        run_test(app, "ab");
        run_test(app, "abcdefghijklmnopqrstuvwxyz");
        run_test(app, "The quick brown fox jumps over the lazy dog");
        run_test(app, "The quick brown fox jumps over the lazy dog", 1024);
        run_test(app, "Lynx c.q. vos prikt bh: dag zwemjuf!", 65536);
    })
    .catch(err => {
        // Previously the chain had no rejection handler, so a failed fetch,
        // a compile error or a throwing test only showed up as an anonymous
        // unhandled rejection. Log it with context instead.
        console.error('crc32: loading crc32.wasm or running the tests failed', err);
    });
|
|
</script>
|
|
|
|
</body>
|
|
</html>
|