Most examples I have seen copy the string twice: first on the WASM side (into a `CString`, or by shrinking the capacity of the `Vec` to its length), and then on the JS side while decoding the UTF-8.
Given that we often use WASM for the sake of speed, I sought to implement a version that would reuse the Rust vector.
use std::collections::HashMap;
thread_local! {
    /// Byte vectors shared with JavaScript.
    ///
    /// A map from the payload's memory location to the `Vec<u8>` that owns it.
    ///
    /// In order to deallocate memory in Rust we need not just the memory location but also its
    /// size. In the case of strings and vectors the freed size is the capacity, so keeping the
    /// whole vector around lets us free it later without having to change its capacity first.
    ///
    /// Stored in a thread-local `RefCell` rather than a `static mut`, so no `unsafe` references
    /// to a mutable static are needed and the map exists even if `init` was never called.
    /// Each thread gets its own registry (WASM is assumed to run on the single JavaScript thread).
    static SHARED_VECS: std::cell::RefCell<HashMap<usize, Vec<u8>>> =
        std::cell::RefCell::new(HashMap::new());
}

// Logging functions supplied by the JavaScript host. Both receive a pointer previously returned
// by `vec2js`; the host in this file decodes the bytes and then calls `free_vec` on the pointer.
extern "C" {
    /// Logs a vector that the host decodes as UTF-8.
    fn console_log(rs: *const u8);
    /// Logs a vector that the host decodes as ISO-8859-1.
    fn console_log_8859_1(rs: *const u8);
}

/// Resets the registry of shared vectors, dropping every vector still held in it.
///
/// Calling this before the other functions is optional: the registry is created on first use.
#[no_mangle]
pub extern "C" fn init() {
    SHARED_VECS.with(|vecs| *vecs.borrow_mut() = HashMap::new());
}

/// Returns the length in bytes of the shared vector whose payload starts at `payload`.
///
/// # Panics
///
/// Panics if `payload` was not returned by [`vec2js`] or has already been passed to
/// [`free_vec`].
#[no_mangle]
pub extern "C" fn vec_len(payload: *const u8) -> u32 {
    SHARED_VECS.with(|vecs| {
        let vecs = vecs.borrow();
        let shared = vecs
            .get(&(payload as usize))
            .expect("vec_len: pointer is not a live vector registered by vec2js");
        // Lossless on wasm32, where `usize` is 32 bits wide.
        shared.len() as u32
    })
}

/// Moves `v` into the registry of shared vectors and returns a pointer to its payload.
///
/// The bytes are not copied if `v` already is a `Vec<u8>` or a `String`. The vector stays
/// alive, at the returned address, until [`free_vec`] is called with that pointer (or until
/// [`init`] resets the registry).
pub fn vec2js<V: Into<Vec<u8>>>(v: V) -> *const u8 {
    let mut v = v.into();
    if v.capacity() == 0 {
        // A vector without an allocation reports the same dangling pointer as every other
        // such vector, so two live empty vectors would share one key. Give it real storage
        // so that the address is unique while the vector is registered.
        v.reserve(1);
    }
    let payload = v.as_ptr();
    SHARED_VECS.with(|vecs| {
        vecs.borrow_mut().insert(payload as usize, v);
    });
    payload
}

/// Drops the shared vector whose payload starts at `payload`.
///
/// Does nothing if no such vector is registered.
#[no_mangle]
pub extern "C" fn free_vec(payload: *const u8) {
    SHARED_VECS.with(|vecs| {
        vecs.borrow_mut().remove(&(payload as usize));
    });
}
/// Entry point invoked by the JavaScript host: logs one UTF-8 and one ISO-8859-1 message.
///
/// Exported with the C ABI, like `free_vec`, so that the exported signature does not depend on
/// the unstable Rust ABI.
#[no_mangle]
pub extern "C" fn start() {
    // No `format!` needed: there is nothing to interpolate.
    let greeting = vec2js("Hello again!");
    // SAFETY: `greeting` was just registered by `vec2js`, so it stays valid until the host
    // releases it through `free_vec`.
    unsafe { console_log(greeting) };

    let ascii = vec2js(b"ASCII string." as &[u8]);
    // SAFETY: same as above, `ascii` is a live vector registered by `vec2js`.
    unsafe { console_log_8859_1(ascii) };
}
And the JavaScript part:
(function (iif) {
  /**
   * Decodes a byte vector shared by the WASM module into a JavaScript string
   * and then asks the module to free that vector.
   *
   * @param {object} mod - WASM instance exports (`memory`, `vec_len`, `free_vec`).
   * @param {number} rs - Address of the vector's payload inside the WASM memory.
   * @param {string} [utfLabel='utf-8'] - Encoding label handed to `TextDecoder`.
   * @returns {string} The decoded text.
   */
  function rs2js (mod, rs, utfLabel = 'utf-8') {
    try {
      const view = new Uint8Array (mod.memory.buffer, rs, mod.vec_len (rs))
      return new TextDecoder (utfLabel).decode (view)
    } finally {
      // Free the vector even if decoding throws, otherwise it would stay allocated in WASM forever.
      mod.free_vec (rs)
    }
  }

  /**
   * Import shared by both logging callbacks.
   * The vector is always decoded (and thereby freed), even when there is no console to print to.
   */
  function logFromRust (rs, utfLabel) {
    const text = rs2js (iif.main, rs, utfLabel)
    if (window.console) console.log ('main]', text)
  }

  /**
   * Fetches and instantiates `main.wasm`, then runs its `init` and `start` exports.
   *
   * @param {boolean} cache - Whether the browser's HTTP cache may be used for the fetch.
   */
  function loadWasm (cache) {
    const imports = {env: {
      console_log: function (rs) {logFromRust (rs, 'utf-8')},
      console_log_8859_1: function (rs) {logFromRust (rs, 'iso-8859-1')}
    }}
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/WebAssembly/instantiateStreaming
    WebAssembly.instantiateStreaming (fetch ('main.wasm', {cache: cache ? "default" : "no-cache"}), imports)
      .then (results => {
        const exports = results.instance.exports
        exports.init()
        // Must be set before `start`, because the logging imports read `iif.main`.
        iif.main = exports
        iif.main.start()
      })
  }

  // Hot code reloading.
  if (window.location.hostname == '127.0.0.1' && window.location.port == '43080') {
    window.setInterval (
      function() {
        // Check if the WASM was updated: `main.wasm.lm` holds a number that changes with it.
        fetch ('main.wasm.lm', {cache: "no-cache"}) .then (r => r.text()) .then (lm => {
          lm = lm.trim()
          if (/^\d+$/.test (lm) && lm != iif.lm) {
            iif.lm = lm
            loadWasm (false)
          }
        })
      },
      200)
  } else loadWasm (true)
} (window.iif = window.iif || {}))
The trade-off here is that we're using `HashMap` in the WASM, which might increase the binary size unless `HashMap` is already required.
An interesting alternative would be to use WebAssembly tables to share the (payload, length, capacity) triplet with JavaScript and get it back when it is time to free the string. But I don't know how to use the tables yet.
P.S. Sometimes we don't want to allocate the `Vec` in the first place. In this case we can move the memory tracking to JavaScript:
// Functions supplied by the JavaScript host (see the `env` imports in the script below).
extern "C" {
/// Hands `len` bytes starting at `utf8` to the host and returns an integer ID.
/// The host in this file decodes the bytes as UTF-8 and stores the resulting string under that ID.
fn new_js_string(utf8: *const u8, len: i32) -> i32;
/// Logs the string identified by `js`.
/// The host in this file also deletes the string afterwards, so an ID can be logged only once.
fn console_log(js: i32);
}
/// Passes `rs` to the JavaScript host and returns the ID the host assigned to the new string.
///
/// Nothing is allocated on the Rust side: the host reads the bytes directly from `rs`.
///
/// # Panics
///
/// Panics if the byte length of `rs` does not fit in an `i32`, which is the type used to pass
/// the length to the host.
fn rs2js(rs: &str) -> i32 {
    let len = rs.len();
    // `<=` rather than `<`: a length of exactly `i32::MAX` still fits.
    assert!(
        len <= i32::MAX as usize,
        "string is too long to pass its length to JavaScript as an i32"
    );
    // SAFETY: `rs` is a live `&str`, so the pointer is valid for `len` bytes of UTF-8 for the
    // duration of the call; the assertion above guarantees that the cast is lossless.
    unsafe { new_js_string(rs.as_ptr(), len as i32) }
}
/// Entry point invoked by the JavaScript host: creates a JavaScript string and logs it.
///
/// Exported with the C ABI so that the exported signature does not depend on the unstable
/// Rust ABI.
#[no_mangle]
pub extern "C" fn start() {
    // `rs2js` is a safe function, so only the foreign call needs the `unsafe` block.
    let greeting = rs2js("Hello again!");
    // SAFETY: `greeting` is an ID that the host has just returned from `new_js_string`.
    unsafe { console_log(greeting) };
}
(function (iif) {
  // `window.iif` may already have been created by another script without these fields;
  // make sure they exist so that the string table lookups below cannot throw.
  if (typeof iif.strings === 'undefined') iif.strings = {}
  if (typeof iif.lastStringId === 'undefined') iif.lastStringId = 1

  // String IDs travel through WASM as `i32`, so they must stay within this range.
  const MAX_STRING_ID = 2147483647
  const MIN_STRING_ID = -2147483648

  /**
   * Finds an unused string ID, scanning upwards from the most recently issued one
   * and wrapping around to the most negative ID once the positive range is exhausted.
   *
   * @returns {number} An ID that has no entry in `iif.strings`.
   * @throws {Error} When no free ID was found.
   */
  function nextStringId () {
    let stringId = iif.lastStringId
    while (typeof iif.strings[stringId] !== 'undefined') stringId += 1
    if (stringId > MAX_STRING_ID) { // Can't easily pass more than that through WASM.
      stringId = MIN_STRING_ID
      while (typeof iif.strings[stringId] !== 'undefined') stringId += 1
      if (stringId > MAX_STRING_ID) throw new Error ('Out of string IDs!')
    }
    return stringId
  }

  /** Fetches and instantiates `main.wasm`, then runs its `start` export. */
  function loadWasm (cache) {
    const imports = {env: {
      // Decodes `len` UTF-8 bytes at address `utf8` of the WASM memory and stores the string under a new ID.
      new_js_string: function (utf8, len) {
        const view = new Uint8Array (iif.main.memory.buffer, utf8, len)
        const decoded = new TextDecoder ('utf-8').decode (view)
        const stringId = nextStringId()
        iif.strings[stringId] = decoded
        iif.lastStringId = stringId
        return stringId
      },
      // Logs the string stored under the ID `js`, then forgets it (also when there is no console).
      console_log: function (js) {
        if (window.console) console.log ('main]', iif.strings[js])
        delete iif.strings[js]
      }
    }}
    WebAssembly.instantiateStreaming (fetch ('main.wasm', {cache: cache ? "default" : "no-cache"}), imports)
      .then (results => {
        // Must be set before `start`, because `new_js_string` reads `iif.main.memory`.
        iif.main = results.instance.exports
        iif.main.start()
      })
  }

  loadWasm (true)
} (window.iif = window.iif || {strings: {}, lastStringId: 1}))
`pub fn hello() -> String { "hello from rust".to_string() }` returns the same "undefined" – Briarroot