From b3646ae5d3415678e69b9dd3dbfbace03e5b1d7b Mon Sep 17 00:00:00 2001 From: jaakko Date: Mon, 6 Apr 2026 19:59:09 +0300 Subject: [PATCH] =?UTF-8?q?Web=20Worker:=20WASM-inferenssi=20erillisess?= =?UTF-8?q?=C3=A4=20s=C3=A4ikeess=C3=A4,=20UI=20ei=20j=C3=A4=C3=A4dy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Poistettu kaikki web_sys::window() -kutsut Rust WASM:sta - Uudet Worker-yhteensopivat apufunktiot: perf_now(), worker_fetch(), sleep_ms() - worker.js lataa ja ajaa WASM-moduulin erillisessä säikeessä - ensureCoderNode käynnistää Workerin pääsäikeen sijaan - Selaimen UI pysyy responsiivisena inferenssin aikana Co-Authored-By: Claude Opus 4.6 (1M context) --- network-poc/node/src/lib.rs | 55 +++++++++++++++++++------ network-poc/node/src/qwen.rs | 15 +++---- network-poc/node/src/qwen_coder.rs | 14 +++---- network-poc/node/src/smollm.rs | 18 ++++----- network-poc/static/index.html | 64 ++++++++++++++++-------------- network-poc/static/worker.js | 33 +++++++++++++++ 6 files changed, 129 insertions(+), 70 deletions(-) create mode 100644 network-poc/static/worker.js diff --git a/network-poc/node/src/lib.rs b/network-poc/node/src/lib.rs index 6700b27..52505ad 100644 --- a/network-poc/node/src/lib.rs +++ b/network-poc/node/src/lib.rs @@ -38,17 +38,50 @@ pub fn set_gpu_load(load: u32) { console_log!("[Wasm] GPU Kuormitusraja vaihdettu -> {}%", load); } -// Asynkroninen odotus WebAssemblylle -async fn sleep_ms(ms: i32) { +// Worker-yhteensopiva setTimeout — toimii sekä Window- että Worker-kontekstissa +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_name = setTimeout)] + fn set_timeout(closure: &js_sys::Function, ms: i32); +} + +// Asynkroninen odotus WebAssemblylle (Window + Worker) +pub async fn sleep_ms(ms: i32) { let promise = js_sys::Promise::new(&mut |resolve, _| { - web_sys::window() - .unwrap() - .set_timeout_with_callback_and_timeout_and_arguments_0(&resolve, ms) - .unwrap(); + set_timeout(&resolve, ms); }); let _ 
= wasm_bindgen_futures::JsFuture::from(promise).await; } +// Worker-yhteensopiva Performance — käyttää globalThis.performance +pub fn perf_now() -> f64 { + js_sys::Reflect::get(&js_sys::global(), &"performance".into()) + .ok() + .and_then(|p| js_sys::Reflect::get(&p, &"now".into()).ok()) + .and_then(|f| f.dyn_into::().ok()) + .and_then(|f| { + let perf = js_sys::Reflect::get(&js_sys::global(), &"performance".into()).unwrap(); + f.call0(&perf).ok() + }) + .and_then(|v| v.as_f64()) + .unwrap_or(0.0) +} + +// Worker-yhteensopiva fetch — käyttää globalThis.fetch +pub async fn worker_fetch(url: &str) -> Result { + let promise = js_sys::Reflect::get(&js_sys::global(), &"fetch".into()) + .map_err(|_| "fetch ei saatavilla".to_string())? + .dyn_into::() + .map_err(|_| "fetch ei funktio".to_string())? + .call1(&JsValue::NULL, &url.into()) + .map_err(|e| format!("fetch: {:?}", e))?; + let resp = wasm_bindgen_futures::JsFuture::from(js_sys::Promise::from(promise)) + .await + .map_err(|e| format!("fetch await: {:?}", e))?; + resp.dyn_into::() + .map_err(|_| "ei Response".to_string()) +} + // Geneerinen tensorilaskenta — toimii millä tahansa Burn-backendillä fn run_matmul(size: usize) -> String { let device = Default::default(); @@ -123,10 +156,9 @@ async fn run_single_tokenize(text: String, ws: Rc>) { let Some(bytes) = cached_tok else { return; }; let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else { return; }; - let perf = web_sys::window().unwrap().performance().unwrap(); - let start = perf.now(); + let start = perf_now(); let result = tokenize_text(&tokenizer, &text); - let duration_ms = perf.now() - start; + let duration_ms = perf_now() - start; let token_count = result["token_count"].as_u64().unwrap_or(0); let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0); @@ -157,11 +189,10 @@ async fn run_pair_comparison(en_text: String, fi_text: String, ws: Rc>) -> Res console_log!("[Qwen] Ladataan {}...", key); - let window = web_sys::window().unwrap(); - let 
resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url)) - .await.map_err(|e| format!("Fetch epäonnistui: {:?}", e))?; - let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response".to_string())?; + let resp = crate::worker_fetch(url).await?; if !resp.ok() { return Err(format!("HTTP {}", resp.status())); } let total_size: usize = resp.headers() @@ -71,7 +68,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc>) -> Res } pub async fn run_qwen_inference(prompt: String, ws: Rc>) { - let perf = web_sys::window().unwrap().performance().unwrap(); + // performance via crate::perf_now() let tok_bytes = match ensure_cached("qwen05b-tokenizer.json", TOKENIZER_URL, &ws).await { Ok(b) => b, @@ -88,7 +85,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc>) { }; console_log!("[Qwen] Rakennetaan mallia..."); - let start_load = perf.now(); + let start_load = crate::perf_now(); let device = Device::Cpu; let dtype = DType::F32; @@ -120,7 +117,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc>) { Err(e) => { console_log!("[Qwen] Mallin lataus: {}", e); return; } }; - let load_time = perf.now() - start_load; + let load_time = crate::perf_now() - start_load; console_log!("[Qwen] Malli ladattu ({:.0}ms). 
Generoidaan...", load_time); let encoding = match tokenizer.encode(prompt.as_str(), true) { @@ -131,7 +128,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc>) { let input_len = input_ids.len(); console_log!("[Qwen] Syöte: {} tokenia", input_len); - let start_gen = perf.now(); + let start_gen = crate::perf_now(); let max_new_tokens = 32; let mut generated_text = String::new(); let mut tokens_generated: usize = 0; @@ -202,7 +199,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc>) { crate::sleep_ms(0).await; } - let gen_time = perf.now() - start_gen; + let gen_time = crate::perf_now() - start_gen; let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 }; console_log!("[Qwen] {} tokenia | {:.0}ms | {:.1} tok/s", tokens_generated, gen_time, tokens_per_sec); diff --git a/network-poc/node/src/qwen_coder.rs b/network-poc/node/src/qwen_coder.rs index a83854a..c4a85b7 100644 --- a/network-poc/node/src/qwen_coder.rs +++ b/network-poc/node/src/qwen_coder.rs @@ -140,10 +140,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc>) -> Res console_log!("[Coder] Ladataan {}...", key); - let window = web_sys::window().unwrap(); - let resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url)) - .await.map_err(|e| format!("Fetch: {:?}", e))?; - let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response".to_string())?; + let resp = crate::worker_fetch(url).await?; if !resp.ok() { return Err(format!("HTTP {}", resp.status())); } let total_size: usize = resp.headers() @@ -251,17 +248,16 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc>) -> Result /// use_3b: false = 0.5B (nopea), true = 3B (laadukas) pub async fn run_coder_inference(prompt: String, ws: Rc>, use_3b: bool, task_id: Option) { - let perf = web_sys::window().unwrap().performance().unwrap(); let size_label = if use_3b { "3B" } else { "0.5B" }; - let start_load = perf.now(); + let start_load = crate::perf_now(); if let 
Err(e) = get_or_build_model(use_3b, &ws).await { console_log!("[Coder] Mallin lataus: {}", e); return; } - let load_time = perf.now() - start_load; + let load_time = crate::perf_now() - start_load; if load_time > 100.0 { console_log!("[Coder] Malli ladattu ({:.0}ms). Generoidaan...", load_time); } @@ -297,7 +293,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc>, use console_log!("[Coder] Syöte: {} tokenia", input_len); let device = Device::Cpu; - let start_gen = perf.now(); + let start_gen = crate::perf_now(); let eos_token = 151645u32; let temperature: f32 = 0.7; let top_k: usize = 40; @@ -373,7 +369,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc>, use tokens_generated += 1; } - let gen_time = perf.now() - start_gen; + let gen_time = crate::perf_now() - start_gen; // Siivotaan vastaus: poista markdown-koodiblokit ja johdantotekstit let cleaned = strip_markdown_wrapper(&generated_text); diff --git a/network-poc/node/src/smollm.rs b/network-poc/node/src/smollm.rs index ada64a2..6417cb2 100644 --- a/network-poc/node/src/smollm.rs +++ b/network-poc/node/src/smollm.rs @@ -28,10 +28,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc>) -> Res send_progress(ws, key, 0, 0, 0); // Fetch API:lla saadaan Content-Length ja streaming-luku - let window = web_sys::window().unwrap(); - let resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url)) - .await.map_err(|e| format!("Fetch epäonnistui: {:?}", e))?; - let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response-objekti".to_string())?; + let resp = crate::worker_fetch(url).await?; if !resp.ok() { return Err(format!("HTTP {}", resp.status())); @@ -99,7 +96,7 @@ fn send_progress(ws: &Rc>, file: &str, pct: u32, loaded: usiz /// Lataa malli ja tokenizer, suorita inferenssi ja streamaa tokenit hubille pub async fn run_smollm_inference(prompt: String, ws: Rc>) { - let perf = web_sys::window().unwrap().performance().unwrap(); + // performance via crate::perf_now() // 
1. Lataa tokenizer let tok_bytes = match ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws).await { @@ -122,7 +119,7 @@ pub async fn run_smollm_inference(prompt: String, ws: Rc>) { // Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu) // → NdArray kunnes Burn 0.21 stable + Wasm-tuki console_log!("[SmolLM] Burn NdArray (CPU) inferenssi..."); - run_burn_inference::(prompt, model_bytes, tokenizer, ws, perf.clone()).await; + run_burn_inference::(prompt, model_bytes, tokenizer, ws).await; } async fn run_burn_inference( @@ -130,9 +127,8 @@ async fn run_burn_inference( model_bytes: Vec, tokenizer: tokenizers::Tokenizer, ws: Rc>, - perf: web_sys::Performance, // Korjattu Wasm-performanssi välitettäväksi ) { - let start_load = perf.now(); + let start_load = crate::perf_now(); let device = Default::default(); let config = crate::burn_smollm::config::SmolLMConfig::default(); @@ -143,7 +139,7 @@ async fn run_burn_inference( Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; } }; - let load_time = perf.now() - start_load; + let load_time = crate::perf_now() - start_load; console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). 
Generoidaan...", load_time); let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt); @@ -156,7 +152,7 @@ async fn run_burn_inference( let input_len = input_ids.len(); console_log!("[SmolLM] Syöte: {} tokenia", input_len); - let start_gen = perf.now(); + let start_gen = crate::perf_now(); let max_new_tokens = 32; let mut generated_text = String::new(); let mut tokens_generated: usize = 0; @@ -219,7 +215,7 @@ async fn run_burn_inference( tokens_generated += 1; } - let gen_time = perf.now() - start_gen; + let gen_time = crate::perf_now() - start_gen; let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 }; let done = serde_json::json!({ diff --git a/network-poc/static/index.html b/network-poc/static/index.html index 40bef5c..1e2c6cf 100644 --- a/network-poc/static/index.html +++ b/network-poc/static/index.html @@ -3233,7 +3233,9 @@ Write the corrected code.`; const _prevConsoleLog = console.log; console.log = function(...args) { _prevConsoleLog.apply(console, args); codeLogListener(...args); }; - // Käynnistä Coder-node automaattisesti ensimmäisellä kerralla + // Web Worker -pohjainen laskentasolmu — UI ei jäädy inferenssin aikana + let coderWorker = null; + async function ensureCoderNode() { if (coderJoined) return; coderJoined = true; @@ -3243,10 +3245,21 @@ Write the corrected code.`; setStep('step-wasm', 'active'); try { - if (!wasmInitialized) { - await init(); - wasmInitialized = true; - } + // Käynnistetään WASM Web Workerissa + coderWorker = new Worker('./worker.js'); + + // Workerin console.log-viestit → pääsäikeen kuuntelija + // Worker ei voi kutsua console.log näkyvästi, joten WASM:n console_log + // ei näy automaattisesti. Workerissa console.log menee Workerin konsoliin. 
+ + await new Promise((resolve, reject) => { + coderWorker.onmessage = (e) => { + if (e.data.type === 'ready') resolve(); + else if (e.data.type === 'error') reject(new Error(e.data.message)); + }; + coderWorker.postMessage({ type: 'init' }); + }); + setStep('step-wasm', 'done'); setStep('step-tokenizer', 'active'); @@ -3260,30 +3273,23 @@ Write the corrected code.`; selected_task: coderSize === '3b' ? 'qwen-coder-3b' : 'qwen-coder-05b' }; const taskId = coderSize === '3b' ? 5 : 4; - // Tunnistetaan WebGPU myös koodilaboratorion puolella - let coderHasWebGPU = false; - if (navigator.gpu) { - try { - const adapter = await navigator.gpu.requestAdapter(); - if (adapter) { - try { - const testDevice = await adapter.requestDevice({ requiredLimits: { maxInterStageShaderComponents: 60 } }); - coderHasWebGPU = true; - testDevice.destroy(); - } catch(e) { - coderHasWebGPU = false; - } - } - } catch(e) {} - } - await start_agent_node(wsUrl, coderHasWebGPU, JSON.stringify(deviceInfo), taskId); - document.getElementById('coder-status').textContent = 'Connected'; - document.getElementById('coder-status').style.color = '#d29922'; - coderWsReady = true; - // Proaktiivinen mallin esilataus: lähetetään tyhjä warmup-prompt - // joka triggeröi get_or_build_model:n ilman varsinaista generointia. - // Pipeline-tilakone seuraa logeja ja merkkaa vaiheet valmiiksi. 
+ // Käynnistetään node Workerissa + coderWorker.onmessage = (e) => { + if (e.data.type === 'started') { + document.getElementById('coder-status').textContent = 'Connected'; + document.getElementById('coder-status').style.color = '#d29922'; + coderWsReady = true; + } else if (e.data.type === 'error') { + console.log('[Worker] Virhe: ' + e.data.message); + } + }; + coderWorker.postMessage({ + type: 'start', + data: { hubUrl: wsUrl, hasWebGPU: false, deviceInfo: JSON.stringify(deviceInfo), taskId } + }); + + // Warmup setTimeout(() => { if (uiSocket && uiSocket.readyState === 1) { uiSocket.send(JSON.stringify({ @@ -3297,7 +3303,7 @@ Write the corrected code.`; if (pendingCodePrompt) { setTimeout(() => { sendCodeToHub(pendingCodePrompt); - }, 2000); // Hieman pidempi odotus jotta warmup ehtii ensin + }, 2000); pendingCodePrompt = null; } } catch(e) { diff --git a/network-poc/static/worker.js b/network-poc/static/worker.js new file mode 100644 index 0000000..1f33288 --- /dev/null +++ b/network-poc/static/worker.js @@ -0,0 +1,33 @@ +// Kipinä WASM Worker — ajaa kielimallin inferenssin erillisessä säikeessä +// Pääsäie (UI) ei jäädy pitkien laskutoimituksien aikana. 
+
+let wasm = null; // wasm-bindgen exports; stays null until 'init' has fully succeeded
+const describe = (err) => (err && err.message) || String(err); // thrown value may not be an Error
+self.onmessage = async (e) => {
+  const { type, data } = e.data;
+  if (type === 'init') {
+    try {
+      // Load the WASM module inside the worker.
+      importScripts('./pkg/node.js');
+      await wasm_bindgen.default(); // NOTE(review): assumes the glue exposes init as `.default` - confirm
+      wasm = wasm_bindgen; // publish only after init succeeded, so a failed init is never used
+      self.postMessage({ type: 'ready' });
+    } catch (err) {
+      self.postMessage({ type: 'error', message: 'WASM init: ' + describe(err) });
+    }
+  } else if (type === 'start') {
+    // Report (rather than silently drop) a start request that arrives before a successful init.
+    if (!wasm) return self.postMessage({ type: 'error', message: 'Node: WASM not initialized' });
+    const { hubUrl, hasWebGPU, deviceInfo, taskId } = data;
+    try {
+      await wasm.start_agent_node(hubUrl, hasWebGPU, deviceInfo, taskId);
+      self.postMessage({ type: 'started' });
+    } catch (err) {
+      self.postMessage({ type: 'error', message: 'Node: ' + describe(err) });
+    }
+  } else if (type === 'set_gpu_load') {
+    if (wasm) wasm.set_gpu_load(data.load);
+  } else if (type === 'set_auto_tasks') {
+    if (wasm) wasm.set_auto_tasks(data.enabled);
+  }
+};