Web Worker: WASM-inferenssi erillisessä säikeessä, UI ei jäädy
- Poistettu kaikki web_sys::window() -kutsut Rust WASM:sta
- Uudet Worker-yhteensopivat apufunktiot: perf_now(), worker_fetch(), sleep_ms()
- worker.js lataa ja ajaa WASM-moduulin erillisessä säikeessä
- ensureCoderNode käynnistää Workerin pääsäikeen sijaan
- Selaimen UI pysyy responsiivisena inferenssin aikana

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,10 +28,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
|
||||
send_progress(ws, key, 0, 0, 0);
|
||||
|
||||
// Fetch API:lla saadaan Content-Length ja streaming-luku
|
||||
let window = web_sys::window().unwrap();
|
||||
let resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url))
|
||||
.await.map_err(|e| format!("Fetch epäonnistui: {:?}", e))?;
|
||||
let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response-objekti".to_string())?;
|
||||
let resp = crate::worker_fetch(url).await?;
|
||||
|
||||
if !resp.ok() {
|
||||
return Err(format!("HTTP {}", resp.status()));
|
||||
@@ -99,7 +96,7 @@ fn send_progress(ws: &Rc<RefCell<WebSocket>>, file: &str, pct: u32, loaded: usiz
|
||||
|
||||
/// Lataa malli ja tokenizer, suorita inferenssi ja streamaa tokenit hubille
|
||||
pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
||||
// performance via crate::perf_now()
|
||||
|
||||
// 1. Lataa tokenizer
|
||||
let tok_bytes = match ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws).await {
|
||||
@@ -122,7 +119,7 @@ pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
// Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu)
|
||||
// → NdArray kunnes Burn 0.21 stable + Wasm-tuki
|
||||
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
|
||||
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
|
||||
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws).await;
|
||||
}
|
||||
|
||||
async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
@@ -130,9 +127,8 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
model_bytes: Vec<u8>,
|
||||
tokenizer: tokenizers::Tokenizer,
|
||||
ws: Rc<RefCell<WebSocket>>,
|
||||
perf: web_sys::Performance, // Korjattu Wasm-performanssi välitettäväksi
|
||||
) {
|
||||
let start_load = perf.now();
|
||||
let start_load = crate::perf_now();
|
||||
|
||||
let device = Default::default();
|
||||
let config = crate::burn_smollm::config::SmolLMConfig::default();
|
||||
@@ -143,7 +139,7 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; }
|
||||
};
|
||||
|
||||
let load_time = perf.now() - start_load;
|
||||
let load_time = crate::perf_now() - start_load;
|
||||
console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
||||
|
||||
let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
|
||||
@@ -156,7 +152,7 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
let input_len = input_ids.len();
|
||||
console_log!("[SmolLM] Syöte: {} tokenia", input_len);
|
||||
|
||||
let start_gen = perf.now();
|
||||
let start_gen = crate::perf_now();
|
||||
let max_new_tokens = 32;
|
||||
let mut generated_text = String::new();
|
||||
let mut tokens_generated: usize = 0;
|
||||
@@ -219,7 +215,7 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
tokens_generated += 1;
|
||||
}
|
||||
|
||||
let gen_time = perf.now() - start_gen;
|
||||
let gen_time = crate::perf_now() - start_gen;
|
||||
let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
|
||||
|
||||
let done = serde_json::json!({
|
||||
|
||||
Reference in New Issue
Block a user