Web Worker: WASM-inferenssi erillisessä säikeessä, UI ei jäädy
- Poistettu kaikki web_sys::window() -kutsut Rust WASM:sta - Uudet Worker-yhteensopivat apufunktiot: perf_now(), worker_fetch(), sleep_ms() - worker.js lataa ja ajaa WASM-moduulin erillisessä säikeessä - ensureCoderNode käynnistää Workerin pääsäikeen sijaan - Selaimen UI pysyy responsiivisena inferenssin aikana Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,17 +38,50 @@ pub fn set_gpu_load(load: u32) {
|
|||||||
console_log!("[Wasm] GPU Kuormitusraja vaihdettu -> {}%", load);
|
console_log!("[Wasm] GPU Kuormitusraja vaihdettu -> {}%", load);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Asynkroninen odotus WebAssemblylle
|
// Worker-yhteensopiva setTimeout — toimii sekä Window- että Worker-kontekstissa
|
||||||
async fn sleep_ms(ms: i32) {
|
#[wasm_bindgen]
|
||||||
|
extern "C" {
|
||||||
|
#[wasm_bindgen(js_name = setTimeout)]
|
||||||
|
fn set_timeout(closure: &js_sys::Function, ms: i32);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Asynkroninen odotus WebAssemblylle (Window + Worker)
|
||||||
|
pub async fn sleep_ms(ms: i32) {
|
||||||
let promise = js_sys::Promise::new(&mut |resolve, _| {
|
let promise = js_sys::Promise::new(&mut |resolve, _| {
|
||||||
web_sys::window()
|
set_timeout(&resolve, ms);
|
||||||
.unwrap()
|
|
||||||
.set_timeout_with_callback_and_timeout_and_arguments_0(&resolve, ms)
|
|
||||||
.unwrap();
|
|
||||||
});
|
});
|
||||||
let _ = wasm_bindgen_futures::JsFuture::from(promise).await;
|
let _ = wasm_bindgen_futures::JsFuture::from(promise).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Worker-yhteensopiva Performance — käyttää globalThis.performance
|
||||||
|
pub fn perf_now() -> f64 {
|
||||||
|
js_sys::Reflect::get(&js_sys::global(), &"performance".into())
|
||||||
|
.ok()
|
||||||
|
.and_then(|p| js_sys::Reflect::get(&p, &"now".into()).ok())
|
||||||
|
.and_then(|f| f.dyn_into::<js_sys::Function>().ok())
|
||||||
|
.and_then(|f| {
|
||||||
|
let perf = js_sys::Reflect::get(&js_sys::global(), &"performance".into()).unwrap();
|
||||||
|
f.call0(&perf).ok()
|
||||||
|
})
|
||||||
|
.and_then(|v| v.as_f64())
|
||||||
|
.unwrap_or(0.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Worker-yhteensopiva fetch — käyttää globalThis.fetch
|
||||||
|
pub async fn worker_fetch(url: &str) -> Result<web_sys::Response, String> {
|
||||||
|
let promise = js_sys::Reflect::get(&js_sys::global(), &"fetch".into())
|
||||||
|
.map_err(|_| "fetch ei saatavilla".to_string())?
|
||||||
|
.dyn_into::<js_sys::Function>()
|
||||||
|
.map_err(|_| "fetch ei funktio".to_string())?
|
||||||
|
.call1(&JsValue::NULL, &url.into())
|
||||||
|
.map_err(|e| format!("fetch: {:?}", e))?;
|
||||||
|
let resp = wasm_bindgen_futures::JsFuture::from(js_sys::Promise::from(promise))
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("fetch await: {:?}", e))?;
|
||||||
|
resp.dyn_into::<web_sys::Response>()
|
||||||
|
.map_err(|_| "ei Response".to_string())
|
||||||
|
}
|
||||||
|
|
||||||
// Geneerinen tensorilaskenta — toimii millä tahansa Burn-backendillä
|
// Geneerinen tensorilaskenta — toimii millä tahansa Burn-backendillä
|
||||||
fn run_matmul<B: burn::tensor::backend::Backend>(size: usize) -> String {
|
fn run_matmul<B: burn::tensor::backend::Backend>(size: usize) -> String {
|
||||||
let device = Default::default();
|
let device = Default::default();
|
||||||
@@ -123,10 +156,9 @@ async fn run_single_tokenize(text: String, ws: Rc<RefCell<WebSocket>>) {
|
|||||||
let Some(bytes) = cached_tok else { return; };
|
let Some(bytes) = cached_tok else { return; };
|
||||||
let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else { return; };
|
let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else { return; };
|
||||||
|
|
||||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
let start = perf_now();
|
||||||
let start = perf.now();
|
|
||||||
let result = tokenize_text(&tokenizer, &text);
|
let result = tokenize_text(&tokenizer, &text);
|
||||||
let duration_ms = perf.now() - start;
|
let duration_ms = perf_now() - start;
|
||||||
|
|
||||||
let token_count = result["token_count"].as_u64().unwrap_or(0);
|
let token_count = result["token_count"].as_u64().unwrap_or(0);
|
||||||
let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0);
|
let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0);
|
||||||
@@ -157,11 +189,10 @@ async fn run_pair_comparison(en_text: String, fi_text: String, ws: Rc<RefCell<We
|
|||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
let start_time = perf_now();
|
||||||
let start_time = perf.now();
|
|
||||||
let en_result = tokenize_text(&tokenizer, &en_text);
|
let en_result = tokenize_text(&tokenizer, &en_text);
|
||||||
let fi_result = tokenize_text(&tokenizer, &fi_text);
|
let fi_result = tokenize_text(&tokenizer, &fi_text);
|
||||||
let duration_ms = perf.now() - start_time; // millisekunteja desimaalitarkkuudella
|
let duration_ms = perf_now() - start_time;
|
||||||
|
|
||||||
let en_cpt = en_result["chars_per_token"].as_f64().unwrap_or(0.0);
|
let en_cpt = en_result["chars_per_token"].as_f64().unwrap_or(0.0);
|
||||||
let fi_cpt = fi_result["chars_per_token"].as_f64().unwrap_or(0.0);
|
let fi_cpt = fi_result["chars_per_token"].as_f64().unwrap_or(0.0);
|
||||||
|
|||||||
@@ -24,10 +24,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
|
|||||||
|
|
||||||
console_log!("[Qwen] Ladataan {}...", key);
|
console_log!("[Qwen] Ladataan {}...", key);
|
||||||
|
|
||||||
let window = web_sys::window().unwrap();
|
let resp = crate::worker_fetch(url).await?;
|
||||||
let resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url))
|
|
||||||
.await.map_err(|e| format!("Fetch epäonnistui: {:?}", e))?;
|
|
||||||
let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response".to_string())?;
|
|
||||||
if !resp.ok() { return Err(format!("HTTP {}", resp.status())); }
|
if !resp.ok() { return Err(format!("HTTP {}", resp.status())); }
|
||||||
|
|
||||||
let total_size: usize = resp.headers()
|
let total_size: usize = resp.headers()
|
||||||
@@ -71,7 +68,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
// performance via crate::perf_now()
|
||||||
|
|
||||||
let tok_bytes = match ensure_cached("qwen05b-tokenizer.json", TOKENIZER_URL, &ws).await {
|
let tok_bytes = match ensure_cached("qwen05b-tokenizer.json", TOKENIZER_URL, &ws).await {
|
||||||
Ok(b) => b,
|
Ok(b) => b,
|
||||||
@@ -88,7 +85,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
console_log!("[Qwen] Rakennetaan mallia...");
|
console_log!("[Qwen] Rakennetaan mallia...");
|
||||||
let start_load = perf.now();
|
let start_load = crate::perf_now();
|
||||||
let device = Device::Cpu;
|
let device = Device::Cpu;
|
||||||
let dtype = DType::F32;
|
let dtype = DType::F32;
|
||||||
|
|
||||||
@@ -120,7 +117,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
|||||||
Err(e) => { console_log!("[Qwen] Mallin lataus: {}", e); return; }
|
Err(e) => { console_log!("[Qwen] Mallin lataus: {}", e); return; }
|
||||||
};
|
};
|
||||||
|
|
||||||
let load_time = perf.now() - start_load;
|
let load_time = crate::perf_now() - start_load;
|
||||||
console_log!("[Qwen] Malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
console_log!("[Qwen] Malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
||||||
|
|
||||||
let encoding = match tokenizer.encode(prompt.as_str(), true) {
|
let encoding = match tokenizer.encode(prompt.as_str(), true) {
|
||||||
@@ -131,7 +128,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
|||||||
let input_len = input_ids.len();
|
let input_len = input_ids.len();
|
||||||
console_log!("[Qwen] Syöte: {} tokenia", input_len);
|
console_log!("[Qwen] Syöte: {} tokenia", input_len);
|
||||||
|
|
||||||
let start_gen = perf.now();
|
let start_gen = crate::perf_now();
|
||||||
let max_new_tokens = 32;
|
let max_new_tokens = 32;
|
||||||
let mut generated_text = String::new();
|
let mut generated_text = String::new();
|
||||||
let mut tokens_generated: usize = 0;
|
let mut tokens_generated: usize = 0;
|
||||||
@@ -202,7 +199,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
|||||||
crate::sleep_ms(0).await;
|
crate::sleep_ms(0).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let gen_time = perf.now() - start_gen;
|
let gen_time = crate::perf_now() - start_gen;
|
||||||
let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
|
let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
|
||||||
console_log!("[Qwen] {} tokenia | {:.0}ms | {:.1} tok/s", tokens_generated, gen_time, tokens_per_sec);
|
console_log!("[Qwen] {} tokenia | {:.0}ms | {:.1} tok/s", tokens_generated, gen_time, tokens_per_sec);
|
||||||
|
|
||||||
|
|||||||
@@ -140,10 +140,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
|
|||||||
|
|
||||||
console_log!("[Coder] Ladataan {}...", key);
|
console_log!("[Coder] Ladataan {}...", key);
|
||||||
|
|
||||||
let window = web_sys::window().unwrap();
|
let resp = crate::worker_fetch(url).await?;
|
||||||
let resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url))
|
|
||||||
.await.map_err(|e| format!("Fetch: {:?}", e))?;
|
|
||||||
let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response".to_string())?;
|
|
||||||
if !resp.ok() { return Err(format!("HTTP {}", resp.status())); }
|
if !resp.ok() { return Err(format!("HTTP {}", resp.status())); }
|
||||||
|
|
||||||
let total_size: usize = resp.headers()
|
let total_size: usize = resp.headers()
|
||||||
@@ -251,17 +248,16 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc<RefCell<WebSocket>>) -> Result
|
|||||||
|
|
||||||
/// use_3b: false = 0.5B (nopea), true = 3B (laadukas)
|
/// use_3b: false = 0.5B (nopea), true = 3B (laadukas)
|
||||||
pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use_3b: bool, task_id: Option<String>) {
|
pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use_3b: bool, task_id: Option<String>) {
|
||||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
|
||||||
let size_label = if use_3b { "3B" } else { "0.5B" };
|
let size_label = if use_3b { "3B" } else { "0.5B" };
|
||||||
|
|
||||||
let start_load = perf.now();
|
let start_load = crate::perf_now();
|
||||||
|
|
||||||
if let Err(e) = get_or_build_model(use_3b, &ws).await {
|
if let Err(e) = get_or_build_model(use_3b, &ws).await {
|
||||||
console_log!("[Coder] Mallin lataus: {}", e);
|
console_log!("[Coder] Mallin lataus: {}", e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let load_time = perf.now() - start_load;
|
let load_time = crate::perf_now() - start_load;
|
||||||
if load_time > 100.0 {
|
if load_time > 100.0 {
|
||||||
console_log!("[Coder] Malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
console_log!("[Coder] Malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
||||||
}
|
}
|
||||||
@@ -297,7 +293,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
|
|||||||
console_log!("[Coder] Syöte: {} tokenia", input_len);
|
console_log!("[Coder] Syöte: {} tokenia", input_len);
|
||||||
|
|
||||||
let device = Device::Cpu;
|
let device = Device::Cpu;
|
||||||
let start_gen = perf.now();
|
let start_gen = crate::perf_now();
|
||||||
let eos_token = 151645u32;
|
let eos_token = 151645u32;
|
||||||
let temperature: f32 = 0.7;
|
let temperature: f32 = 0.7;
|
||||||
let top_k: usize = 40;
|
let top_k: usize = 40;
|
||||||
@@ -373,7 +369,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
|
|||||||
tokens_generated += 1;
|
tokens_generated += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let gen_time = perf.now() - start_gen;
|
let gen_time = crate::perf_now() - start_gen;
|
||||||
|
|
||||||
// Siivotaan vastaus: poista markdown-koodiblokit ja johdantotekstit
|
// Siivotaan vastaus: poista markdown-koodiblokit ja johdantotekstit
|
||||||
let cleaned = strip_markdown_wrapper(&generated_text);
|
let cleaned = strip_markdown_wrapper(&generated_text);
|
||||||
|
|||||||
@@ -28,10 +28,7 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
|
|||||||
send_progress(ws, key, 0, 0, 0);
|
send_progress(ws, key, 0, 0, 0);
|
||||||
|
|
||||||
// Fetch API:lla saadaan Content-Length ja streaming-luku
|
// Fetch API:lla saadaan Content-Length ja streaming-luku
|
||||||
let window = web_sys::window().unwrap();
|
let resp = crate::worker_fetch(url).await?;
|
||||||
let resp_val = wasm_bindgen_futures::JsFuture::from(window.fetch_with_str(url))
|
|
||||||
.await.map_err(|e| format!("Fetch epäonnistui: {:?}", e))?;
|
|
||||||
let resp: web_sys::Response = resp_val.dyn_into().map_err(|_| "Ei Response-objekti".to_string())?;
|
|
||||||
|
|
||||||
if !resp.ok() {
|
if !resp.ok() {
|
||||||
return Err(format!("HTTP {}", resp.status()));
|
return Err(format!("HTTP {}", resp.status()));
|
||||||
@@ -99,7 +96,7 @@ fn send_progress(ws: &Rc<RefCell<WebSocket>>, file: &str, pct: u32, loaded: usiz
|
|||||||
|
|
||||||
/// Lataa malli ja tokenizer, suorita inferenssi ja streamaa tokenit hubille
|
/// Lataa malli ja tokenizer, suorita inferenssi ja streamaa tokenit hubille
|
||||||
pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
// performance via crate::perf_now()
|
||||||
|
|
||||||
// 1. Lataa tokenizer
|
// 1. Lataa tokenizer
|
||||||
let tok_bytes = match ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws).await {
|
let tok_bytes = match ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws).await {
|
||||||
@@ -122,7 +119,7 @@ pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
|||||||
// Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu)
|
// Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu)
|
||||||
// → NdArray kunnes Burn 0.21 stable + Wasm-tuki
|
// → NdArray kunnes Burn 0.21 stable + Wasm-tuki
|
||||||
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
|
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
|
||||||
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
|
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||||
@@ -130,9 +127,8 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
|||||||
model_bytes: Vec<u8>,
|
model_bytes: Vec<u8>,
|
||||||
tokenizer: tokenizers::Tokenizer,
|
tokenizer: tokenizers::Tokenizer,
|
||||||
ws: Rc<RefCell<WebSocket>>,
|
ws: Rc<RefCell<WebSocket>>,
|
||||||
perf: web_sys::Performance, // Korjattu Wasm-performanssi välitettäväksi
|
|
||||||
) {
|
) {
|
||||||
let start_load = perf.now();
|
let start_load = crate::perf_now();
|
||||||
|
|
||||||
let device = Default::default();
|
let device = Default::default();
|
||||||
let config = crate::burn_smollm::config::SmolLMConfig::default();
|
let config = crate::burn_smollm::config::SmolLMConfig::default();
|
||||||
@@ -143,7 +139,7 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
|||||||
Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; }
|
Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; }
|
||||||
};
|
};
|
||||||
|
|
||||||
let load_time = perf.now() - start_load;
|
let load_time = crate::perf_now() - start_load;
|
||||||
console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
||||||
|
|
||||||
let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
|
let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
|
||||||
@@ -156,7 +152,7 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
|||||||
let input_len = input_ids.len();
|
let input_len = input_ids.len();
|
||||||
console_log!("[SmolLM] Syöte: {} tokenia", input_len);
|
console_log!("[SmolLM] Syöte: {} tokenia", input_len);
|
||||||
|
|
||||||
let start_gen = perf.now();
|
let start_gen = crate::perf_now();
|
||||||
let max_new_tokens = 32;
|
let max_new_tokens = 32;
|
||||||
let mut generated_text = String::new();
|
let mut generated_text = String::new();
|
||||||
let mut tokens_generated: usize = 0;
|
let mut tokens_generated: usize = 0;
|
||||||
@@ -219,7 +215,7 @@ async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
|||||||
tokens_generated += 1;
|
tokens_generated += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let gen_time = perf.now() - start_gen;
|
let gen_time = crate::perf_now() - start_gen;
|
||||||
let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
|
let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
|
||||||
|
|
||||||
let done = serde_json::json!({
|
let done = serde_json::json!({
|
||||||
|
|||||||
@@ -3233,7 +3233,9 @@ Write the corrected code.`;
|
|||||||
const _prevConsoleLog = console.log;
|
const _prevConsoleLog = console.log;
|
||||||
console.log = function(...args) { _prevConsoleLog.apply(console, args); codeLogListener(...args); };
|
console.log = function(...args) { _prevConsoleLog.apply(console, args); codeLogListener(...args); };
|
||||||
|
|
||||||
// Käynnistä Coder-node automaattisesti ensimmäisellä kerralla
|
// Web Worker -pohjainen laskentasolmu — UI ei jäädy inferenssin aikana
|
||||||
|
let coderWorker = null;
|
||||||
|
|
||||||
async function ensureCoderNode() {
|
async function ensureCoderNode() {
|
||||||
if (coderJoined) return;
|
if (coderJoined) return;
|
||||||
coderJoined = true;
|
coderJoined = true;
|
||||||
@@ -3243,10 +3245,21 @@ Write the corrected code.`;
|
|||||||
setStep('step-wasm', 'active');
|
setStep('step-wasm', 'active');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (!wasmInitialized) {
|
// Käynnistetään WASM Web Workerissa
|
||||||
await init();
|
coderWorker = new Worker('./worker.js');
|
||||||
wasmInitialized = true;
|
|
||||||
}
|
// Workerin console.log-viestit → pääsäikeen kuuntelija
|
||||||
|
// Worker ei voi kutsua console.log näkyvästi, joten WASM:n console_log
|
||||||
|
// ei näy automaattisesti. Workerissa console.log menee Workerin konsoliin.
|
||||||
|
|
||||||
|
await new Promise((resolve, reject) => {
|
||||||
|
coderWorker.onmessage = (e) => {
|
||||||
|
if (e.data.type === 'ready') resolve();
|
||||||
|
else if (e.data.type === 'error') reject(new Error(e.data.message));
|
||||||
|
};
|
||||||
|
coderWorker.postMessage({ type: 'init' });
|
||||||
|
});
|
||||||
|
|
||||||
setStep('step-wasm', 'done');
|
setStep('step-wasm', 'done');
|
||||||
setStep('step-tokenizer', 'active');
|
setStep('step-tokenizer', 'active');
|
||||||
|
|
||||||
@@ -3260,30 +3273,23 @@ Write the corrected code.`;
|
|||||||
selected_task: coderSize === '3b' ? 'qwen-coder-3b' : 'qwen-coder-05b'
|
selected_task: coderSize === '3b' ? 'qwen-coder-3b' : 'qwen-coder-05b'
|
||||||
};
|
};
|
||||||
const taskId = coderSize === '3b' ? 5 : 4;
|
const taskId = coderSize === '3b' ? 5 : 4;
|
||||||
// Tunnistetaan WebGPU myös koodilaboratorion puolella
|
|
||||||
let coderHasWebGPU = false;
|
// Käynnistetään node Workerissa
|
||||||
if (navigator.gpu) {
|
coderWorker.onmessage = (e) => {
|
||||||
try {
|
if (e.data.type === 'started') {
|
||||||
const adapter = await navigator.gpu.requestAdapter();
|
|
||||||
if (adapter) {
|
|
||||||
try {
|
|
||||||
const testDevice = await adapter.requestDevice({ requiredLimits: { maxInterStageShaderComponents: 60 } });
|
|
||||||
coderHasWebGPU = true;
|
|
||||||
testDevice.destroy();
|
|
||||||
} catch(e) {
|
|
||||||
coderHasWebGPU = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch(e) {}
|
|
||||||
}
|
|
||||||
await start_agent_node(wsUrl, coderHasWebGPU, JSON.stringify(deviceInfo), taskId);
|
|
||||||
document.getElementById('coder-status').textContent = 'Connected';
|
document.getElementById('coder-status').textContent = 'Connected';
|
||||||
document.getElementById('coder-status').style.color = '#d29922';
|
document.getElementById('coder-status').style.color = '#d29922';
|
||||||
coderWsReady = true;
|
coderWsReady = true;
|
||||||
|
} else if (e.data.type === 'error') {
|
||||||
|
console.log('[Worker] Virhe: ' + e.data.message);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
coderWorker.postMessage({
|
||||||
|
type: 'start',
|
||||||
|
data: { hubUrl: wsUrl, hasWebGPU: false, deviceInfo: JSON.stringify(deviceInfo), taskId }
|
||||||
|
});
|
||||||
|
|
||||||
// Proaktiivinen mallin esilataus: lähetetään tyhjä warmup-prompt
|
// Warmup
|
||||||
// joka triggeröi get_or_build_model:n ilman varsinaista generointia.
|
|
||||||
// Pipeline-tilakone seuraa logeja ja merkkaa vaiheet valmiiksi.
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
if (uiSocket && uiSocket.readyState === 1) {
|
if (uiSocket && uiSocket.readyState === 1) {
|
||||||
uiSocket.send(JSON.stringify({
|
uiSocket.send(JSON.stringify({
|
||||||
@@ -3297,7 +3303,7 @@ Write the corrected code.`;
|
|||||||
if (pendingCodePrompt) {
|
if (pendingCodePrompt) {
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
sendCodeToHub(pendingCodePrompt);
|
sendCodeToHub(pendingCodePrompt);
|
||||||
}, 2000); // Hieman pidempi odotus jotta warmup ehtii ensin
|
}, 2000);
|
||||||
pendingCodePrompt = null;
|
pendingCodePrompt = null;
|
||||||
}
|
}
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
|
|||||||
33
network-poc/static/worker.js
Normal file
33
network-poc/static/worker.js
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
// Kipinä WASM Worker — ajaa kielimallin inferenssin erillisessä säikeessä
|
||||||
|
// Pääsäie (UI) ei jäädy pitkien laskutoimituksien aikana.
|
||||||
|
|
||||||
|
let wasm = null;
|
||||||
|
|
||||||
|
/**
 * Message handler for the WASM worker.
 *
 * Expects messages of the form `{ type, data }`:
 *  - 'init'           loads ./pkg/node.js, initializes the module, replies { type: 'ready' }
 *  - 'start'          calls start_agent_node(hubUrl, hasWebGPU, deviceInfo, taskId),
 *                     replies { type: 'started' }
 *  - 'set_gpu_load'   forwards data.load to set_gpu_load
 *  - 'set_auto_tasks' forwards data.enabled to set_auto_tasks
 *
 * Failures in 'init' and 'start' are reported as { type: 'error', message }.
 */
self.onmessage = async (e) => {
  const { type, data } = e.data;

  // A thrown value is not guaranteed to be an Error; fall back to String()
  // so the reported message is never "undefined".
  const describe = (err) => (err && err.message ? err.message : String(err));

  if (type === 'init') {
    try {
      // Load the WASM glue script inside the worker.
      importScripts('./pkg/node.js');
      const mod = wasm_bindgen;
      // NOTE(review): `wasm_bindgen.default` is assumed to be the init function
      // exposed by ./pkg/node.js — confirm against the wasm-bindgen target used.
      await mod.default();
      // Publish the module only after init succeeded, so a failed init
      // cannot leave a half-initialized module for later messages.
      wasm = mod;
      self.postMessage({ type: 'ready' });
    } catch (err) {
      self.postMessage({ type: 'error', message: 'WASM init: ' + describe(err) });
    }
  } else if (type === 'start') {
    if (!wasm) {
      // Report instead of returning silently, so the sender is not left
      // waiting for a 'started' message that will never arrive.
      self.postMessage({ type: 'error', message: 'Node: WASM not initialized' });
      return;
    }
    const { hubUrl, hasWebGPU, deviceInfo, taskId } = data;
    try {
      await wasm.start_agent_node(hubUrl, hasWebGPU, deviceInfo, taskId);
      self.postMessage({ type: 'started' });
    } catch (err) {
      self.postMessage({ type: 'error', message: 'Node: ' + describe(err) });
    }
  } else if (type === 'set_gpu_load') {
    if (wasm) wasm.set_gpu_load(data.load);
  } else if (type === 'set_auto_tasks') {
    if (wasm) wasm.set_auto_tasks(data.enabled);
  }
};
|
||||||
Reference in New Issue
Block a user