hyvä siitä tulee

This commit is contained in:
2026-04-02 18:16:41 +03:00
parent 6cdd695a3b
commit 31995fb278
8 changed files with 272 additions and 89 deletions

View File

@@ -26,9 +26,9 @@ web-sys = { version = "0.3.68", features = [
] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
burn = { version = "0.21.0-pre.2", default-features = false, features = ["wgpu", "ndarray"] }
burn-wgpu = "0.21.0-pre.2"
burn-ndarray = "0.21.0-pre.2"
burn = { version = "0.14.0", features = ["wgpu", "ndarray"] }
burn-wgpu = "0.14.0"
burn-ndarray = "0.14.0"
wasm-bindgen-futures = "0.4"
console_error_panic_hook = "0.1.7"
reqwest = { version = "0.12", default-features = false, features = ["json"] }

View File

@@ -22,8 +22,15 @@ macro_rules! console_log {
// GPU load setting in percent (initially 50); read by `run_pair_comparison`.
static GPU_LOAD_PERCENT: AtomicU32 = AtomicU32::new(50);
// Whether WebGPU is considered available (initially true).
// NOTE(review): presumably set from `start_agent_node`'s `has_webgpu` argument — confirm.
static HAS_WEBGPU: AtomicBool = AtomicBool::new(true);
// Currently selected task; the hub message handler routes on this value:
// 0 = tokenization, 1 = SmolLM, 2 = Qwen2.5-0.5B, 3 = Phi-3 Mini.
static SELECTED_TASK: AtomicU32 = AtomicU32::new(0);
// Prevents parallel LLM inferences (only one at a time)
static LLM_BUSY: AtomicBool = AtomicBool::new(false);
// Whether the hub's automatic tasks are processed
static AUTO_TASKS: AtomicBool = AtomicBool::new(true);
/// Turns processing of the hub's automatic tasks on or off.
///
/// Stores `enabled` in the `AUTO_TASKS` flag and logs the new state to the console.
#[wasm_bindgen]
pub fn set_auto_tasks(enabled: bool) {
    // Human-readable state for the log line ("on" / "off" in Finnish).
    let state_label = if enabled { "päällä" } else { "pois" };
    AUTO_TASKS.store(enabled, Ordering::SeqCst);
    console_log!("[Wasm] Automaattiset tehtävät: {}", state_label);
}
#[wasm_bindgen]
pub fn set_gpu_load(load: u32) {
@@ -110,6 +117,30 @@ fn tokenize_text(tokenizer: &tokenizers::Tokenizer, text: &str) -> serde_json::V
}
}
/// Tokenizes a single text and sends the result to the hub.
///
/// Loads `tokenizer.json` via `storage::load_from_idb`; if nothing is cached, the load
/// fails, or the bytes cannot be parsed as a tokenizer, the function returns without
/// sending anything. Otherwise it times the tokenization, logs a short summary and
/// sends a `single_tokenize_done` JSON message over `ws`.
async fn run_single_tokenize(text: String, ws: Rc<RefCell<WebSocket>>) {
    // A load error is treated the same as a missing cache entry.
    let cached_tok = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
    let Some(bytes) = cached_tok else { return; };
    let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else { return; };

    // NOTE(review): panics if `window` or `performance` is unavailable — confirm this
    // never runs outside a window context.
    let perf = web_sys::window().unwrap().performance().unwrap();
    let start = perf.now();
    let result = tokenize_text(&tokenizer, &text);
    let duration_ms = perf.now() - start;

    let token_count = result["token_count"].as_u64().unwrap_or(0);
    let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0);

    // Log preview of at most 50 bytes. Slicing a `str` at a byte offset that falls inside
    // a multi-byte UTF-8 character panics, so back the cut off to the nearest char
    // boundary (offset 0 is always a boundary, so the loop terminates).
    let mut preview_end = text.len().min(50);
    while !text.is_char_boundary(preview_end) {
        preview_end -= 1;
    }
    console_log!("Tokenisaatio: \"{}\" → {} tokenia | {:.2} m/t | {:.2}ms",
        &text[..preview_end], token_count, cpt, duration_ms);

    let msg = serde_json::json!({
        "type": "single_tokenize_done",
        "result": result,
        // Rounded to two decimals.
        "duration_ms": (duration_ms * 100.0).round() / 100.0,
    });
    // Best-effort send: a failed send is deliberately ignored.
    let _ = ws.borrow().send_with_str(&msg.to_string());
}
/// Tokenisoi en/fi-parin, vertaa tehokkuutta ja lähettää tuloksen hubille
async fn run_pair_comparison(en_text: String, fi_text: String, ws: Rc<RefCell<WebSocket>>) {
let load_pct = GPU_LOAD_PERCENT.load(Ordering::SeqCst);
@@ -194,8 +225,9 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
let msg: String = txt.into();
let current_task = SELECTED_TASK.load(Ordering::SeqCst);
let auto_on = AUTO_TASKS.load(Ordering::SeqCst);
if msg.contains("pair_task") && current_task == 0 {
if msg.contains("pair_task") && current_task == 0 && auto_on {
// Vain tokenisaatiosolmut käsittelevät pair_task-viestejä
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let en = task.get("en").and_then(|v| v.as_str()).unwrap_or("").to_string();
@@ -207,7 +239,17 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
});
}
}
} else if msg.contains("llm_prompt") && current_task == 1 {
} else if msg.contains("single_tokenize") && current_task == 0 {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let text = task.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !text.is_empty() {
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
run_single_tokenize(text, ws_for_async).await;
});
}
}
} else if msg.contains("llm_prompt") && current_task == 1 && auto_on {
// Vain SmolLM-solmut, ja vain yksi inferenssi kerrallaan
if LLM_BUSY.load(Ordering::SeqCst) {
// Ohitetaan — edellinen inferenssi vielä käynnissä
@@ -223,7 +265,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
});
}
}
} else if msg.contains("llm_prompt") && current_task == 2 {
} else if msg.contains("llm_prompt") && current_task == 2 && auto_on {
// Qwen2.5-0.5B
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
@@ -237,7 +279,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
});
}
}
} else if msg.contains("llm_prompt") && current_task == 3 {
} else if msg.contains("llm_prompt") && current_task == 3 && auto_on {
// Phi-3 Mini
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {

View File

@@ -199,6 +199,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
let _ = ws.borrow().send_with_str(&chunk.to_string());
}
tokens_generated += 1;
crate::sleep_ms(0).await;
}
let gen_time = perf.now() - start_gen;

View File

@@ -262,6 +262,9 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
let _ = ws.borrow().send_with_str(&chunk.to_string());
}
tokens_generated += 1;
// Yield — vapautetaan selaimen event loop joka tokenin jälkeen
crate::sleep_ms(0).await;
}
let gen_time = perf.now() - start_gen;

View File

@@ -118,14 +118,11 @@ pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
Err(e) => { console_log!("[SmolLM] Malli-virhe: {}", e); return; }
};
let use_gpu = crate::HAS_WEBGPU.load(std::sync::atomic::Ordering::SeqCst);
if use_gpu {
console_log!("[SmolLM] Burn WebGPU inferenssi...");
run_burn_inference::<burn::backend::Wgpu>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
} else {
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
}
// Burn 0.14 wgpu ei yhteensopiva nykyisten selainten kanssa (maxInterStageShaderComponents)
// Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu)
// → NdArray kunnes Burn 0.21 stable + Wasm-tuki
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
}
async fn run_burn_inference<B: burn::tensor::backend::Backend>(