hyvä siitä tulee
This commit is contained in:
@@ -26,9 +26,9 @@ web-sys = { version = "0.3.68", features = [
|
||||
] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
burn = { version = "0.21.0-pre.2", default-features = false, features = ["wgpu", "ndarray"] }
|
||||
burn-wgpu = "0.21.0-pre.2"
|
||||
burn-ndarray = "0.21.0-pre.2"
|
||||
burn = { version = "0.14.0", features = ["wgpu", "ndarray"] }
|
||||
burn-wgpu = "0.14.0"
|
||||
burn-ndarray = "0.14.0"
|
||||
wasm-bindgen-futures = "0.4"
|
||||
console_error_panic_hook = "0.1.7"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json"] }
|
||||
|
||||
@@ -22,8 +22,15 @@ macro_rules! console_log {
|
||||
static GPU_LOAD_PERCENT: AtomicU32 = AtomicU32::new(50);
|
||||
static HAS_WEBGPU: AtomicBool = AtomicBool::new(true);
|
||||
static SELECTED_TASK: AtomicU32 = AtomicU32::new(0);
|
||||
// Estää rinnakkaiset LLM-inferenssit (vain yksi kerrallaan)
|
||||
static LLM_BUSY: AtomicBool = AtomicBool::new(false);
|
||||
// Käsitelläänkö hubin automaattisia tehtäviä
|
||||
static AUTO_TASKS: AtomicBool = AtomicBool::new(true);
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub fn set_auto_tasks(enabled: bool) {
|
||||
AUTO_TASKS.store(enabled, Ordering::SeqCst);
|
||||
console_log!("[Wasm] Automaattiset tehtävät: {}", if enabled { "päällä" } else { "pois" });
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub fn set_gpu_load(load: u32) {
|
||||
@@ -110,6 +117,30 @@ fn tokenize_text(tokenizer: &tokenizers::Tokenizer, text: &str) -> serde_json::V
|
||||
}
|
||||
}
|
||||
|
||||
/// Tokenisoi yksittäisen tekstin ja lähettää tuloksen hubille
|
||||
async fn run_single_tokenize(text: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
let cached_tok = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
|
||||
let Some(bytes) = cached_tok else { return; };
|
||||
let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else { return; };
|
||||
|
||||
let perf = web_sys::window().unwrap().performance().unwrap();
|
||||
let start = perf.now();
|
||||
let result = tokenize_text(&tokenizer, &text);
|
||||
let duration_ms = perf.now() - start;
|
||||
|
||||
let token_count = result["token_count"].as_u64().unwrap_or(0);
|
||||
let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0);
|
||||
console_log!("Tokenisaatio: \"{}\" → {} tokenia | {:.2} m/t | {:.2}ms",
|
||||
&text[..text.len().min(50)], token_count, cpt, duration_ms);
|
||||
|
||||
let msg = serde_json::json!({
|
||||
"type": "single_tokenize_done",
|
||||
"result": result,
|
||||
"duration_ms": (duration_ms * 100.0).round() / 100.0,
|
||||
});
|
||||
let _ = ws.borrow().send_with_str(&msg.to_string());
|
||||
}
|
||||
|
||||
/// Tokenisoi en/fi-parin, vertaa tehokkuutta ja lähettää tuloksen hubille
|
||||
async fn run_pair_comparison(en_text: String, fi_text: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
let load_pct = GPU_LOAD_PERCENT.load(Ordering::SeqCst);
|
||||
@@ -194,8 +225,9 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
let msg: String = txt.into();
|
||||
|
||||
let current_task = SELECTED_TASK.load(Ordering::SeqCst);
|
||||
let auto_on = AUTO_TASKS.load(Ordering::SeqCst);
|
||||
|
||||
if msg.contains("pair_task") && current_task == 0 {
|
||||
if msg.contains("pair_task") && current_task == 0 && auto_on {
|
||||
// Vain tokenisaatiosolmut käsittelevät pair_task-viestejä
|
||||
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
let en = task.get("en").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
@@ -207,7 +239,17 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 1 {
|
||||
} else if msg.contains("single_tokenize") && current_task == 0 {
|
||||
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
let text = task.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
if !text.is_empty() {
|
||||
let ws_for_async = ws_clone.clone();
|
||||
wasm_bindgen_futures::spawn_local(async move {
|
||||
run_single_tokenize(text, ws_for_async).await;
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 1 && auto_on {
|
||||
// Vain SmolLM-solmut, ja vain yksi inferenssi kerrallaan
|
||||
if LLM_BUSY.load(Ordering::SeqCst) {
|
||||
// Ohitetaan — edellinen inferenssi vielä käynnissä
|
||||
@@ -223,7 +265,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 2 {
|
||||
} else if msg.contains("llm_prompt") && current_task == 2 && auto_on {
|
||||
// Qwen2.5-0.5B
|
||||
if LLM_BUSY.load(Ordering::SeqCst) {
|
||||
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
@@ -237,7 +279,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 3 {
|
||||
} else if msg.contains("llm_prompt") && current_task == 3 && auto_on {
|
||||
// Phi-3 Mini
|
||||
if LLM_BUSY.load(Ordering::SeqCst) {
|
||||
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
|
||||
@@ -199,6 +199,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
let _ = ws.borrow().send_with_str(&chunk.to_string());
|
||||
}
|
||||
tokens_generated += 1;
|
||||
crate::sleep_ms(0).await;
|
||||
}
|
||||
|
||||
let gen_time = perf.now() - start_gen;
|
||||
|
||||
@@ -262,6 +262,9 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
|
||||
let _ = ws.borrow().send_with_str(&chunk.to_string());
|
||||
}
|
||||
tokens_generated += 1;
|
||||
|
||||
// Yield — vapautetaan selaimen event loop joka tokenin jälkeen
|
||||
crate::sleep_ms(0).await;
|
||||
}
|
||||
|
||||
let gen_time = perf.now() - start_gen;
|
||||
|
||||
@@ -118,14 +118,11 @@ pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
Err(e) => { console_log!("[SmolLM] Malli-virhe: {}", e); return; }
|
||||
};
|
||||
|
||||
let use_gpu = crate::HAS_WEBGPU.load(std::sync::atomic::Ordering::SeqCst);
|
||||
if use_gpu {
|
||||
console_log!("[SmolLM] Burn WebGPU inferenssi...");
|
||||
run_burn_inference::<burn::backend::Wgpu>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
|
||||
} else {
|
||||
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
|
||||
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
|
||||
}
|
||||
// Burn 0.14 wgpu ei yhteensopiva nykyisten selainten kanssa (maxInterStageShaderComponents)
|
||||
// Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu)
|
||||
// → NdArray kunnes Burn 0.21 stable + Wasm-tuki
|
||||
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
|
||||
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws, perf.clone()).await;
|
||||
}
|
||||
|
||||
async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
|
||||
Reference in New Issue
Block a user