423 lines
20 KiB
Rust
423 lines
20 KiB
Rust
use wasm_bindgen::prelude::*;
|
|
use web_sys::{WebSocket, MessageEvent};
|
|
use std::cell::RefCell;
|
|
use std::rc::Rc;
|
|
use std::sync::atomic::{AtomicU32, AtomicBool, Ordering};
|
|
use burn::tensor::Tensor;
|
|
use burn::backend::{Wgpu, NdArray};
|
|
|
|
pub mod storage;
|
|
pub mod sampling;
|
|
pub mod smollm;
|
|
pub mod qwen;
|
|
pub mod qwen_coder;
|
|
pub mod phi3;
|
|
pub mod burn_smollm;
|
|
|
|
/// Logs a formatted message to the browser console.
///
/// Accepts the same arguments as `format!` and forwards the resulting string
/// to `web_sys::console::log_1`.
#[macro_export]
macro_rules! console_log {
    ($($arg:tt)*) => {
        web_sys::console::log_1(&format!($($arg)*).into())
    };
}
|
|
|
|
/// Load limit in percent, written by `set_gpu_load`. Starts at 50.
static GPU_LOAD_PERCENT: AtomicU32 = AtomicU32::new(50);

/// Whether the WebGPU backend should be used; overwritten by `start_agent_node`.
static HAS_WEBGPU: AtomicBool = AtomicBool::new(true);

/// Numeric id of the task this node was started with (0 = tokenization).
static SELECTED_TASK: AtomicU32 = AtomicU32::new(0);

/// Set while an LLM inference task is running, so only one runs at a time.
static LLM_BUSY: AtomicBool = AtomicBool::new(false);

/// Whether the hub's automatic tasks are processed; toggled by `set_auto_tasks`.
static AUTO_TASKS: AtomicBool = AtomicBool::new(true);
|
|
|
|
#[wasm_bindgen]
|
|
pub fn set_auto_tasks(enabled: bool) {
|
|
AUTO_TASKS.store(enabled, Ordering::SeqCst);
|
|
console_log!("[Wasm] Automaattiset tehtävät: {}", if enabled { "päällä" } else { "pois" });
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn set_gpu_load(load: u32) {
|
|
GPU_LOAD_PERCENT.store(load, Ordering::SeqCst);
|
|
console_log!("[Wasm] GPU Kuormitusraja vaihdettu -> {}%", load);
|
|
}
|
|
|
|
// Worker-yhteensopiva setTimeout — toimii sekä Window- että Worker-kontekstissa
|
|
#[wasm_bindgen]
|
|
extern "C" {
|
|
#[wasm_bindgen(js_name = setTimeout)]
|
|
fn set_timeout(closure: &js_sys::Function, ms: i32);
|
|
}
|
|
|
|
// Asynkroninen odotus WebAssemblylle (Window + Worker)
|
|
pub async fn sleep_ms(ms: i32) {
|
|
let promise = js_sys::Promise::new(&mut |resolve, _| {
|
|
set_timeout(&resolve, ms);
|
|
});
|
|
let _ = wasm_bindgen_futures::JsFuture::from(promise).await;
|
|
}
|
|
|
|
// Worker-yhteensopiva Performance — käyttää globalThis.performance
|
|
pub fn perf_now() -> f64 {
|
|
js_sys::Reflect::get(&js_sys::global(), &"performance".into())
|
|
.ok()
|
|
.and_then(|p| js_sys::Reflect::get(&p, &"now".into()).ok())
|
|
.and_then(|f| f.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| {
|
|
let perf = js_sys::Reflect::get(&js_sys::global(), &"performance".into()).unwrap();
|
|
f.call0(&perf).ok()
|
|
})
|
|
.and_then(|v| v.as_f64())
|
|
.unwrap_or(0.0)
|
|
}
|
|
|
|
// Worker-yhteensopiva fetch — käyttää globalThis.fetch
|
|
pub async fn worker_fetch(url: &str) -> Result<web_sys::Response, String> {
|
|
let promise = js_sys::Reflect::get(&js_sys::global(), &"fetch".into())
|
|
.map_err(|_| "fetch ei saatavilla".to_string())?
|
|
.dyn_into::<js_sys::Function>()
|
|
.map_err(|_| "fetch ei funktio".to_string())?
|
|
.call1(&JsValue::NULL, &url.into())
|
|
.map_err(|e| format!("fetch: {:?}", e))?;
|
|
let resp = wasm_bindgen_futures::JsFuture::from(js_sys::Promise::from(promise))
|
|
.await
|
|
.map_err(|e| format!("fetch await: {:?}", e))?;
|
|
resp.dyn_into::<web_sys::Response>()
|
|
.map_err(|_| "ei Response".to_string())
|
|
}
|
|
|
|
// Geneerinen tensorilaskenta — toimii millä tahansa Burn-backendillä
|
|
fn run_matmul<B: burn::tensor::backend::Backend>(size: usize) -> String {
|
|
let device = Default::default();
|
|
let dist = burn::tensor::Distribution::Default;
|
|
let t1: Tensor<B, 2> = Tensor::random([size, size], dist, &device);
|
|
let t2: Tensor<B, 2> = Tensor::random([size, size], dist, &device);
|
|
let sum = t1.matmul(t2).sum();
|
|
format!("{:?}", sum)
|
|
}
|
|
|
|
// Päättelyfunktio — valitsee backendin automaattisesti
|
|
async fn run_ai_tensor_inference(difficulty: usize) -> String {
|
|
let load_pct = GPU_LOAD_PERCENT.load(Ordering::SeqCst);
|
|
|
|
if load_pct == 0 {
|
|
sleep_ms(2000).await;
|
|
return format!("Paused (0%). Lepäillään zZz..");
|
|
}
|
|
|
|
let active_workload_size = (difficulty as f32 * (load_pct as f32 / 100.0)) as usize;
|
|
|
|
let sleep_delay = (100 - load_pct) * 10;
|
|
if sleep_delay > 0 {
|
|
sleep_ms(sleep_delay as i32).await;
|
|
}
|
|
|
|
let use_gpu = HAS_WEBGPU.load(Ordering::SeqCst);
|
|
let (backend_name, result) = if use_gpu {
|
|
("WebGPU", run_matmul::<Wgpu>(active_workload_size))
|
|
} else {
|
|
("CPU/NdArray", run_matmul::<NdArray>(active_workload_size))
|
|
};
|
|
|
|
format!("PoC {} Matmul ({}x{}) >> {}", backend_name, active_workload_size, active_workload_size, result)
|
|
}
|
|
|
|
/// JS-exportti: tokenisoi tekstin ja palauttaa JSON-merkkijonon
|
|
/// Tokenizer ladataan IndexedDB:stä (täytyy olla ladattu aiemmin)
|
|
#[wasm_bindgen]
|
|
pub async fn tokenize_js(text: String) -> Result<String, JsValue> {
|
|
let cached_tok = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
|
|
let Some(bytes) = cached_tok else {
|
|
// Yritetään ladata verkosta
|
|
let resp = reqwest::get("https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B/resolve/main/tokenizer.json").await
|
|
.map_err(|e| JsValue::from_str(&format!("Tokenizer-lataus epäonnistui: {}", e)))?;
|
|
let bytes = resp.bytes().await
|
|
.map_err(|e| JsValue::from_str(&format!("Tokenizer-lataus epäonnistui: {}", e)))?;
|
|
let _ = storage::save_to_idb("tokenizer.json", &bytes).await;
|
|
let tokenizer = tokenizers::Tokenizer::from_bytes(&bytes)
|
|
.map_err(|e| JsValue::from_str(&format!("Tokenizer-parsinta: {}", e)))?;
|
|
return Ok(tokenize_text(&tokenizer, &text).to_string());
|
|
};
|
|
let tokenizer = tokenizers::Tokenizer::from_bytes(&bytes)
|
|
.map_err(|e| JsValue::from_str(&format!("Tokenizer-parsinta: {}", e)))?;
|
|
Ok(tokenize_text(&tokenizer, &text).to_string())
|
|
}
|
|
|
|
/// Tokenisoi yhden tekstin ja palauttaa metriikat
|
|
fn tokenize_text(tokenizer: &tokenizers::Tokenizer, text: &str) -> serde_json::Value {
|
|
let char_count = text.chars().count();
|
|
let word_count = text.split_whitespace().count();
|
|
|
|
if let Ok(encoding) = tokenizer.encode(text, true) {
|
|
let token_count = encoding.get_ids().len();
|
|
let cpt = if token_count > 0 { char_count as f32 / token_count as f32 } else { 0.0 };
|
|
let tokens: Vec<String> = encoding.get_ids().iter().filter_map(|&id| {
|
|
tokenizer.decode(&[id], true).ok()
|
|
}).collect();
|
|
|
|
serde_json::json!({
|
|
"text": text,
|
|
"char_count": char_count,
|
|
"word_count": word_count,
|
|
"token_count": token_count,
|
|
"chars_per_token": (cpt * 100.0).round() / 100.0,
|
|
"tokens": tokens,
|
|
})
|
|
} else {
|
|
serde_json::json!({
|
|
"text": text,
|
|
"char_count": char_count,
|
|
"word_count": word_count,
|
|
"token_count": word_count,
|
|
"chars_per_token": 0,
|
|
"tokens": [],
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Tokenisoi yksittäisen tekstin ja lähettää tuloksen hubille
|
|
async fn run_single_tokenize(text: String, ws: Rc<RefCell<WebSocket>>) {
|
|
let cached_tok = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
|
|
let Some(bytes) = cached_tok else { return; };
|
|
let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else { return; };
|
|
|
|
let start = perf_now();
|
|
let result = tokenize_text(&tokenizer, &text);
|
|
let duration_ms = perf_now() - start;
|
|
|
|
let token_count = result["token_count"].as_u64().unwrap_or(0);
|
|
let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0);
|
|
let preview: String = text.chars().take(50).collect();
|
|
console_log!("Tokenisaatio: \"{}\" → {} tokenia | {:.2} m/t | {:.2}ms",
|
|
preview, token_count, cpt, duration_ms);
|
|
|
|
let msg = serde_json::json!({
|
|
"type": "single_tokenize_done",
|
|
"result": result,
|
|
"duration_ms": (duration_ms * 100.0).round() / 100.0,
|
|
});
|
|
let _ = ws.borrow().send_with_str(&msg.to_string());
|
|
}
|
|
|
|
/// Tokenisoi en/fi-parin, vertaa tehokkuutta ja lähettää tuloksen hubille
|
|
async fn run_pair_comparison(en_text: String, fi_text: String, ws: Rc<RefCell<WebSocket>>) {
|
|
let load_pct = GPU_LOAD_PERCENT.load(Ordering::SeqCst);
|
|
if load_pct == 0 { return; }
|
|
|
|
let cached_tok = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
|
|
let Some(bytes) = cached_tok else {
|
|
console_log!("[Tokenizer] Ei vielä ladattu — ohitetaan pari");
|
|
return;
|
|
};
|
|
let Ok(tokenizer) = tokenizers::Tokenizer::from_bytes(&bytes) else {
|
|
console_log!("[Tokenizer] Parsinta epäonnistui");
|
|
return;
|
|
};
|
|
|
|
let start_time = perf_now();
|
|
let en_result = tokenize_text(&tokenizer, &en_text);
|
|
let fi_result = tokenize_text(&tokenizer, &fi_text);
|
|
let duration_ms = perf_now() - start_time;
|
|
|
|
let en_cpt = en_result["chars_per_token"].as_f64().unwrap_or(0.0);
|
|
let fi_cpt = fi_result["chars_per_token"].as_f64().unwrap_or(0.0);
|
|
let en_tokens = en_result["token_count"].as_u64().unwrap_or(0);
|
|
let fi_tokens = fi_result["token_count"].as_u64().unwrap_or(0);
|
|
|
|
// Token-ylikustannus: kuinka monta % enemmän tokeneita suomi tarvitsee
|
|
let overhead_pct = if en_tokens > 0 {
|
|
((fi_tokens as f64 / en_tokens as f64) - 1.0) * 100.0
|
|
} else { 0.0 };
|
|
|
|
console_log!("EN: {} tokenia ({:.2} m/t) vs FI: {} tokenia ({:.2} m/t) | ylikustannus: {:.0}% | {:.2}ms",
|
|
en_tokens, en_cpt, fi_tokens, fi_cpt, overhead_pct, duration_ms);
|
|
|
|
let pair_done = serde_json::json!({
|
|
"type": "pair_done",
|
|
"en": en_result,
|
|
"fi": fi_result,
|
|
"overhead_pct": (overhead_pct * 10.0).round() / 10.0,
|
|
"duration_ms": (duration_ms * 100.0).round() / 100.0,
|
|
"tokenizer": "Qwen2.5-Coder-0.5B",
|
|
});
|
|
let _ = ws.borrow().send_with_str(&pair_done.to_string());
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_json: String, task_id: u32) -> Result<(), JsValue> {
|
|
console_error_panic_hook::set_once();
|
|
|
|
HAS_WEBGPU.store(has_webgpu, Ordering::SeqCst);
|
|
SELECTED_TASK.store(task_id, Ordering::SeqCst);
|
|
let backend_name = if has_webgpu { "WebGPU" } else { "CPU (NdArray)" };
|
|
let task_names = ["tokenize", "smollm-135m", "qwen-05b", "phi3-mini", "qwen-coder-05b", "qwen-coder-3b"];
|
|
let task_name = task_names.get(task_id as usize).unwrap_or(&"tokenize");
|
|
console_log!("Kipinä Agent Node käynnistyy — backend: {} | tehtävä: {}", backend_name, task_name);
|
|
|
|
let device_info = device_info_json.clone();
|
|
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
console_log!("[Storage] Tarkistetaan IndexedDB Qwen2.5-Coder Tokenizeria...");
|
|
let cached_tokenizer = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
|
|
if let Some(tok_bytes) = cached_tokenizer {
|
|
console_log!("[Storage] Tokenizer löytyi välimuistista! Koko: {} tavua", tok_bytes.len());
|
|
} else {
|
|
console_log!("[Storage] Ei välimuistia. Ladataan HF:stä... Odota selaimen Network-välilehdellä.");
|
|
if let Ok(resp) = reqwest::get("https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B/resolve/main/tokenizer.json").await {
|
|
if let Ok(bytes) = resp.bytes().await {
|
|
console_log!("[Storage] Tallennetaan {}-tavuinen tiedosto IndexedDB:hen pysyvästi...", bytes.len());
|
|
let _ = storage::save_to_idb("tokenizer.json", &bytes).await;
|
|
console_log!("[Storage] Tallennettu!");
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
let ws = WebSocket::new(&hub_url)?;
|
|
let ws_clone = Rc::new(RefCell::new(ws));
|
|
let ws_clone_2 = ws_clone.clone();
|
|
|
|
let onmessage_callback = Closure::wrap(Box::new(move |e: MessageEvent| {
|
|
if let Ok(txt) = e.data().dyn_into::<js_sys::JsString>() {
|
|
let msg: String = txt.into();
|
|
|
|
let current_task = SELECTED_TASK.load(Ordering::SeqCst);
|
|
let auto_on = AUTO_TASKS.load(Ordering::SeqCst);
|
|
|
|
if msg.contains("pair_task") && current_task == 0 && auto_on {
|
|
// Vain tokenisaatiosolmut käsittelevät pair_task-viestejä
|
|
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
|
let en = task.get("en").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
let fi = task.get("fi").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
if !en.is_empty() && !fi.is_empty() {
|
|
let ws_for_async = ws_clone.clone();
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
run_pair_comparison(en, fi, ws_for_async).await;
|
|
});
|
|
}
|
|
}
|
|
} else if msg.contains("single_tokenize") && current_task == 0 {
|
|
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
|
let text = task.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
if !text.is_empty() {
|
|
let ws_for_async = ws_clone.clone();
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
run_single_tokenize(text, ws_for_async).await;
|
|
});
|
|
}
|
|
}
|
|
} else if msg.contains("llm_prompt") && current_task == 1 && auto_on {
|
|
// Vain SmolLM-solmut, ja vain yksi inferenssi kerrallaan
|
|
if LLM_BUSY.load(Ordering::SeqCst) {
|
|
// Ohitetaan — edellinen inferenssi vielä käynnissä
|
|
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
|
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
if !prompt.is_empty() && model == "smollm-135m" {
|
|
LLM_BUSY.store(true, Ordering::SeqCst);
|
|
let ws_for_async = ws_clone.clone();
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
smollm::run_smollm_inference(prompt, ws_for_async).await;
|
|
LLM_BUSY.store(false, Ordering::SeqCst);
|
|
});
|
|
}
|
|
}
|
|
} else if msg.contains("llm_prompt") && current_task == 2 && auto_on {
|
|
// Qwen2.5-0.5B
|
|
if LLM_BUSY.load(Ordering::SeqCst) {
|
|
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
|
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
if !prompt.is_empty() && model == "qwen-05b" {
|
|
LLM_BUSY.store(true, Ordering::SeqCst);
|
|
let ws_for_async = ws_clone.clone();
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
qwen::run_qwen_inference(prompt, ws_for_async).await;
|
|
LLM_BUSY.store(false, Ordering::SeqCst);
|
|
});
|
|
}
|
|
}
|
|
} else if msg.contains("llm_prompt") && current_task == 3 && auto_on {
|
|
// Phi-3 Mini
|
|
if LLM_BUSY.load(Ordering::SeqCst) {
|
|
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
|
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
if !prompt.is_empty() && model.starts_with("phi3-mini") {
|
|
LLM_BUSY.store(true, Ordering::SeqCst);
|
|
let ws_for_async = ws_clone.clone();
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
phi3::run_phi3_inference(prompt, ws_for_async).await;
|
|
LLM_BUSY.store(false, Ordering::SeqCst);
|
|
});
|
|
}
|
|
}
|
|
} else if msg.contains("llm_prompt") {
|
|
console_log!("[DEBUG] llm_prompt vastaanotettu! current_task={}, busy={}", current_task, LLM_BUSY.load(Ordering::SeqCst));
|
|
if current_task == 4 || current_task == 5 {
|
|
// Qwen2.5-Coder: 4 = 0.5B, 5 = 3B
|
|
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
|
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
|
let task_id = task.get("task_id").and_then(|v| v.as_str()).map(|s| s.to_string());
|
|
|
|
if !prompt.is_empty() && model.starts_with("qwen-coder") {
|
|
if LLM_BUSY.load(Ordering::SeqCst) {
|
|
if let Some(tid) = task_id {
|
|
let err_msg = serde_json::json!({
|
|
"type": "llm_error",
|
|
"task_id": tid,
|
|
"error": "Solmu on paraikaa varattuna toisen tehtävän suorittamiseen"
|
|
});
|
|
let _ = ws_clone.borrow().send_with_str(&err_msg.to_string());
|
|
}
|
|
} else {
|
|
let use_3b = current_task == 5;
|
|
LLM_BUSY.store(true, Ordering::SeqCst);
|
|
let ws_for_async = ws_clone.clone();
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
qwen_coder::run_coder_inference(prompt, ws_for_async, use_3b, task_id).await;
|
|
LLM_BUSY.store(false, Ordering::SeqCst);
|
|
});
|
|
}
|
|
}
|
|
}
|
|
} // current_task == 4 || 5
|
|
} else if msg.contains("ai_task") {
|
|
console_log!("Hub task vastaanotettu, ajetaan GPU:lla...");
|
|
let ws_for_async = ws_clone.clone();
|
|
let diff = if msg.contains(r#""difficulty":1024"#) { 1024 } else { 512 };
|
|
|
|
// Suoritetaan inference asynkronisesti erillisessä taaskissa välttääksemme UI-jäätymisen kokonaan
|
|
wasm_bindgen_futures::spawn_local(async move {
|
|
let result = run_ai_tensor_inference(diff).await;
|
|
let reply = format!("{{\"type\":\"result\", \"status\":\"success\", \"data\":\"{}\"}}", result);
|
|
let _ = ws_for_async.borrow().send_with_str(&reply);
|
|
});
|
|
} else if msg.contains("stats") {
|
|
// Sivuutetaan statsit täällä, UI hallitsee ne aivan itse HTML:n puolella
|
|
}
|
|
}
|
|
}) as Box<dyn FnMut(MessageEvent)>);
|
|
|
|
ws_clone_2.borrow().set_onmessage(Some(onmessage_callback.as_ref().unchecked_ref()));
|
|
onmessage_callback.forget();
|
|
|
|
let ws_clone_3 = ws_clone_2.clone();
|
|
let onopen_callback = Closure::wrap(Box::new(move |_| {
|
|
console_log!("Yhteys Hubiin avattu!");
|
|
// Parsitaan device_info ja lisätään auth-kenttiin
|
|
let auth_msg = if let Ok(mut info) = serde_json::from_str::<serde_json::Value>(&device_info) {
|
|
if let Some(obj) = info.as_object_mut() {
|
|
obj.insert("type".to_string(), serde_json::json!("auth"));
|
|
obj.insert("status".to_string(), serde_json::json!("agent_ready"));
|
|
}
|
|
info.to_string()
|
|
} else {
|
|
r#"{"type":"auth","status":"agent_ready","allocated_gb":4}"#.to_string()
|
|
};
|
|
let _ = ws_clone_3.borrow().send_with_str(&auth_msg);
|
|
}) as Box<dyn FnMut(JsValue)>);
|
|
|
|
ws_clone_2.borrow().set_onopen(Some(onopen_callback.as_ref().unchecked_ref()));
|
|
onopen_callback.forget();
|
|
|
|
Ok(())
|
|
}
|