diff --git a/network-poc/node/src/lib.rs b/network-poc/node/src/lib.rs
index 52505ad..2d01da6 100644
--- a/network-poc/node/src/lib.rs
+++ b/network-poc/node/src/lib.rs
@@ -118,6 +118,48 @@ async fn run_ai_tensor_inference(difficulty: usize) -> String {
     format!("PoC {} Matmul ({}x{}) >> {}", backend_name, active_workload_size, active_workload_size, result)
 }
 
+/// JS export: tokenizes `text` and returns the resulting metrics as a JSON string.
+///
+/// The tokenizer definition is read from IndexedDB under the key `tokenizer.json`.
+/// When nothing usable is cached it is downloaded from Hugging Face, validated by
+/// parsing, and only then written back to the cache.
+///
+/// # Errors
+///
+/// Returns a `JsValue` string if the download fails, the server responds with an
+/// HTTP error status, or the downloaded bytes do not parse as a tokenizer.
+#[wasm_bindgen]
+pub async fn tokenize_js(text: String) -> Result<String, JsValue> {
+    const CACHE_KEY: &str = "tokenizer.json";
+    const TOKENIZER_URL: &str =
+        "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B/resolve/main/tokenizer.json";
+
+    fn download_error(e: reqwest::Error) -> JsValue {
+        JsValue::from_str(&format!("Tokenizer-lataus epäonnistui: {}", e))
+    }
+
+    // A failed cache read is treated as a miss; the network copy can still serve the call.
+    let cached = storage::load_from_idb(CACHE_KEY).await.unwrap_or(None);
+    // A cached copy that does not parse is ignored and refetched below instead of
+    // turning into a permanent error.
+    let cached_tokenizer = cached.and_then(|bytes| tokenizers::Tokenizer::from_bytes(&bytes).ok());
+    if let Some(tokenizer) = cached_tokenizer {
+        return Ok(tokenize_text(&tokenizer, &text).to_string());
+    }
+
+    let resp = reqwest::get(TOKENIZER_URL)
+        .await
+        .and_then(|resp| resp.error_for_status())
+        .map_err(download_error)?;
+    let bytes = resp.bytes().await.map_err(download_error)?;
+    // Parse before caching so an error page or truncated body never poisons the cache.
+    let tokenizer = tokenizers::Tokenizer::from_bytes(&bytes)
+        .map_err(|e| JsValue::from_str(&format!("Tokenizer-parsinta: {}", e)))?;
+    // Caching is best-effort: a failed write only costs a re-download next time.
+    let _ = storage::save_to_idb(CACHE_KEY, &bytes).await;
+    Ok(tokenize_text(&tokenizer, &text).to_string())
+}
+
 /// Tokenisoi yhden tekstin ja palauttaa metriikat
 fn tokenize_text(tokenizer: &tokenizers::Tokenizer, text: &str) -> serde_json::Value {
     let char_count = text.chars().count();