Wasm tokenize_js() exportti oppaan live-tokenizeria varten
Lisätty #[wasm_bindgen] tokenize_js(text) → JSON-funktio, joka lataa tokenizerin IndexedDB:stä tai tarvittaessa HuggingFacesta.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -118,6 +118,27 @@ async fn run_ai_tensor_inference(difficulty: usize) -> String {
|
||||
format!("PoC {} Matmul ({}x{}) >> {}", backend_name, active_workload_size, active_workload_size, result)
|
||||
}
|
||||
|
||||
/// JS-exportti: tokenisoi tekstin ja palauttaa JSON-merkkijonon
|
||||
/// Tokenizer ladataan IndexedDB:stä (täytyy olla ladattu aiemmin)
|
||||
#[wasm_bindgen]
|
||||
pub async fn tokenize_js(text: String) -> Result<String, JsValue> {
|
||||
let cached_tok = storage::load_from_idb("tokenizer.json").await.unwrap_or(None);
|
||||
let Some(bytes) = cached_tok else {
|
||||
// Yritetään ladata verkosta
|
||||
let resp = reqwest::get("https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B/resolve/main/tokenizer.json").await
|
||||
.map_err(|e| JsValue::from_str(&format!("Tokenizer-lataus epäonnistui: {}", e)))?;
|
||||
let bytes = resp.bytes().await
|
||||
.map_err(|e| JsValue::from_str(&format!("Tokenizer-lataus epäonnistui: {}", e)))?;
|
||||
let _ = storage::save_to_idb("tokenizer.json", &bytes).await;
|
||||
let tokenizer = tokenizers::Tokenizer::from_bytes(&bytes)
|
||||
.map_err(|e| JsValue::from_str(&format!("Tokenizer-parsinta: {}", e)))?;
|
||||
return Ok(tokenize_text(&tokenizer, &text).to_string());
|
||||
};
|
||||
let tokenizer = tokenizers::Tokenizer::from_bytes(&bytes)
|
||||
.map_err(|e| JsValue::from_str(&format!("Tokenizer-parsinta: {}", e)))?;
|
||||
Ok(tokenize_text(&tokenizer, &text).to_string())
|
||||
}
|
||||
|
||||
/// Tokenisoi yhden tekstin ja palauttaa metriikat
|
||||
fn tokenize_text(tokenizer: &tokenizers::Tokenizer, text: &str) -> serde_json::Value {
|
||||
let char_count = text.chars().count();
|
||||
|
||||
Reference in New Issue
Block a user