Päivitetty juttuja

This commit is contained in:
Jaakko Vanhala
2026-04-04 21:13:20 +03:00
parent 2e7ddf6f1e
commit 3ada8949d0
11 changed files with 457 additions and 105 deletions

View File

@@ -21,12 +21,28 @@ const MODEL_3B_PART1_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-I
const MODEL_3B_PART2_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/model-00002-of-00002.safetensors";
const TOKENIZER_3B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/tokenizer.json";
async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Result<Vec<u8>, String> {
if let Ok(Some(bytes)) = storage::load_from_idb(key).await {
console_log!("[Coder] {} löytyi välimuistista ({} MB)", key, bytes.len() / 1024 / 1024);
// Per-thread, in-memory cache of downloaded artifacts, keyed by the same
// string key that `ensure_cached` receives. Values are stored as
// `Rc<Vec<u8>>`, so a cache hit hands out a cloned `Rc` (a reference-count
// bump) rather than a copy of the underlying bytes.
//
// `ensure_cached` fills this map after a successful IndexedDB load and after
// a fresh download.
// NOTE(review): no eviction or removal is visible in this view, so entries
// presumably stay resident for the lifetime of the thread; confirm this is
// acceptable for the multi-gigabyte 3B model parts.
thread_local! {
static RAM_CACHE: RefCell<std::collections::HashMap<String, Rc<Vec<u8>>>> = RefCell::new(std::collections::HashMap::new());
}
async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Result<Rc<Vec<u8>>, String> {
// 1. Tarkistetaan RAM välimuisti (estää OOM ja levy-I/O pullonkaulat)
let ram_hit = RAM_CACHE.with(|cache| {
cache.borrow().get(key).cloned()
});
if let Some(bytes) = ram_hit {
console_log!("[Coder] {} löytyi nopeasta RAM-välimuistista!", key);
return Ok(bytes);
}
// 2. Tarkistetaan IndexedDB (jos selain on suljettu aikaisemmin)
if let Ok(Some(bytes)) = storage::load_from_idb(key).await {
console_log!("[Coder] {} löytyi IndexedDB-välimuistista ({} MB)", key, bytes.len() / 1024 / 1024);
let rc_bytes = Rc::new(bytes);
RAM_CACHE.with(|cache| cache.borrow_mut().insert(key.to_string(), rc_bytes.clone()));
return Ok(rc_bytes);
}
console_log!("[Coder] Ladataan {}...", key);
let window = web_sys::window().unwrap();
@@ -68,11 +84,14 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
}
}
console_log!("[Coder] Tallennetaan {} ({} MB)...", key, data.len() / 1024 / 1024);
console_log!("[Coder] Tallennetaan {} ({} MB) IndexedDB:hen...", key, data.len() / 1024 / 1024);
let _ = storage::save_to_idb(key, &data).await;
console_log!("[Coder] {} tallennettu!", key);
Ok(data)
let rc_data = Rc::new(data);
RAM_CACHE.with(|cache| cache.borrow_mut().insert(key.to_string(), rc_data.clone()));
Ok(rc_data)
}
/// use_3b: false = 0.5B (nopea), true = 3B (laadukas)
@@ -87,7 +106,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
Ok(b) => b,
Err(e) => { console_log!("[Coder] Tokenizer-virhe: {}", e); return; }
};
let tokenizer = match tokenizers::Tokenizer::from_bytes(&tok_bytes) {
let tokenizer = match tokenizers::Tokenizer::from_bytes(&tok_bytes[..]) {
Ok(t) => t,
Err(e) => { console_log!("[Coder] Tokenizer-parsinta: {}", e); return; }
};
@@ -107,9 +126,9 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
Err(e) => { console_log!("[Coder] Malli osa 2 virhe: {}", e); return; }
};
console_log!("[Coder] Rakennetaan 3B-mallia...");
let mut all_tensors = candle_core::safetensors::load_buffer(&part1, &device)
let mut all_tensors = candle_core::safetensors::load_buffer(&part1[..], &device)
.map_err(|e| format!("Part1: {}", e)).unwrap();
let tensors2 = candle_core::safetensors::load_buffer(&part2, &device)
let tensors2 = candle_core::safetensors::load_buffer(&part2[..], &device)
.map_err(|e| format!("Part2: {}", e)).unwrap();
all_tensors.extend(tensors2);
all_tensors
@@ -120,7 +139,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
Err(e) => { console_log!("[Coder] Malli-virhe: {}", e); return; }
};
console_log!("[Coder] Rakennetaan 0.5B-mallia...");
match candle_core::safetensors::load_buffer(&model_bytes, &device) {
match candle_core::safetensors::load_buffer(&model_bytes[..], &device) {
Ok(t) => t,
Err(e) => { console_log!("[Coder] Safetensors: {}", e); return; }
}
@@ -220,7 +239,14 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
} else {
logits
};
let mut next_token = crate::sampling::sample_top_k(&logits, 10, 5.0);
// Sampling-parametrit
let temperature: f32 = 0.7;
let top_k: usize = 40;
let repetition_penalty: f32 = 1.15;
let mut all_generated: Vec<u32> = Vec::new();
let mut next_token = crate::sampling::sample_top_k_with_penalty(&logits, top_k, temperature, &all_generated, repetition_penalty);
if next_token != eos_token {
if let Ok(text) = tokenizer.decode(&[next_token], true) {
@@ -229,6 +255,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
let _ = ws.borrow().send_with_str(&chunk.to_string());
}
all_generated.push(next_token);
tokens_generated += 1;
}
@@ -252,7 +279,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
} else {
logits
};
next_token = crate::sampling::sample_top_k(&logits, 10, 5.0);
next_token = crate::sampling::sample_top_k_with_penalty(&logits, top_k, temperature, &all_generated, repetition_penalty);
pos += 1;
if next_token == eos_token { break; }
@@ -263,6 +290,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
let _ = ws.borrow().send_with_str(&chunk.to_string());
}
all_generated.push(next_token);
tokens_generated += 1;
// Yield — vapautetaan selaimen event loop joka tokenin jälkeen