Päivitetty juttuja

This commit is contained in:
Jaakko Vanhala
2026-04-04 21:13:20 +03:00
parent 2e7ddf6f1e
commit 3ada8949d0
11 changed files with 457 additions and 105 deletions

BIN
network-poc/node/nodes.db Normal file

Binary file not shown.

View File

@@ -130,8 +130,9 @@ async fn run_single_tokenize(text: String, ws: Rc<RefCell<WebSocket>>) {
let token_count = result["token_count"].as_u64().unwrap_or(0);
let cpt = result["chars_per_token"].as_f64().unwrap_or(0.0);
let preview: String = text.chars().take(50).collect();
console_log!("Tokenisaatio: \"{}\" → {} tokenia | {:.2} m/t | {:.2}ms",
&text[..text.len().min(50)], token_count, cpt, duration_ms);
preview, token_count, cpt, duration_ms);
let msg = serde_json::json!({
"type": "single_tokenize_done",
@@ -270,7 +271,8 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !prompt.is_empty() {
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !prompt.is_empty() && model == "qwen-05b" {
LLM_BUSY.store(true, Ordering::SeqCst);
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
@@ -284,7 +286,8 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !prompt.is_empty() {
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !prompt.is_empty() && model.starts_with("phi3-mini") {
LLM_BUSY.store(true, Ordering::SeqCst);
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
@@ -295,18 +298,30 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
}
} else if msg.contains("llm_prompt") && (current_task == 4 || current_task == 5) {
// Qwen2.5-Coder: 4 = 0.5B, 5 = 3B
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
let task_id = task.get("task_id").and_then(|v| v.as_str()).map(|s| s.to_string());
if !prompt.is_empty() {
let use_3b = current_task == 5;
LLM_BUSY.store(true, Ordering::SeqCst);
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
qwen_coder::run_coder_inference(prompt, ws_for_async, use_3b, task_id).await;
LLM_BUSY.store(false, Ordering::SeqCst);
});
if !prompt.is_empty() && model.starts_with("qwen-coder") {
if LLM_BUSY.load(Ordering::SeqCst) {
if let Some(tid) = task_id {
let err_msg = serde_json::json!({
"type": "llm_error",
"task_id": tid,
"error": "Solmu on paraikaa varattuna toisen tehtävän suorittamiseen"
});
let _ = ws_clone.borrow().send_with_str(&err_msg.to_string());
}
} else {
let use_3b = current_task == 5;
LLM_BUSY.store(true, Ordering::SeqCst);
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
qwen_coder::run_coder_inference(prompt, ws_for_async, use_3b, task_id).await;
LLM_BUSY.store(false, Ordering::SeqCst);
});
}
}
}
} else if msg.contains("ai_task") {

View File

@@ -21,12 +21,28 @@ const MODEL_3B_PART1_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-I
const MODEL_3B_PART2_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/model-00002-of-00002.safetensors";
const TOKENIZER_3B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/tokenizer.json";
async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Result<Vec<u8>, String> {
if let Ok(Some(bytes)) = storage::load_from_idb(key).await {
console_log!("[Coder] {} löytyi välimuistista ({} MB)", key, bytes.len() / 1024 / 1024);
thread_local! {
static RAM_CACHE: RefCell<std::collections::HashMap<String, Rc<Vec<u8>>>> = RefCell::new(std::collections::HashMap::new());
}
async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Result<Rc<Vec<u8>>, String> {
// 1. Tarkistetaan RAM välimuisti (estää OOM ja levy-I/O pullonkaulat)
let ram_hit = RAM_CACHE.with(|cache| {
cache.borrow().get(key).cloned()
});
if let Some(bytes) = ram_hit {
console_log!("[Coder] {} löytyi nopeasta RAM-välimuistista!", key);
return Ok(bytes);
}
// 2. Tarkistetaan IndexedDB (jos selain on suljettu aikaisemmin)
if let Ok(Some(bytes)) = storage::load_from_idb(key).await {
console_log!("[Coder] {} löytyi IndexedDB-välimuistista ({} MB)", key, bytes.len() / 1024 / 1024);
let rc_bytes = Rc::new(bytes);
RAM_CACHE.with(|cache| cache.borrow_mut().insert(key.to_string(), rc_bytes.clone()));
return Ok(rc_bytes);
}
console_log!("[Coder] Ladataan {}...", key);
let window = web_sys::window().unwrap();
@@ -68,11 +84,14 @@ async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Res
}
}
console_log!("[Coder] Tallennetaan {} ({} MB)...", key, data.len() / 1024 / 1024);
console_log!("[Coder] Tallennetaan {} ({} MB) IndexedDB:hen...", key, data.len() / 1024 / 1024);
let _ = storage::save_to_idb(key, &data).await;
console_log!("[Coder] {} tallennettu!", key);
Ok(data)
let rc_data = Rc::new(data);
RAM_CACHE.with(|cache| cache.borrow_mut().insert(key.to_string(), rc_data.clone()));
Ok(rc_data)
}
/// use_3b: false = 0.5B (nopea), true = 3B (laadukas)
@@ -87,7 +106,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
Ok(b) => b,
Err(e) => { console_log!("[Coder] Tokenizer-virhe: {}", e); return; }
};
let tokenizer = match tokenizers::Tokenizer::from_bytes(&tok_bytes) {
let tokenizer = match tokenizers::Tokenizer::from_bytes(&tok_bytes[..]) {
Ok(t) => t,
Err(e) => { console_log!("[Coder] Tokenizer-parsinta: {}", e); return; }
};
@@ -107,9 +126,9 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
Err(e) => { console_log!("[Coder] Malli osa 2 virhe: {}", e); return; }
};
console_log!("[Coder] Rakennetaan 3B-mallia...");
let mut all_tensors = candle_core::safetensors::load_buffer(&part1, &device)
let mut all_tensors = candle_core::safetensors::load_buffer(&part1[..], &device)
.map_err(|e| format!("Part1: {}", e)).unwrap();
let tensors2 = candle_core::safetensors::load_buffer(&part2, &device)
let tensors2 = candle_core::safetensors::load_buffer(&part2[..], &device)
.map_err(|e| format!("Part2: {}", e)).unwrap();
all_tensors.extend(tensors2);
all_tensors
@@ -120,7 +139,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
Err(e) => { console_log!("[Coder] Malli-virhe: {}", e); return; }
};
console_log!("[Coder] Rakennetaan 0.5B-mallia...");
match candle_core::safetensors::load_buffer(&model_bytes, &device) {
match candle_core::safetensors::load_buffer(&model_bytes[..], &device) {
Ok(t) => t,
Err(e) => { console_log!("[Coder] Safetensors: {}", e); return; }
}
@@ -220,7 +239,14 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
} else {
logits
};
let mut next_token = crate::sampling::sample_top_k(&logits, 10, 5.0);
// Sampling-parametrit
let temperature: f32 = 0.7;
let top_k: usize = 40;
let repetition_penalty: f32 = 1.15;
let mut all_generated: Vec<u32> = Vec::new();
let mut next_token = crate::sampling::sample_top_k_with_penalty(&logits, top_k, temperature, &all_generated, repetition_penalty);
if next_token != eos_token {
if let Ok(text) = tokenizer.decode(&[next_token], true) {
@@ -229,6 +255,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
let _ = ws.borrow().send_with_str(&chunk.to_string());
}
all_generated.push(next_token);
tokens_generated += 1;
}
@@ -252,7 +279,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
} else {
logits
};
next_token = crate::sampling::sample_top_k(&logits, 10, 5.0);
next_token = crate::sampling::sample_top_k_with_penalty(&logits, top_k, temperature, &all_generated, repetition_penalty);
pos += 1;
if next_token == eos_token { break; }
@@ -263,6 +290,7 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
let _ = ws.borrow().send_with_str(&chunk.to_string());
}
all_generated.push(next_token);
tokens_generated += 1;
// Yield — vapautetaan selaimen event loop joka tokenin jälkeen

View File

@@ -1,39 +1,105 @@
use candle_core::Tensor;
use std::cell::Cell;
/// Top-k sampling ilman softmaxia — kiertää Candlen SoftmaxLastDim Wasm-bugin.
/// Valitsee top-k logiteista ja poimii satunnaisen (painotettu).
/// Jos k=1, toimii kuten argmax (greedy).
pub fn sample_top_k(logits: &Tensor, k: usize, eos_penalty: f32) -> u32 {
// Muunnetaan Vec<f32>:ksi
let logits_vec: Vec<f32> = logits.to_vec1::<f32>().unwrap_or_default();
// Per-thread state for the xorshift generator implemented in `next_rand`.
// The initial value 0 is treated by `next_rand` as "not seeded yet": on first
// use it is replaced with a seed derived from the current wall-clock time.
thread_local! {
static RNG_STATE: Cell<u64> = Cell::new(0);
}
/// Returns a pseudo-random value in `[0.0, 1.0)` with a resolution of 1/10000.
///
/// Each call advances the thread-local `RNG_STATE` by one xorshift step
/// (shifts 13, 7, 17) and derives the result from the new state, so
/// consecutive calls yield different values without needing `getrandom`.
fn next_rand() -> f32 {
    RNG_STATE.with(|cell| {
        // A stored state of 0 means the generator has not been seeded yet.
        // Seed it from the clock; `| 1` guarantees the seed is non-zero.
        let seed = match cell.get() {
            0 => (js_sys::Date::now() * 1000.0) as u64 | 1,
            current => current,
        };

        // One xorshift round, written as three explicit stages.
        let stage1 = seed ^ (seed << 13);
        let stage2 = stage1 ^ (stage1 >> 7);
        let next = stage2 ^ (stage2 << 17);
        cell.set(next);

        // Reduce to one of 10000 evenly spaced buckets in [0, 1).
        let bucket = next % 10000;
        bucket as f32 / 10000.0
    })
}
/// Top-k sampling with temperature and repetition penalty.
/// `generated_tokens` sisältää aiemmin generoidut token-id:t toiston estämiseksi.
pub fn sample_top_k_with_penalty(logits: &Tensor, k: usize, temperature: f32, generated_tokens: &[u32], repetition_penalty: f32) -> u32 {
let mut logits_vec: Vec<f32> = logits.to_vec1::<f32>().unwrap_or_default();
if logits_vec.is_empty() { return 0; }
// Rangotaan ja otetaan top-k indeksit
// Repetition penalty
if repetition_penalty != 1.0 {
for &token_id in generated_tokens {
if (token_id as usize) < logits_vec.len() {
let logit = &mut logits_vec[token_id as usize];
if *logit > 0.0 {
*logit /= repetition_penalty;
} else {
*logit *= repetition_penalty;
}
}
}
}
// Temperature scaling
if temperature > 0.0 && temperature != 1.0 {
for logit in logits_vec.iter_mut() {
*logit /= temperature;
}
}
// Top-k
let mut indexed: Vec<(usize, f32)> = logits_vec.iter().enumerate().map(|(i, &v)| (i, v)).collect();
indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
indexed.truncate(k);
// EOS-penaltti: vähennetään EOS-tokenin logitia
for item in indexed.iter_mut() {
if item.0 == 2 || item.0 == 151645 { // SmolLM EOS=2, Qwen EOS=151645
item.1 -= eos_penalty;
}
}
if k == 1 {
if k == 1 || temperature == 0.0 {
return indexed[0].0 as u32;
}
// Yksinkertainen "softmax" top-k:lle CPU:lla
let max_logit = indexed.iter().map(|x| x.1).fold(f32::NEG_INFINITY, f32::max);
// Softmax top-k:lle
let max_logit = indexed[0].1;
let exps: Vec<f32> = indexed.iter().map(|x| (x.1 - max_logit).exp()).collect();
let sum: f32 = exps.iter().sum();
let probs: Vec<f32> = exps.iter().map(|e| e / sum).collect();
// Satunnainen valinta kumulatiivisella todennäköisyydellä
// Käytetään yksinkertaista XorShift-satunnaislukugeneraattoria (ei tarvita getrandom)
let seed = (js_sys::Date::now() * 1000.0) as u64;
let rand_val = ((seed ^ (seed >> 13) ^ (seed << 7)) % 10000) as f32 / 10000.0;
let rand_val = next_rand();
let mut cumulative = 0.0;
for (i, p) in probs.iter().enumerate() {
cumulative += p;
if rand_val < cumulative {
return indexed[i].0 as u32;
}
}
indexed[0].0 as u32
}
/// Alkuperäinen API yhteensopivuudeksi SmolLM/Qwen-moduulien kanssa
pub fn sample_top_k(logits: &Tensor, k: usize, eos_penalty: f32) -> u32 {
let mut logits_vec: Vec<f32> = logits.to_vec1::<f32>().unwrap_or_default();
if logits_vec.is_empty() { return 0; }
// EOS-penaltti
for &eos_id in &[2u32, 151645] {
if (eos_id as usize) < logits_vec.len() {
logits_vec[eos_id as usize] -= eos_penalty;
}
}
let mut indexed: Vec<(usize, f32)> = logits_vec.iter().enumerate().map(|(i, &v)| (i, v)).collect();
indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
indexed.truncate(k);
if k == 1 {
return indexed[0].0 as u32;
}
let max_logit = indexed[0].1;
let exps: Vec<f32> = indexed.iter().map(|x| (x.1 - max_logit).exp()).collect();
let sum: f32 = exps.iter().sum();
let probs: Vec<f32> = exps.iter().map(|e| e / sum).collect();
let rand_val = next_rand();
let mut cumulative = 0.0;
for (i, p) in probs.iter().enumerate() {