From e652bf7ab6f7f769401172e10c8fdf17f69cc6a3 Mon Sep 17 00:00:00 2001 From: jaakko Date: Mon, 6 Apr 2026 16:14:41 +0300 Subject: [PATCH] =?UTF-8?q?1.5B=20Q4=5FK=5FM:=20vaihdettu=203B=E2=86=921.5?= =?UTF-8?q?B=20koska=203B=20ei=20mahdu=20WASM:iin=20(~1=20GB=20vs=20~2=20G?= =?UTF-8?q?B)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 3B GGUF vaati ~5 GB muistia parsinnassa → SIGILL WASM:n 4 GB rajalla. 1.5B Q4_K_M on ~1 GB ja mahtuu turvallisesti selaimeen. Co-Authored-By: Claude Opus 4.6 (1M context) --- network-poc/node/src/qwen_coder.rs | 14 +++++++------- network-poc/static/index.html | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/network-poc/node/src/qwen_coder.rs b/network-poc/node/src/qwen_coder.rs index d0fb00d..a83854a 100644 --- a/network-poc/node/src/qwen_coder.rs +++ b/network-poc/node/src/qwen_coder.rs @@ -18,9 +18,9 @@ macro_rules! console_log { const MODEL_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/model.safetensors"; const TOKENIZER_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/tokenizer.json"; -// 3B GGUF Q4_K_M — kvantisoidtu, mahtuu selaimeen (~1.9 GB) -const MODEL_3B_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q4_k_m.gguf"; -const TOKENIZER_3B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/tokenizer.json"; +// 1.5B GGUF Q4_K_M — kvantisoidtu, mahtuu selaimeen (~1 GB) +const MODEL_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"; +const TOKENIZER_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct/resolve/main/tokenizer.json"; enum CoderModel { Full(QwenModel), @@ -207,8 +207,8 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc>) -> Result let dtype = DType::F32; // Tokenizer - let tok_url = if use_3b { TOKENIZER_3B_URL } else { TOKENIZER_05B_URL }; - let tok_key = if use_3b { "coder3b-tokenizer.json" } else { "coder05b-tokenizer.json" }; + let tok_url = if use_3b { TOKENIZER_GGUF_URL } else { TOKENIZER_05B_URL }; + let tok_key = if use_3b { "coder15b-tokenizer.json" } else { "coder05b-tokenizer.json" }; let tok_bytes = ensure_cached(tok_key, tok_url, ws).await?; let tokenizer = tokenizers::Tokenizer::from_bytes(&tok_bytes[..]) .map_err(|e| format!("Tokenizer: {}", e))?; @@ -216,8 +216,8 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc>) -> Result // Painot let model = if use_3b { // GGUF Q4_K_M — kvantisoidtu 3B-malli (~1.9 GB) - let gguf_bytes = ensure_cached("coder3b-q4km.gguf", MODEL_3B_GGUF_URL, ws).await?; - console_log!("[Coder] Rakennetaan kvantisoidun 3B-mallia (Q4_K_M)..."); + let gguf_bytes = ensure_cached("coder15b-q4km.gguf", MODEL_GGUF_URL, ws).await?; + console_log!("[Coder] Rakennetaan kvantisoidun 1.5B-mallia (Q4_K_M)..."); let mut cursor = std::io::Cursor::new(&gguf_bytes[..]); let content = gguf_file::Content::read(&mut cursor) .map_err(|e| format!("GGUF parse: {}", e))?; diff --git a/network-poc/static/index.html b/network-poc/static/index.html index 8b291d6..f9ce1d7 100644 --- a/network-poc/static/index.html +++ b/network-poc/static/index.html @@ -2229,7 +2229,7 @@ Write the corrected code.`; // Mallikatalogista valinta numerolla tai nimellä const loadModels = [ { id: '1', key: '05b', name: 'Qwen2.5-Coder:0.5B', size: '~990 MB', coderSize: '05b' }, - { id: '2', key: '3b', name: 'Qwen2.5-Coder:3B Q4', size: '~1.9 GB', coderSize: '3b' }, + { id: '2', key: '3b', name: 'Qwen2.5-Coder:1.5B Q4', size: '~1 GB', coderSize: '3b' }, ]; if (!arg) { // Näytetään lista @@ -2268,7 +2268,7 @@ Write the corrected code.`; if (sub === 'models') { termLog(' Käytettävissä olevat mallit:', '#c9d1d9'); termLog(' 1 qwen-coder Qwen2.5-Coder:0.5B ~990 MB | koodin generointi'); - termLog(' 2 qwen-coder-3b Qwen2.5-Coder:3B Q4 ~1.9 GB | kvantisoidtu, parempi laatu'); + termLog(' 2 qwen-coder-3b Qwen2.5-Coder:1.5B Q4 ~1 GB | kvantisoidtu, parempi laatu'); termLog(' 3 smollm-135m SmolLM 135M ~270 MB | kevyt, nopea'); termLog(' 4 qwen-05b Qwen2.5:0.5B ~990 MB | yleismalli'); termLog(' 5 phi3-mini Phi-3 Mini ~2.2 GB | Microsoftin malli'); @@ -3181,7 +3181,7 @@ Write the corrected code.`; // Terminaaliin valmis-viesti const term = document.getElementById('agent-terminal'); if (term) { - const sLabel = coderSize === '3b' ? 'Qwen2.5-Coder:3B' : 'Qwen2.5-Coder:0.5B'; + const sLabel = coderSize === '3b' ? 'Qwen2.5-Coder:1.5B Q4' : 'Qwen2.5-Coder:0.5B'; termLog(` ${sLabel} valmis — kpn run coder "prompti"`, '#3fb950'); } }