1.5B Q4_K_M: vaihdettu 3B→1.5B, koska 3B ei mahdu WASM:iin (1.5B ~1 GB vs 3B ~2 GB)

3B GGUF vaati ~5 GB muistia parsinnassa → SIGILL WASM:n 4 GB:n rajalla.
1.5B Q4_K_M on ~1 GB ja mahtuu turvallisesti selaimeen.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-06 16:14:41 +03:00
parent eb69893124
commit e652bf7ab6
2 changed files with 10 additions and 10 deletions

View File

@@ -18,9 +18,9 @@ macro_rules! console_log {
const MODEL_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/model.safetensors";
const TOKENIZER_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/tokenizer.json";
// 3B GGUF Q4_K_M — kvantisoidtu, mahtuu selaimeen (~1.9 GB)
const MODEL_3B_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q4_k_m.gguf";
const TOKENIZER_3B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/tokenizer.json";
// 1.5B GGUF Q4_K_M — kvantisoidtu, mahtuu selaimeen (~1 GB)
const MODEL_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf";
const TOKENIZER_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct/resolve/main/tokenizer.json";
enum CoderModel {
Full(QwenModel),
@@ -207,8 +207,8 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc<RefCell<WebSocket>>) -> Result
let dtype = DType::F32;
// Tokenizer
let tok_url = if use_3b { TOKENIZER_3B_URL } else { TOKENIZER_05B_URL };
let tok_key = if use_3b { "coder3b-tokenizer.json" } else { "coder05b-tokenizer.json" };
let tok_url = if use_3b { TOKENIZER_GGUF_URL } else { TOKENIZER_05B_URL };
let tok_key = if use_3b { "coder15b-tokenizer.json" } else { "coder05b-tokenizer.json" };
let tok_bytes = ensure_cached(tok_key, tok_url, ws).await?;
let tokenizer = tokenizers::Tokenizer::from_bytes(&tok_bytes[..])
.map_err(|e| format!("Tokenizer: {}", e))?;
@@ -216,8 +216,8 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc<RefCell<WebSocket>>) -> Result
// Painot
let model = if use_3b {
// GGUF Q4_K_M — kvantisoidtu 3B-malli (~1.9 GB)
let gguf_bytes = ensure_cached("coder3b-q4km.gguf", MODEL_3B_GGUF_URL, ws).await?;
console_log!("[Coder] Rakennetaan kvantisoidun 3B-mallia (Q4_K_M)...");
let gguf_bytes = ensure_cached("coder15b-q4km.gguf", MODEL_GGUF_URL, ws).await?;
console_log!("[Coder] Rakennetaan kvantisoidun 1.5B-mallia (Q4_K_M)...");
let mut cursor = std::io::Cursor::new(&gguf_bytes[..]);
let content = gguf_file::Content::read(&mut cursor)
.map_err(|e| format!("GGUF parse: {}", e))?;

View File

@@ -2229,7 +2229,7 @@ Write the corrected code.`;
// Mallikatalogista valinta numerolla tai nimellä
const loadModels = [
{ id: '1', key: '05b', name: 'Qwen2.5-Coder:0.5B', size: '~990 MB', coderSize: '05b' },
{ id: '2', key: '3b', name: 'Qwen2.5-Coder:3B Q4', size: '~1.9 GB', coderSize: '3b' },
{ id: '2', key: '3b', name: 'Qwen2.5-Coder:1.5B Q4', size: '~1 GB', coderSize: '3b' },
];
if (!arg) {
// Näytetään lista
@@ -2268,7 +2268,7 @@ Write the corrected code.`;
if (sub === 'models') {
termLog(' Käytettävissä olevat mallit:', '#c9d1d9');
termLog(' <span style="color:#58a6ff">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB | koodin generointi</span>');
termLog(' <span style="color:#58a6ff">2</span> qwen-coder-3b Qwen2.5-Coder:3B Q4 <span style="color:#8b949e">~1.9 GB | kvantisoidtu, parempi laatu</span>');
termLog(' <span style="color:#58a6ff">2</span> qwen-coder-3b Qwen2.5-Coder:1.5B Q4 <span style="color:#8b949e">~1 GB | kvantisoidtu, parempi laatu</span>');
termLog(' <span style="color:#58a6ff">3</span> smollm-135m SmolLM 135M <span style="color:#8b949e">~270 MB | kevyt, nopea</span>');
termLog(' <span style="color:#58a6ff">4</span> qwen-05b Qwen2.5:0.5B <span style="color:#8b949e">~990 MB | yleismalli</span>');
termLog(' <span style="color:#58a6ff">5</span> phi3-mini Phi-3 Mini <span style="color:#8b949e">~2.2 GB | Microsoftin malli</span>');
@@ -3181,7 +3181,7 @@ Write the corrected code.`;
// Terminaaliin valmis-viesti
const term = document.getElementById('agent-terminal');
if (term) {
const sLabel = coderSize === '3b' ? 'Qwen2.5-Coder:3B' : 'Qwen2.5-Coder:0.5B';
const sLabel = coderSize === '3b' ? 'Qwen2.5-Coder:1.5B Q4' : 'Qwen2.5-Coder:0.5B';
termLog(` <span style="color:#3fb950">✓</span> ${sLabel} valmis — kpn run coder "prompti"`, '#3fb950');
}
}