1.5B Q4_K_M: vaihdettu 3B→1.5B, koska 3B ei mahdu WASM:iin (~1 GB vs ~2 GB)
3B GGUF vaati ~5 GB muistia parsinnassa → SIGILL WASM:n 4 GB rajalla. 1.5B Q4_K_M on ~1 GB ja mahtuu turvallisesti selaimeen.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,9 +18,9 @@ macro_rules! console_log {
|
|||||||
const MODEL_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/model.safetensors";
|
const MODEL_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/model.safetensors";
|
||||||
const TOKENIZER_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/tokenizer.json";
|
const TOKENIZER_05B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct/resolve/main/tokenizer.json";
|
||||||
|
|
||||||
// 3B GGUF Q4_K_M — kvantisoidtu, mahtuu selaimeen (~1.9 GB)
|
// 1.5B GGUF Q4_K_M — kvantisoidtu, mahtuu selaimeen (~1 GB)
|
||||||
const MODEL_3B_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q4_k_m.gguf";
|
const MODEL_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf";
|
||||||
const TOKENIZER_3B_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct/resolve/main/tokenizer.json";
|
const TOKENIZER_GGUF_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct/resolve/main/tokenizer.json";
|
||||||
|
|
||||||
enum CoderModel {
|
enum CoderModel {
|
||||||
Full(QwenModel),
|
Full(QwenModel),
|
||||||
@@ -207,8 +207,8 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc<RefCell<WebSocket>>) -> Result
|
|||||||
let dtype = DType::F32;
|
let dtype = DType::F32;
|
||||||
|
|
||||||
// Tokenizer
|
// Tokenizer
|
||||||
let tok_url = if use_3b { TOKENIZER_3B_URL } else { TOKENIZER_05B_URL };
|
let tok_url = if use_3b { TOKENIZER_GGUF_URL } else { TOKENIZER_05B_URL };
|
||||||
let tok_key = if use_3b { "coder3b-tokenizer.json" } else { "coder05b-tokenizer.json" };
|
let tok_key = if use_3b { "coder15b-tokenizer.json" } else { "coder05b-tokenizer.json" };
|
||||||
let tok_bytes = ensure_cached(tok_key, tok_url, ws).await?;
|
let tok_bytes = ensure_cached(tok_key, tok_url, ws).await?;
|
||||||
let tokenizer = tokenizers::Tokenizer::from_bytes(&tok_bytes[..])
|
let tokenizer = tokenizers::Tokenizer::from_bytes(&tok_bytes[..])
|
||||||
.map_err(|e| format!("Tokenizer: {}", e))?;
|
.map_err(|e| format!("Tokenizer: {}", e))?;
|
||||||
@@ -216,8 +216,8 @@ async fn get_or_build_model(use_3b: bool, ws: &Rc<RefCell<WebSocket>>) -> Result
|
|||||||
// Painot
|
// Painot
|
||||||
let model = if use_3b {
|
let model = if use_3b {
|
||||||
// GGUF Q4_K_M — kvantisoidtu 3B-malli (~1.9 GB)
|
// GGUF Q4_K_M — kvantisoidtu 3B-malli (~1.9 GB)
|
||||||
let gguf_bytes = ensure_cached("coder3b-q4km.gguf", MODEL_3B_GGUF_URL, ws).await?;
|
let gguf_bytes = ensure_cached("coder15b-q4km.gguf", MODEL_GGUF_URL, ws).await?;
|
||||||
console_log!("[Coder] Rakennetaan kvantisoidun 3B-mallia (Q4_K_M)...");
|
console_log!("[Coder] Rakennetaan kvantisoidun 1.5B-mallia (Q4_K_M)...");
|
||||||
let mut cursor = std::io::Cursor::new(&gguf_bytes[..]);
|
let mut cursor = std::io::Cursor::new(&gguf_bytes[..]);
|
||||||
let content = gguf_file::Content::read(&mut cursor)
|
let content = gguf_file::Content::read(&mut cursor)
|
||||||
.map_err(|e| format!("GGUF parse: {}", e))?;
|
.map_err(|e| format!("GGUF parse: {}", e))?;
|
||||||
|
|||||||
@@ -2229,7 +2229,7 @@ Write the corrected code.`;
|
|||||||
// Mallikatalogista valinta numerolla tai nimellä
|
// Mallikatalogista valinta numerolla tai nimellä
|
||||||
const loadModels = [
|
const loadModels = [
|
||||||
{ id: '1', key: '05b', name: 'Qwen2.5-Coder:0.5B', size: '~990 MB', coderSize: '05b' },
|
{ id: '1', key: '05b', name: 'Qwen2.5-Coder:0.5B', size: '~990 MB', coderSize: '05b' },
|
||||||
{ id: '2', key: '3b', name: 'Qwen2.5-Coder:3B Q4', size: '~1.9 GB', coderSize: '3b' },
|
{ id: '2', key: '3b', name: 'Qwen2.5-Coder:1.5B Q4', size: '~1 GB', coderSize: '3b' },
|
||||||
];
|
];
|
||||||
if (!arg) {
|
if (!arg) {
|
||||||
// Näytetään lista
|
// Näytetään lista
|
||||||
@@ -2268,7 +2268,7 @@ Write the corrected code.`;
|
|||||||
if (sub === 'models') {
|
if (sub === 'models') {
|
||||||
termLog(' Käytettävissä olevat mallit:', '#c9d1d9');
|
termLog(' Käytettävissä olevat mallit:', '#c9d1d9');
|
||||||
termLog(' <span style="color:#58a6ff">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB | koodin generointi</span>');
|
termLog(' <span style="color:#58a6ff">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB | koodin generointi</span>');
|
||||||
termLog(' <span style="color:#58a6ff">2</span> qwen-coder-3b Qwen2.5-Coder:3B Q4 <span style="color:#8b949e">~1.9 GB | kvantisoidtu, parempi laatu</span>');
|
termLog(' <span style="color:#58a6ff">2</span> qwen-coder-3b Qwen2.5-Coder:1.5B Q4 <span style="color:#8b949e">~1 GB | kvantisoidtu, parempi laatu</span>');
|
||||||
termLog(' <span style="color:#58a6ff">3</span> smollm-135m SmolLM 135M <span style="color:#8b949e">~270 MB | kevyt, nopea</span>');
|
termLog(' <span style="color:#58a6ff">3</span> smollm-135m SmolLM 135M <span style="color:#8b949e">~270 MB | kevyt, nopea</span>');
|
||||||
termLog(' <span style="color:#58a6ff">4</span> qwen-05b Qwen2.5:0.5B <span style="color:#8b949e">~990 MB | yleismalli</span>');
|
termLog(' <span style="color:#58a6ff">4</span> qwen-05b Qwen2.5:0.5B <span style="color:#8b949e">~990 MB | yleismalli</span>');
|
||||||
termLog(' <span style="color:#58a6ff">5</span> phi3-mini Phi-3 Mini <span style="color:#8b949e">~2.2 GB | Microsoftin malli</span>');
|
termLog(' <span style="color:#58a6ff">5</span> phi3-mini Phi-3 Mini <span style="color:#8b949e">~2.2 GB | Microsoftin malli</span>');
|
||||||
@@ -3181,7 +3181,7 @@ Write the corrected code.`;
|
|||||||
// Terminaaliin valmis-viesti
|
// Terminaaliin valmis-viesti
|
||||||
const term = document.getElementById('agent-terminal');
|
const term = document.getElementById('agent-terminal');
|
||||||
if (term) {
|
if (term) {
|
||||||
const sLabel = coderSize === '3b' ? 'Qwen2.5-Coder:3B' : 'Qwen2.5-Coder:0.5B';
|
const sLabel = coderSize === '3b' ? 'Qwen2.5-Coder:1.5B Q4' : 'Qwen2.5-Coder:0.5B';
|
||||||
termLog(` <span style="color:#3fb950">✓</span> ${sLabel} valmis — kpn run coder "prompti"`, '#3fb950');
|
termLog(` <span style="color:#3fb950">✓</span> ${sLabel} valmis — kpn run coder "prompti"`, '#3fb950');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user