From 9983c80ef10e2d58c61e7dd9834eb464b492b733 Mon Sep 17 00:00:00 2001 From: Jaakko Vanhala Date: Thu, 9 Apr 2026 20:35:11 +0300 Subject: [PATCH] Native-noden oletusmalli vaihdettu kvantisoiduksi: qwen2.5-coder:7b-instruct-q4_K_M Q4-kvantisointi: ~4GB (vs. 7GB), ~40 tok/s M2:lla (vs. ~25 tok/s). Parempi nopeus/laatu-suhde. Co-Authored-By: Claude Opus 4.6 (1M context) --- network-poc/native-node/src/inference.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs index 8df0381..7c4c6cf 100644 --- a/network-poc/native-node/src/inference.rs +++ b/network-poc/native-node/src/inference.rs @@ -9,7 +9,7 @@ pub struct LlmEngine { impl LlmEngine { pub async fn load() -> Result { - let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b".to_string()); + let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b-instruct-q4_K_M".to_string()); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(600))