Native-noden oletusmalli vaihdettu kvantisoiduksi: qwen2.5-coder:7b-instruct-q4_K_M

Q4-kvantisointi: ~4 GB (vs. 7 GB), ~40 tok/s M2:lla (vs. ~25 tok/s). Parempi nopeus/laatu-suhde.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 20:35:11 +03:00
parent fc1fb33d5e
commit 9983c80ef1
1 changed files with 1 additions and 1 deletions
--- a/network-poc/native-node/src/inference.rs
+++ b/network-poc/native-node/src/inference.rs
@@ -9,7 +9,7 @@ pub struct LlmEngine {

 impl LlmEngine {
    pub async fn load() -> Result<Self, String> {
-        let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b".to_string());
+        let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b-instruct-q4_K_M".to_string());

        let client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(600))