Native-noden oletusmalli vaihdettu kvantisoiduksi: qwen2.5-coder:7b-instruct-q4_K_M

Q4-kvantisoitu malli vie ~4 GB (aiemmin ~7 GB) ja tuottaa M2:lla ~40 tok/s (aiemmin ~25 tok/s).
Parempi nopeus/laatu-suhde.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jaakko Vanhala
2026-04-09 20:35:11 +03:00
parent fc1fb33d5e
commit 9983c80ef1

View File

@@ -9,7 +9,7 @@ pub struct LlmEngine {
impl LlmEngine {
pub async fn load() -> Result<Self, String> {
let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b".to_string());
let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b-instruct-q4_K_M".to_string());
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(600))