Native-noden oletusmalli vaihdettu kvantisoiduksi: qwen2.5-coder:7b-instruct-q4_K_M
Q4-kvantisointi: ~4GB (vs. 7GB), ~40 tok/s M2:lla (vs. ~25 tok/s). Parempi nopeus/laatu-suhde. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,7 +9,7 @@ pub struct LlmEngine {
|
||||
|
||||
impl LlmEngine {
|
||||
pub async fn load() -> Result<Self, String> {
|
||||
let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b".to_string());
|
||||
let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b-instruct-q4_K_M".to_string());
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(600))
|
||||
|
||||
Reference in New Issue
Block a user