use std::time::Instant; use std::cell::RefCell; pub struct LlmEngine { ollama_url: String, model: RefCell, client: reqwest::Client, } impl LlmEngine { pub async fn load() -> Result { let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b".to_string()); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(600)) .connect_timeout(std::time::Duration::from_secs(3)) .build() .map_err(|e| format!("HTTP client: {}", e))?; // Jos OLLAMA_URL on asetettu, käytetään sitä suoraan let ollama_url = if let Ok(url) = std::env::var("OLLAMA_URL") { tracing::info!("Ollama backend (env): {}", url); url } else { // Haistellaan Ollamaa tunnetuista osoitteista let candidates = [ "http://localhost:11434", "http://127.0.0.1:11434", "http://ollama:11434", "http://host.docker.internal:11434", ]; let mut found = None; for url in &candidates { let probe = reqwest::Client::builder() .connect_timeout(std::time::Duration::from_secs(2)) .build().unwrap_or(client.clone()); if let Ok(resp) = probe.get(format!("{}/api/version", url)).send().await { if resp.status().is_success() { tracing::info!("Ollama löytyi osoitteesta: {}", url); found = Some(url.to_string()); break; } } } found.unwrap_or_else(|| { tracing::warn!("Ollamaa ei löytynyt — käytetään oletusta http://localhost:11434"); "http://localhost:11434".to_string() }) }; tracing::info!("Ollama backend: {} | malli: {}", ollama_url, model); Ok(LlmEngine { ollama_url, model: RefCell::new(model), client }) } pub fn model_name(&self) -> String { self.model.borrow().clone() } pub fn set_model(&self, new_model: String) { *self.model.borrow_mut() = new_model; } /// Varmistaa että malli on ladattu Ollamaan (ollama pull) pub async fn ensure_model(&self) -> Result<(), String> { let model = self.model.borrow().clone(); tracing::info!("Tarkistetaan malli {}...", model); let resp = self.client.post(format!("{}/api/pull", self.ollama_url)) .json(&serde_json::json!({ "name": model, "stream": false })) .send() .await .map_err(|e| format!("Ollama pull: {}", e))?; if resp.status().is_success() { tracing::info!("Malli {} valmis", model); Ok(()) } else { Err(format!("Ollama pull epäonnistui: {}", resp.status())) } } pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result { let system = "You are a coding assistant. Respond with ONLY code. Use proper newlines and indentation. No explanations, no markdown fences, no comments unless asked."; let model = self.model.borrow().clone(); let start = Instant::now(); let resp = self.client.post(format!("{}/api/generate", self.ollama_url)) .json(&serde_json::json!({ "model": model, "prompt": prompt, "system": system, "stream": false, "options": { "num_predict": max_tokens, "temperature": 0.7, "top_k": 40, "repeat_penalty": 1.15, "stop": ["<|im_end|>", "\n###", "\nExplanation", "\nNote:"] } })) .send() .await .map_err(|e| format!("Ollama generate: {}", e))?; if !resp.status().is_success() { return Err(format!("Ollama HTTP {}", resp.status())); } let body: serde_json::Value = resp.json().await .map_err(|e| format!("Ollama JSON: {}", e))?; let text = body["response"].as_str().unwrap_or("").to_string(); let total_duration_ns = body["total_duration"].as_u64().unwrap_or(0); let eval_count = body["eval_count"].as_u64().unwrap_or(0) as usize; let eval_duration_ns = body["eval_duration"].as_u64().unwrap_or(1); let duration_ms = start.elapsed().as_millis() as f64; let tokens_per_sec = if eval_duration_ns > 0 { eval_count as f64 / (eval_duration_ns as f64 / 1_000_000_000.0) } else { 0.0 }; Ok(GenerateResult { text: strip_code_fences(&text), tokens_generated: eval_count, duration_ms, tokens_per_sec, }) } } /// Siivoa mahdolliset markdown-koodiblokki-merkit fn strip_code_fences(text: &str) -> String { let mut result = text.trim().to_string(); // Poista aloittava ```lang if result.starts_with("```") { if let Some(nl) = result.find('\n') { result = result[nl + 1..].to_string(); } } // Poista sulkeva ``` let trimmed = result.trim_end(); if trimmed.ends_with("```") { let before = &trimmed[..trimmed.len() - 3]; if before.is_empty() || before.ends_with('\n') { result = before.trim_end().to_string(); } } result } pub struct GenerateResult { pub text: String, pub tokens_generated: usize, pub duration_ms: f64, pub tokens_per_sec: f64, }