Files
agentic-studio/network-poc/native-node/src/inference.rs
jaakko 5d2027b2ca Native-node: automaattinen Ollama-haistelu käynnistyksessä
Jos OLLAMA_URL ei ole asetettu, kokeillaan järjestyksessä:
1. localhost:11434 (paikallinen Ollama)
2. 127.0.0.1:11434
3. ollama:11434 (Docker-verkko)
4. host.docker.internal:11434 (Docker-kontti → isäntä)

Ensimmäinen joka vastaa /api/version-kutsuun valitaan.
Timeout 2s per kokeilu. Jos OLLAMA_URL on asetettu, sitä käytetään suoraan.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 17:41:44 +03:00

159 lines
5.6 KiB
Rust

use std::time::Instant;
use std::cell::RefCell;
pub struct LlmEngine {
ollama_url: String,
model: RefCell<String>,
client: reqwest::Client,
}
impl LlmEngine {
pub async fn load() -> Result<Self, String> {
let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:7b".to_string());
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(600))
.connect_timeout(std::time::Duration::from_secs(3))
.build()
.map_err(|e| format!("HTTP client: {}", e))?;
// Jos OLLAMA_URL on asetettu, käytetään sitä suoraan
let ollama_url = if let Ok(url) = std::env::var("OLLAMA_URL") {
tracing::info!("Ollama backend (env): {}", url);
url
} else {
// Haistellaan Ollamaa tunnetuista osoitteista
let candidates = [
"http://localhost:11434",
"http://127.0.0.1:11434",
"http://ollama:11434",
"http://host.docker.internal:11434",
];
let mut found = None;
for url in &candidates {
let probe = reqwest::Client::builder()
.connect_timeout(std::time::Duration::from_secs(2))
.build().unwrap_or(client.clone());
if let Ok(resp) = probe.get(format!("{}/api/version", url)).send().await {
if resp.status().is_success() {
tracing::info!("Ollama löytyi osoitteesta: {}", url);
found = Some(url.to_string());
break;
}
}
}
found.unwrap_or_else(|| {
tracing::warn!("Ollamaa ei löytynyt — käytetään oletusta http://localhost:11434");
"http://localhost:11434".to_string()
})
};
tracing::info!("Ollama backend: {} | malli: {}", ollama_url, model);
Ok(LlmEngine { ollama_url, model: RefCell::new(model), client })
}
pub fn model_name(&self) -> String {
self.model.borrow().clone()
}
pub fn set_model(&self, new_model: String) {
*self.model.borrow_mut() = new_model;
}
/// Varmistaa että malli on ladattu Ollamaan (ollama pull)
pub async fn ensure_model(&self) -> Result<(), String> {
let model = self.model.borrow().clone();
tracing::info!("Tarkistetaan malli {}...", model);
let resp = self.client.post(format!("{}/api/pull", self.ollama_url))
.json(&serde_json::json!({ "name": model, "stream": false }))
.send()
.await
.map_err(|e| format!("Ollama pull: {}", e))?;
if resp.status().is_success() {
tracing::info!("Malli {} valmis", model);
Ok(())
} else {
Err(format!("Ollama pull epäonnistui: {}", resp.status()))
}
}
pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
let system = "You are a coding assistant. Respond with ONLY code. Use proper newlines and indentation. No explanations, no markdown fences, no comments unless asked.";
let model = self.model.borrow().clone();
let start = Instant::now();
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
.json(&serde_json::json!({
"model": model,
"prompt": prompt,
"system": system,
"stream": false,
"options": {
"num_predict": max_tokens,
"temperature": 0.7,
"top_k": 40,
"repeat_penalty": 1.15,
"stop": ["<|im_end|>", "\n###", "\nExplanation", "\nNote:"]
}
}))
.send()
.await
.map_err(|e| format!("Ollama generate: {}", e))?;
if !resp.status().is_success() {
return Err(format!("Ollama HTTP {}", resp.status()));
}
let body: serde_json::Value = resp.json().await
.map_err(|e| format!("Ollama JSON: {}", e))?;
let text = body["response"].as_str().unwrap_or("").to_string();
let total_duration_ns = body["total_duration"].as_u64().unwrap_or(0);
let eval_count = body["eval_count"].as_u64().unwrap_or(0) as usize;
let eval_duration_ns = body["eval_duration"].as_u64().unwrap_or(1);
let duration_ms = start.elapsed().as_millis() as f64;
let tokens_per_sec = if eval_duration_ns > 0 {
eval_count as f64 / (eval_duration_ns as f64 / 1_000_000_000.0)
} else { 0.0 };
Ok(GenerateResult {
text: strip_code_fences(&text),
tokens_generated: eval_count,
duration_ms,
tokens_per_sec,
})
}
}
/// Siivoa mahdolliset markdown-koodiblokki-merkit
fn strip_code_fences(text: &str) -> String {
let mut result = text.trim().to_string();
// Poista aloittava ```lang
if result.starts_with("```") {
if let Some(nl) = result.find('\n') {
result = result[nl + 1..].to_string();
}
}
// Poista sulkeva ```
let trimmed = result.trim_end();
if trimmed.ends_with("```") {
let before = &trimmed[..trimmed.len() - 3];
if before.is_empty() || before.ends_with('\n') {
result = before.trim_end().to_string();
}
}
result
}
pub struct GenerateResult {
pub text: String,
pub tokens_generated: usize,
pub duration_ms: f64,
pub tokens_per_sec: f64,
}