diff --git a/network-poc/hub/src/main.rs b/network-poc/hub/src/main.rs index 03fea2e..0a2cf9b 100644 --- a/network-poc/hub/src/main.rs +++ b/network-poc/hub/src/main.rs @@ -384,6 +384,7 @@ async fn main() { .route("/api/pairs", get(api_pairs)) .route("/api/stats", get(api_stats)) .route("/api/v1/chat/completions", axum::routing::post(api_chat_completions)) + .route("/api/v1/model", axum::routing::post(api_change_model)) .route("/admin", get(admin_page)) .nest_service("/", { let static_dir = std::env::var("STATIC_DIR").unwrap_or_else(|_| "../static".to_string()); @@ -958,6 +959,20 @@ struct ChatCompletionResponse { tokens_generated: u64, } +async fn api_change_model( + axum::extract::State(state): axum::extract::State>, + axum::Json(payload): axum::Json, +) -> axum::response::Response { + let model = payload.get("model").and_then(|v| v.as_str()).unwrap_or(""); + if model.is_empty() { + return (axum::http::StatusCode::BAD_REQUEST, "model puuttuu").into_response(); + } + tracing::info!("Mallin vaihto: {}", model); + let msg = serde_json::json!({ "type": "change_model", "model": model }); + let _ = state.stats_tx.send(msg.to_string()); + axum::Json(serde_json::json!({ "status": "ok", "model": model })).into_response() +} + async fn api_chat_completions( axum::extract::State(state): axum::extract::State>, ConnectInfo(addr): ConnectInfo, diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs index 9018a26..b6bde90 100644 --- a/network-poc/native-node/src/inference.rs +++ b/network-poc/native-node/src/inference.rs @@ -1,8 +1,9 @@ use std::time::Instant; +use std::cell::RefCell; pub struct LlmEngine { ollama_url: String, - model: String, + model: RefCell, client: reqwest::Client, } @@ -18,24 +19,29 @@ impl LlmEngine { .build() .map_err(|e| format!("HTTP client: {}", e))?; - Ok(LlmEngine { ollama_url, model, client }) + Ok(LlmEngine { ollama_url, model: RefCell::new(model), client }) } - pub fn model_name(&self) -> &str { - &self.model + pub fn model_name(&self) -> String { + self.model.borrow().clone() + } + + pub fn set_model(&self, new_model: String) { + *self.model.borrow_mut() = new_model; } /// Varmistaa että malli on ladattu Ollamaan (ollama pull) pub async fn ensure_model(&self) -> Result<(), String> { - tracing::info!("Tarkistetaan malli {}...", self.model); + let model = self.model.borrow().clone(); + tracing::info!("Tarkistetaan malli {}...", model); let resp = self.client.post(format!("{}/api/pull", self.ollama_url)) - .json(&serde_json::json!({ "name": self.model, "stream": false })) + .json(&serde_json::json!({ "name": model, "stream": false })) .send() .await .map_err(|e| format!("Ollama pull: {}", e))?; if resp.status().is_success() { - tracing::info!("Malli {} valmis", self.model); + tracing::info!("Malli {} valmis", model); Ok(()) } else { Err(format!("Ollama pull epäonnistui: {}", resp.status())) @@ -44,11 +50,12 @@ impl LlmEngine { pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result { let system = "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked."; + let model = self.model.borrow().clone(); let start = Instant::now(); let resp = self.client.post(format!("{}/api/generate", self.ollama_url)) .json(&serde_json::json!({ - "model": self.model, + "model": model, "prompt": prompt, "system": system, "stream": false, diff --git a/network-poc/native-node/src/main.rs b/network-poc/native-node/src/main.rs index 6ec4de9..2fe768b 100644 --- a/network-poc/native-node/src/main.rs +++ b/network-poc/native-node/src/main.rs @@ -366,7 +366,21 @@ async fn main() { } } } - // Ohitetaan pair_task, stats jne. + // Mallin vaihto lennossa + if text.contains("change_model") { + if let Ok(task) = serde_json::from_str::(&text) { + if let Some(new_model) = task.get("model").and_then(|v| v.as_str()) { + if let Some(ref engine) = llm { + tracing::info!("Vaihdetaan malli: {}", new_model); + engine.set_model(new_model.to_string()); + match engine.ensure_model().await { + Ok(()) => tracing::info!("Malli {} valmis!", new_model), + Err(e) => tracing::error!("Mallin lataus epäonnistui: {}", e), + } + } + } + } + } } } tracing::warn!("Yhteys hubiin katkesi — yritetään uudelleen 5s..."); diff --git a/network-poc/static/index.html b/network-poc/static/index.html index 8fd1f9a..e612526 100644 --- a/network-poc/static/index.html +++ b/network-poc/static/index.html @@ -2371,18 +2371,58 @@ Files: ${Object.keys(generatedFiles).join(', ')}`; } if (sub === 'load') { - const btn = document.getElementById('agent-compute-btn'); - if (btn?.dataset.state === 'ready') { - termLog(' ✓ Qwen2.5-Coder:0.5B on jo ladattu ja valmis (selain)', '#3fb950'); - termLog(' Natiivisolmu (Docker) on nopeampi — ks. kpn models'); + const arg = parts[2]; + const ollamaModels = [ + { id: '1', name: 'qwen2.5-coder:0.5b', size: '~400 MB', type: 'selain + Ollama' }, + { id: '2', name: 'qwen2.5-coder:1.5b', size: '~1 GB', type: 'Ollama GPU' }, + { id: '3', name: 'qwen2.5-coder:7b', size: '~4.7 GB', type: 'Ollama GPU', default: true }, + { id: '4', name: 'qwen2.5-coder:14b', size: '~9 GB', type: 'Ollama GPU' }, + { id: '5', name: 'qwen2.5-coder:32b', size: '~20 GB', type: 'Ollama GPU' }, + ]; + if (!arg) { + termLog(' Mallit:', '#c9d1d9'); + for (const m of ollamaModels) { + const active = m.default ? ' ← aktiivinen' : ''; + termLog(` ${m.id} ${m.name} ${m.size} | ${m.type}${active}`); + } + termLog(' Käyttö: kpn load <numero>', '#8b949e'); return; } - coderSize = '05b'; - localStorage.setItem('kpn-coder-size', coderSize); - termLog(' Ladataan Qwen2.5-Coder:0.5B (~990 MB) selaimeen...', '#d29922'); - termLog(' Vinkki: natiivisolmu (Docker) on ~20× nopeampi'); - if (btn) btn.click(); - else ensureCoderNode(); + const selected = ollamaModels.find(m => m.id === arg || m.name === arg); + if (!selected) { + termLog(` Tuntematon malli "${esc(arg)}". Kokeile: kpn load`, '#f85149'); + return; + } + // Selain-WASM (vain 0.5b) + if (selected.id === '1') { + const btn = document.getElementById('agent-compute-btn'); + if (btn?.dataset.state === 'ready') { + termLog(' ✓ Qwen2.5-Coder:0.5B on jo ladattu (selain)', '#3fb950'); + return; + } + coderSize = '05b'; + termLog(' Ladataan Qwen2.5-Coder:0.5B selaimeen...', '#d29922'); + if (btn) btn.click(); + else ensureCoderNode(); + return; + } + // Ollama: vaihdetaan malli hubin kautta + termLog(` Vaihdetaan Ollama-malli: ${selected.name} (${selected.size})...`, '#d29922'); + fetch('/api/v1/model', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model: selected.name }), + }).then(r => r.json()).then(data => { + if (data.status === 'ok') { + termLog(` Malli vaihdettu: ${selected.name}`, '#3fb950'); + termLog(' Ollama lataa mallin ensimmäisellä pyynnöllä'); + // Päivitetään aktiivinen default + ollamaModels.forEach(m => m.default = false); + selected.default = true; + } else { + termLog(` ✗ Mallin vaihto epäonnistui`, '#f85149'); + } + }).catch(e => termLog(` ✗ ${e.message}`, '#f85149')); return; }