kpn load: Ollama-mallin vaihto lennossa (0.5b → 32b)

- Hub: uusi POST /api/v1/model endpoint, broadcastaa change_model
- Native node: kuuntelee change_model, kutsuu Ollaman pull + vaihtaa mallin
- Frontend: kpn load näyttää 5 mallia, numero vaihtaa Ollaman mallin
- Selain-WASM pysyy 0.5B:nä (kpn load 1)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-07 07:05:57 +03:00
parent 54a5af96c7
commit 34ef19472a
4 changed files with 95 additions and 19 deletions

View File

@@ -384,6 +384,7 @@ async fn main() {
.route("/api/pairs", get(api_pairs))
.route("/api/stats", get(api_stats))
.route("/api/v1/chat/completions", axum::routing::post(api_chat_completions))
.route("/api/v1/model", axum::routing::post(api_change_model))
.route("/admin", get(admin_page))
.nest_service("/", {
let static_dir = std::env::var("STATIC_DIR").unwrap_or_else(|_| "../static".to_string());
@@ -958,6 +959,20 @@ struct ChatCompletionResponse {
tokens_generated: u64,
}
/// Handler for `POST /api/v1/model`: asks connected nodes to switch model.
///
/// Expects a JSON body with a string field `model`. The name is trimmed; if it
/// is missing, not a string, or empty after trimming, the handler answers
/// `400 Bad Request` with the plain-text body `model puuttuu`.
///
/// Otherwise a `{"type": "change_model", "model": <name>}` message is sent on
/// `state.stats_tx` and the handler answers with
/// `{"status": "ok", "model": <name>}`. The reply only confirms that the
/// request was accepted; it does not wait for any node to load the model.
async fn api_change_model(
    axum::extract::State(state): axum::extract::State<Arc<AppState>>,
    axum::Json(payload): axum::Json<serde_json::Value>,
) -> axum::response::Response {
    // Trim so that a whitespace-only name is rejected here instead of being
    // broadcast to every node as a bogus model.
    let model = payload
        .get("model")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .unwrap_or_default();
    if model.is_empty() {
        return (axum::http::StatusCode::BAD_REQUEST, "model puuttuu").into_response();
    }

    tracing::info!("Mallin vaihto: {}", model);
    let msg = serde_json::json!({ "type": "change_model", "model": model });

    // Delivery stays best-effort (the HTTP reply is unchanged), but a failed
    // send is logged instead of being silently dropped.
    // NOTE(review): presumably a broadcast channel whose send fails when there
    // are no subscribers — confirm against the declaration of `stats_tx`.
    if state.stats_tx.send(msg.to_string()).is_err() {
        tracing::warn!(
            "change_model-viestin lähetys epäonnistui (ei kuuntelijoita?): {}",
            model
        );
    }

    axum::Json(serde_json::json!({ "status": "ok", "model": model })).into_response()
}
async fn api_chat_completions(
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
ConnectInfo(addr): ConnectInfo<SocketAddr>,

View File

@@ -1,8 +1,9 @@
use std::time::Instant;
use std::cell::RefCell;
/// Handle for talking to an Ollama server over HTTP.
// NOTE(review): `model` appears twice below. This looks like a diff rendering
// that shows the removed `String` field next to its `RefCell<String>`
// replacement — confirm against the actual file; only one can exist.
pub struct LlmEngine {
/// Base URL of the Ollama server; paths such as `/api/pull` and
/// `/api/generate` are appended to it when building requests.
ollama_url: String,
model: String,
/// Name of the model sent to Ollama. Wrapped in a `RefCell` so it can be
/// replaced through a shared `&self` reference (see `set_model`).
model: RefCell<String>,
/// HTTP client used for the Ollama requests.
client: reqwest::Client,
}
@@ -18,24 +19,29 @@ impl LlmEngine {
.build()
.map_err(|e| format!("HTTP client: {}", e))?;
Ok(LlmEngine { ollama_url, model, client })
Ok(LlmEngine { ollama_url, model: RefCell::new(model), client })
}
pub fn model_name(&self) -> &str {
&self.model
pub fn model_name(&self) -> String {
self.model.borrow().clone()
}
/// Replaces the model name used for subsequent Ollama requests.
///
/// Takes `&self` because the name lives in a `RefCell`.
///
/// # Panics
///
/// Panics if the model name is currently borrowed elsewhere (standard
/// `RefCell` behaviour).
pub fn set_model(&self, new_model: String) {
    // `replace` hands back the previous name; it is not needed here.
    drop(self.model.replace(new_model));
}
/// Varmistaa että malli on ladattu Ollamaan (ollama pull)
pub async fn ensure_model(&self) -> Result<(), String> {
tracing::info!("Tarkistetaan malli {}...", self.model);
let model = self.model.borrow().clone();
tracing::info!("Tarkistetaan malli {}...", model);
let resp = self.client.post(format!("{}/api/pull", self.ollama_url))
.json(&serde_json::json!({ "name": self.model, "stream": false }))
.json(&serde_json::json!({ "name": model, "stream": false }))
.send()
.await
.map_err(|e| format!("Ollama pull: {}", e))?;
if resp.status().is_success() {
tracing::info!("Malli {} valmis", self.model);
tracing::info!("Malli {} valmis", model);
Ok(())
} else {
Err(format!("Ollama pull epäonnistui: {}", resp.status()))
@@ -44,11 +50,12 @@ impl LlmEngine {
pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
let system = "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.";
let model = self.model.borrow().clone();
let start = Instant::now();
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
.json(&serde_json::json!({
"model": self.model,
"model": model,
"prompt": prompt,
"system": system,
"stream": false,

View File

@@ -366,7 +366,21 @@ async fn main() {
}
}
}
// Ohitetaan pair_task, stats jne.
// Mallin vaihto lennossa
// Hot-swap the Ollama model when the hub sends a `change_model` message.
// The substring test is only a cheap pre-filter so that unrelated messages are
// not parsed as JSON; the actual decision is made on the `type` field below.
if text.contains("change_model") {
    if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
        // Require an exact `type` match: a message that merely mentions
        // "change_model" somewhere in its payload must not switch the model.
        let is_change_model =
            task.get("type").and_then(|v| v.as_str()) == Some("change_model");
        if is_change_model {
            if let Some(new_model) = task.get("model").and_then(|v| v.as_str()) {
                if let Some(ref engine) = llm {
                    tracing::info!("Vaihdetaan malli: {}", new_model);
                    // Remember the current model so it can be restored if the
                    // pull fails. `ensure_model` reads the engine's current
                    // model, so the switch has to happen before the call.
                    let previous = engine.model_name();
                    engine.set_model(new_model.to_string());
                    match engine.ensure_model().await {
                        Ok(()) => tracing::info!("Malli {} valmis!", new_model),
                        Err(e) => {
                            tracing::error!("Mallin lataus epäonnistui: {}", e);
                            // Do not leave the engine pointing at a model
                            // that could not be pulled.
                            tracing::warn!("Palautetaan edellinen malli: {}", previous);
                            engine.set_model(previous);
                        }
                    }
                }
            }
        }
    }
}
}
}
tracing::warn!("Yhteys hubiin katkesi — yritetään uudelleen 5s...");

View File

@@ -2371,18 +2371,58 @@ Files: ${Object.keys(generatedFiles).join(', ')}`;
}
if (sub === 'load') {
const btn = document.getElementById('agent-compute-btn');
if (btn?.dataset.state === 'ready') {
termLog(' ✓ Qwen2.5-Coder:0.5B on jo ladattu ja valmis (selain)', '#3fb950');
termLog(' <span style="color:#8b949e">Natiivisolmu (Docker) on nopeampi — ks. kpn models</span>');
const arg = parts[2];
const ollamaModels = [
{ id: '1', name: 'qwen2.5-coder:0.5b', size: '~400 MB', type: 'selain + Ollama' },
{ id: '2', name: 'qwen2.5-coder:1.5b', size: '~1 GB', type: 'Ollama GPU' },
{ id: '3', name: 'qwen2.5-coder:7b', size: '~4.7 GB', type: 'Ollama GPU', default: true },
{ id: '4', name: 'qwen2.5-coder:14b', size: '~9 GB', type: 'Ollama GPU' },
{ id: '5', name: 'qwen2.5-coder:32b', size: '~20 GB', type: 'Ollama GPU' },
];
if (!arg) {
termLog(' Mallit:', '#c9d1d9');
for (const m of ollamaModels) {
const active = m.default ? ' <span style="color:#3fb950">← aktiivinen</span>' : '';
termLog(` <span style="color:#58a6ff">${m.id}</span> ${m.name} <span style="color:#8b949e">${m.size} | ${m.type}</span>${active}`);
}
termLog(' Käyttö: kpn load &lt;numero&gt;', '#8b949e');
return;
}
coderSize = '05b';
localStorage.setItem('kpn-coder-size', coderSize);
termLog(' Ladataan Qwen2.5-Coder:0.5B (~990 MB) selaimeen...', '#d29922');
termLog(' <span style="color:#8b949e">Vinkki: natiivisolmu (Docker) on ~20× nopeampi</span>');
if (btn) btn.click();
else ensureCoderNode();
const selected = ollamaModels.find(m => m.id === arg || m.name === arg);
if (!selected) {
termLog(` Tuntematon malli "${esc(arg)}". Kokeile: kpn load`, '#f85149');
return;
}
// Selain-WASM (vain 0.5b)
if (selected.id === '1') {
const btn = document.getElementById('agent-compute-btn');
if (btn?.dataset.state === 'ready') {
termLog(' ✓ Qwen2.5-Coder:0.5B on jo ladattu (selain)', '#3fb950');
return;
}
coderSize = '05b';
termLog(' Ladataan Qwen2.5-Coder:0.5B selaimeen...', '#d29922');
if (btn) btn.click();
else ensureCoderNode();
return;
}
// Ollama: vaihdetaan malli hubin kautta
termLog(` Vaihdetaan Ollama-malli: ${selected.name} (${selected.size})...`, '#d29922');
fetch('/api/v1/model', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: selected.name }),
}).then(r => r.json()).then(data => {
if (data.status === 'ok') {
termLog(` <span style="color:#3fb950">✓</span> Malli vaihdettu: ${selected.name}`, '#3fb950');
termLog(' <span style="color:#8b949e">Ollama lataa mallin ensimmäisellä pyynnöllä</span>');
// Päivitetään aktiivinen default
ollamaModels.forEach(m => m.default = false);
selected.default = true;
} else {
termLog(` ✗ Mallin vaihto epäonnistui`, '#f85149');
}
}).catch(e => termLog(`${e.message}`, '#f85149'));
return;
}