kpn load: Ollama-mallin vaihto lennossa (0.5b → 32b)
- Hub: uusi POST /api/v1/model endpoint, broadcastaa change_model
- Native node: kuuntelee change_model, kutsuu Ollaman pull + vaihtaa mallin
- Frontend: kpn load näyttää 5 mallia, numero vaihtaa Ollaman mallin
- Selain-WASM pysyy 0.5B:nä (kpn load 1)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -384,6 +384,7 @@ async fn main() {
|
|||||||
.route("/api/pairs", get(api_pairs))
|
.route("/api/pairs", get(api_pairs))
|
||||||
.route("/api/stats", get(api_stats))
|
.route("/api/stats", get(api_stats))
|
||||||
.route("/api/v1/chat/completions", axum::routing::post(api_chat_completions))
|
.route("/api/v1/chat/completions", axum::routing::post(api_chat_completions))
|
||||||
|
.route("/api/v1/model", axum::routing::post(api_change_model))
|
||||||
.route("/admin", get(admin_page))
|
.route("/admin", get(admin_page))
|
||||||
.nest_service("/", {
|
.nest_service("/", {
|
||||||
let static_dir = std::env::var("STATIC_DIR").unwrap_or_else(|_| "../static".to_string());
|
let static_dir = std::env::var("STATIC_DIR").unwrap_or_else(|_| "../static".to_string());
|
||||||
@@ -958,6 +959,20 @@ struct ChatCompletionResponse {
|
|||||||
tokens_generated: u64,
|
tokens_generated: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn api_change_model(
|
||||||
|
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
|
||||||
|
axum::Json(payload): axum::Json<serde_json::Value>,
|
||||||
|
) -> axum::response::Response {
|
||||||
|
let model = payload.get("model").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
if model.is_empty() {
|
||||||
|
return (axum::http::StatusCode::BAD_REQUEST, "model puuttuu").into_response();
|
||||||
|
}
|
||||||
|
tracing::info!("Mallin vaihto: {}", model);
|
||||||
|
let msg = serde_json::json!({ "type": "change_model", "model": model });
|
||||||
|
let _ = state.stats_tx.send(msg.to_string());
|
||||||
|
axum::Json(serde_json::json!({ "status": "ok", "model": model })).into_response()
|
||||||
|
}
|
||||||
|
|
||||||
async fn api_chat_completions(
|
async fn api_chat_completions(
|
||||||
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
|
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
|
||||||
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use std::cell::RefCell;
|
||||||
|
|
||||||
pub struct LlmEngine {
|
pub struct LlmEngine {
|
||||||
ollama_url: String,
|
ollama_url: String,
|
||||||
model: String,
|
model: RefCell<String>,
|
||||||
client: reqwest::Client,
|
client: reqwest::Client,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -18,24 +19,29 @@ impl LlmEngine {
|
|||||||
.build()
|
.build()
|
||||||
.map_err(|e| format!("HTTP client: {}", e))?;
|
.map_err(|e| format!("HTTP client: {}", e))?;
|
||||||
|
|
||||||
Ok(LlmEngine { ollama_url, model, client })
|
Ok(LlmEngine { ollama_url, model: RefCell::new(model), client })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn model_name(&self) -> &str {
|
pub fn model_name(&self) -> String {
|
||||||
&self.model
|
self.model.borrow().clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_model(&self, new_model: String) {
|
||||||
|
*self.model.borrow_mut() = new_model;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Varmistaa että malli on ladattu Ollamaan (ollama pull)
|
/// Varmistaa että malli on ladattu Ollamaan (ollama pull)
|
||||||
pub async fn ensure_model(&self) -> Result<(), String> {
|
pub async fn ensure_model(&self) -> Result<(), String> {
|
||||||
tracing::info!("Tarkistetaan malli {}...", self.model);
|
let model = self.model.borrow().clone();
|
||||||
|
tracing::info!("Tarkistetaan malli {}...", model);
|
||||||
let resp = self.client.post(format!("{}/api/pull", self.ollama_url))
|
let resp = self.client.post(format!("{}/api/pull", self.ollama_url))
|
||||||
.json(&serde_json::json!({ "name": self.model, "stream": false }))
|
.json(&serde_json::json!({ "name": model, "stream": false }))
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.map_err(|e| format!("Ollama pull: {}", e))?;
|
.map_err(|e| format!("Ollama pull: {}", e))?;
|
||||||
|
|
||||||
if resp.status().is_success() {
|
if resp.status().is_success() {
|
||||||
tracing::info!("Malli {} valmis", self.model);
|
tracing::info!("Malli {} valmis", model);
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
Err(format!("Ollama pull epäonnistui: {}", resp.status()))
|
Err(format!("Ollama pull epäonnistui: {}", resp.status()))
|
||||||
@@ -44,11 +50,12 @@ impl LlmEngine {
|
|||||||
|
|
||||||
pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
|
pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
|
||||||
let system = "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.";
|
let system = "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.";
|
||||||
|
let model = self.model.borrow().clone();
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
|
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
|
||||||
.json(&serde_json::json!({
|
.json(&serde_json::json!({
|
||||||
"model": self.model,
|
"model": model,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"system": system,
|
"system": system,
|
||||||
"stream": false,
|
"stream": false,
|
||||||
|
|||||||
@@ -366,7 +366,21 @@ async fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Ohitetaan pair_task, stats jne.
|
// Mallin vaihto lennossa
|
||||||
|
if text.contains("change_model") {
|
||||||
|
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||||
|
if let Some(new_model) = task.get("model").and_then(|v| v.as_str()) {
|
||||||
|
if let Some(ref engine) = llm {
|
||||||
|
tracing::info!("Vaihdetaan malli: {}", new_model);
|
||||||
|
engine.set_model(new_model.to_string());
|
||||||
|
match engine.ensure_model().await {
|
||||||
|
Ok(()) => tracing::info!("Malli {} valmis!", new_model),
|
||||||
|
Err(e) => tracing::error!("Mallin lataus epäonnistui: {}", e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tracing::warn!("Yhteys hubiin katkesi — yritetään uudelleen 5s...");
|
tracing::warn!("Yhteys hubiin katkesi — yritetään uudelleen 5s...");
|
||||||
|
|||||||
@@ -2371,20 +2371,60 @@ Files: ${Object.keys(generatedFiles).join(', ')}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (sub === 'load') {
|
if (sub === 'load') {
|
||||||
|
const arg = parts[2];
|
||||||
|
const ollamaModels = [
|
||||||
|
{ id: '1', name: 'qwen2.5-coder:0.5b', size: '~400 MB', type: 'selain + Ollama' },
|
||||||
|
{ id: '2', name: 'qwen2.5-coder:1.5b', size: '~1 GB', type: 'Ollama GPU' },
|
||||||
|
{ id: '3', name: 'qwen2.5-coder:7b', size: '~4.7 GB', type: 'Ollama GPU', default: true },
|
||||||
|
{ id: '4', name: 'qwen2.5-coder:14b', size: '~9 GB', type: 'Ollama GPU' },
|
||||||
|
{ id: '5', name: 'qwen2.5-coder:32b', size: '~20 GB', type: 'Ollama GPU' },
|
||||||
|
];
|
||||||
|
if (!arg) {
|
||||||
|
termLog(' Mallit:', '#c9d1d9');
|
||||||
|
for (const m of ollamaModels) {
|
||||||
|
const active = m.default ? ' <span style="color:#3fb950">← aktiivinen</span>' : '';
|
||||||
|
termLog(` <span style="color:#58a6ff">${m.id}</span> ${m.name} <span style="color:#8b949e">${m.size} | ${m.type}</span>${active}`);
|
||||||
|
}
|
||||||
|
termLog(' Käyttö: kpn load <numero>', '#8b949e');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const selected = ollamaModels.find(m => m.id === arg || m.name === arg);
|
||||||
|
if (!selected) {
|
||||||
|
termLog(` Tuntematon malli "${esc(arg)}". Kokeile: kpn load`, '#f85149');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Selain-WASM (vain 0.5b)
|
||||||
|
if (selected.id === '1') {
|
||||||
const btn = document.getElementById('agent-compute-btn');
|
const btn = document.getElementById('agent-compute-btn');
|
||||||
if (btn?.dataset.state === 'ready') {
|
if (btn?.dataset.state === 'ready') {
|
||||||
termLog(' ✓ Qwen2.5-Coder:0.5B on jo ladattu ja valmis (selain)', '#3fb950');
|
termLog(' ✓ Qwen2.5-Coder:0.5B on jo ladattu (selain)', '#3fb950');
|
||||||
termLog(' <span style="color:#8b949e">Natiivisolmu (Docker) on nopeampi — ks. kpn models</span>');
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
coderSize = '05b';
|
coderSize = '05b';
|
||||||
localStorage.setItem('kpn-coder-size', coderSize);
|
termLog(' Ladataan Qwen2.5-Coder:0.5B selaimeen...', '#d29922');
|
||||||
termLog(' Ladataan Qwen2.5-Coder:0.5B (~990 MB) selaimeen...', '#d29922');
|
|
||||||
termLog(' <span style="color:#8b949e">Vinkki: natiivisolmu (Docker) on ~20× nopeampi</span>');
|
|
||||||
if (btn) btn.click();
|
if (btn) btn.click();
|
||||||
else ensureCoderNode();
|
else ensureCoderNode();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// Ollama: vaihdetaan malli hubin kautta
|
||||||
|
termLog(` Vaihdetaan Ollama-malli: ${selected.name} (${selected.size})...`, '#d29922');
|
||||||
|
fetch('/api/v1/model', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ model: selected.name }),
|
||||||
|
}).then(r => r.json()).then(data => {
|
||||||
|
if (data.status === 'ok') {
|
||||||
|
termLog(` <span style="color:#3fb950">✓</span> Malli vaihdettu: ${selected.name}`, '#3fb950');
|
||||||
|
termLog(' <span style="color:#8b949e">Ollama lataa mallin ensimmäisellä pyynnöllä</span>');
|
||||||
|
// Päivitetään aktiivinen default
|
||||||
|
ollamaModels.forEach(m => m.default = false);
|
||||||
|
selected.default = true;
|
||||||
|
} else {
|
||||||
|
termLog(` ✗ Mallin vaihto epäonnistui`, '#f85149');
|
||||||
|
}
|
||||||
|
}).catch(e => termLog(` ✗ ${e.message}`, '#f85149'));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (sub === 'status') {
|
if (sub === 'status') {
|
||||||
const nodes = statNodes.textContent || '0';
|
const nodes = statNodes.textContent || '0';
|
||||||
|
|||||||
Reference in New Issue
Block a user