Remote start stop control

This commit is contained in:
Jaakko Vanhala
2026-04-11 19:14:20 +03:00
parent 660e80c2bc
commit 80806498e0
7 changed files with 279 additions and 78 deletions

View File

@@ -19,3 +19,5 @@ wgpu = { version = "24", optional = true }
reqwest = { version = "0.12", features = ["json"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
dialoguer = "0.12.0"
console = "0.16.3"

View File

@@ -9,8 +9,6 @@ pub struct LlmEngine {
impl LlmEngine {
pub async fn load() -> Result<Self, String> {
let model = std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "qwen2.5-coder:3b".to_string());
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(600))
.connect_timeout(std::time::Duration::from_secs(3))
@@ -48,6 +46,13 @@ impl LlmEngine {
})
};
// Kysytään malli TUI:lla jos ei pakotettu ympäristöstä
let model = if let Ok(m) = std::env::var("OLLAMA_MODEL") {
m
} else {
crate::tui::select_model(&ollama_url, &client).await?
};
tracing::info!("Ollama backend: {} | malli: {}", ollama_url, model);
Ok(LlmEngine { ollama_url, model: RefCell::new(model), client })
}

View File

@@ -5,6 +5,7 @@ use tokio_tungstenite::connect_async;
use tokio_tungstenite::tungstenite::Message;
mod inference;
mod tui;
/// GPU-tietorakenne — yhtenäinen kaikille valmistajille
struct GpuInfo {
@@ -354,74 +355,115 @@ async fn main() {
continue;
}
while let Some(Ok(msg)) = read.next().await {
if let Message::Text(text) = msg {
// LLM-promptit
if text.contains("llm_prompt") {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("");
let task_id = task.get("task_id").and_then(|v| v.as_str()).unwrap_or("?");
let msg_model = task.get("model").and_then(|v| v.as_str()).unwrap_or("");
if !prompt.is_empty() && (msg_model.starts_with("qwen-coder") || msg_model.starts_with("qwen2.5-coder")) {
use tokio::io::AsyncBufReadExt;
let mut stdin_lines = tokio::io::BufReader::new(tokio::io::stdin()).lines();
if let Some(ref engine) = llm {
let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize;
let prompt_lines = prompt.lines().count();
let prompt_last: String = prompt.lines().last().unwrap_or("").chars().take(60).collect();
tracing::info!("→ task_id:{} | {}r prompti | \"{}...\"", task_id, prompt_lines, prompt_last);
let model_name = engine.model_name();
match engine.generate(prompt, max_tokens).await {
Ok(result) => {
tracing::info!(
"✓ {} | {} tok | {:.0}ms | {:.1} tok/s",
model_name,
result.tokens_generated,
result.duration_ms,
result.tokens_per_sec,
);
// Lähetetään vain lyhyt prompti-esikatselu (ei koko kontekstia)
let prompt_short: String = prompt.lines().last().unwrap_or("").chars().take(100).collect();
let done = json!({
"type": "llm_done",
"prompt": prompt_short,
"model": format!("{} (Ollama)", model_name),
"response": result.text,
"tokens_generated": result.tokens_generated,
"duration_ms": result.duration_ms,
"tokens_per_sec": (result.tokens_per_sec * 10.0).round() / 10.0,
"load_time_ms": 0,
"task_id": task_id,
});
let _ = write.send(Message::Text(done.to_string())).await;
}
Err(e) => {
tracing::error!("Inferenssivirhe: {}", e);
}
}
}
loop {
tokio::select! {
line = stdin_lines.next_line() => {
if let Ok(Some(text)) = line {
let t = text.trim();
if t == "p" || t == "pause" {
tracing::info!("Tauotetaan solmun suoritus (Hub ei lähetä tehtäviä)...");
let req = json!({"type": "status_update", "status": "paused"});
let _ = write.send(Message::Text(req.to_string())).await;
} else if t == "r" || t == "resume" || t == "s" {
tracing::info!("Jatketaan solmun suoritusta...");
let req = json!({"type": "status_update", "status": "active"});
let _ = write.send(Message::Text(req.to_string())).await;
}
}
}
// Mallin vaihto lennossa
if text.contains("change_model") {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
if let Some(new_model) = task.get("model").and_then(|v| v.as_str()) {
if let Some(ref engine) = llm {
tracing::info!("Vaihdetaan malli: {}", new_model);
engine.set_model(new_model.to_string());
match engine.ensure_model().await {
Ok(()) => tracing::info!("Malli {} valmis!", new_model),
Err(e) => tracing::error!("Mallin lataus epäonnistui: {}", e),
ws_msg = read.next() => {
match ws_msg {
Some(Ok(Message::Text(text))) => {
// Hubin control-viestit
if text.contains(r#""type":"control""#) {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
if let Some(action) = task.get("action").and_then(|v| v.as_str()) {
if action == "pause" {
tracing::info!("Hub pakotti solmun tauolle (Pause)");
let req = json!({"type": "status_update", "status": "paused"});
let _ = write.send(Message::Text(req.to_string())).await;
} else if action == "resume" {
tracing::info!("Hub aktivoi solmun suorituksen (Resume)");
let req = json!({"type": "status_update", "status": "active"});
let _ = write.send(Message::Text(req.to_string())).await;
}
}
}
}
// LLM-promptit
if text.contains("llm_prompt") {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("");
let task_id = task.get("task_id").and_then(|v| v.as_str()).unwrap_or("?");
let msg_model = task.get("model").and_then(|v| v.as_str()).unwrap_or("");
if !prompt.is_empty() && (msg_model.starts_with("qwen-coder") || msg_model.starts_with("qwen2.5-coder") || msg_model.starts_with("phi")) {
if let Some(ref engine) = llm {
let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize;
let prompt_lines = prompt.lines().count();
let prompt_last: String = prompt.lines().last().unwrap_or("").chars().take(60).collect();
tracing::info!("→ task_id:{} | {}r prompti | \"{}...\"", task_id, prompt_lines, prompt_last);
let model_name = engine.model_name();
match engine.generate(prompt, max_tokens).await {
Ok(result) => {
tracing::info!(
"✓ {} | {} tok | {:.0}ms | {:.1} tok/s",
model_name,
result.tokens_generated,
result.duration_ms,
result.tokens_per_sec,
);
let prompt_short: String = prompt.lines().last().unwrap_or("").chars().take(100).collect();
let done = json!({
"type": "llm_done",
"prompt": prompt_short,
"model": format!("{} (Ollama)", model_name),
"response": result.text,
"tokens_generated": result.tokens_generated,
"duration_ms": result.duration_ms,
"tokens_per_sec": (result.tokens_per_sec * 10.0).round() / 10.0,
"load_time_ms": 0,
"task_id": task_id,
});
let _ = write.send(Message::Text(done.to_string())).await;
}
Err(e) => {
tracing::error!("Inferenssivirhe: {}", e);
}
}
}
}
}
}
// Mallin vaihto lennossa
if text.contains("change_model") {
if let Ok(task) = serde_json::from_str::<serde_json::Value>(&text) {
if let Some(new_model) = task.get("model").and_then(|v| v.as_str()) {
if let Some(ref engine) = llm {
tracing::info!("Vaihdetaan malli: {}", new_model);
engine.set_model(new_model.to_string());
match engine.ensure_model().await {
Ok(()) => tracing::info!("Malli {} valmis!", new_model),
Err(e) => tracing::error!("Mallin lataus epäonnistui: {}", e),
}
}
}
}
}
}
Some(Ok(_)) => {} // Muut viestityypit (binary/ping)
Some(Err(_)) | None => break, // Yhteys poikki
}
}
}
}
tracing::warn!("Yhteys hubiin katkesi — yritetään uudelleen 5s...");
}
Err(e) => {

View File

@@ -0,0 +1,67 @@
use dialoguer::{Select, Input, theme::ColorfulTheme};
use reqwest::Client;
/// Interactively chooses the Ollama model this node should use.
///
/// The installed models are fetched from `{ollama_url}/api/tags` and shown in a
/// terminal selection menu. The first menu entry lets the user type the name of
/// a model that is not installed yet; that model is then pulled through
/// `{ollama_url}/api/pull` before its name is returned.
///
/// If the model list cannot be fetched, a warning is printed and the menu only
/// offers the download entry.
///
/// NOTE(review): the `dialoguer` prompts are synchronous and block the calling
/// thread while they wait for input.
///
/// # Errors
///
/// Returns a human-readable message when the terminal prompt fails, when the
/// entered model name is empty, or when the pull request fails or is rejected
/// by Ollama.
pub async fn select_model(ollama_url: &str, client: &Client) -> Result<String, String> {
    // Menu position of the "download a new model" entry.
    const DOWNLOAD_INDEX: usize = 0;

    // 1. Fetch the installed models (the tags).
    println!(" Haetaan asennettuja malleja osoitteesta {}...", ollama_url);
    let models = match fetch_installed_models(ollama_url, client).await {
        Ok(models) => models,
        Err(e) => {
            // Best effort: the user can still download a model by name, but
            // should be told why the list is empty.
            eprintln!(" Varoitus: asennettujen mallien haku epäonnistui: {}", e);
            Vec::new()
        }
    };

    let download_opt = "[ Lataa uusi malli internetistä]";
    let mut options = vec![download_opt.to_string()];
    options.extend(models);

    // 2. Ask the user with a select menu; preselect the first installed model
    //    when there is one, otherwise the download entry.
    let theme = ColorfulTheme::default();
    let selection = Select::with_theme(&theme)
        .with_prompt("Valitse Ollama-malli Kipinä-verkkoa varten:")
        .default(if options.len() > 1 { 1 } else { DOWNLOAD_INDEX })
        .items(&options)
        .interact()
        .map_err(|e| format!("TUI virhe: {}", e))?;

    if selection != DOWNLOAD_INDEX {
        return Ok(options[selection].clone());
    }

    // 3. The user wants a new model: ask for its name.
    let new_model: String = Input::with_theme(&theme)
        .with_prompt("Syötä ladattavan mallin nimi (esim. llama3 tai qwen2.5-coder:3b)")
        .interact_text()
        .map_err(|e| format!("TUI virhe: {}", e))?;
    let new_model = new_model.trim().to_string();
    if new_model.is_empty() {
        return Err("Mallin nimi ei voi olla tyhjä".to_string());
    }

    println!(" Ladataan malleja taustalla... Tämä voi kestää hetken ({})", new_model);
    pull_model(ollama_url, client, &new_model).await?;
    println!(" ✓ Malli {} ladattu onnistuneesti!", new_model);
    Ok(new_model)
}

/// Returns the `name` of every entry in the `models` array of `/api/tags`.
async fn fetch_installed_models(ollama_url: &str, client: &Client) -> Result<Vec<String>, String> {
    let resp = client
        .get(format!("{}/api/tags", ollama_url))
        .send()
        .await
        .map_err(|e| e.to_string())?;
    if !resp.status().is_success() {
        return Err(format!("HTTP {}", resp.status()));
    }
    let json = resp
        .json::<serde_json::Value>()
        .await
        .map_err(|e| e.to_string())?;
    // A missing or malformed `models` array is treated as "nothing installed".
    let names: Vec<String> = json
        .get("models")
        .and_then(|v| v.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|m| m.get("name").and_then(|v| v.as_str()))
                .map(str::to_owned)
                .collect()
        })
        .unwrap_or_default();
    Ok(names)
}

/// Pulls `model` through `/api/pull` and waits until the pull has finished.
async fn pull_model(ollama_url: &str, client: &Client, model: &str) -> Result<(), String> {
    // Model downloads can take far longer than an ordinary request, so this
    // request gets its own generous timeout instead of the client default.
    const PULL_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60 * 60);

    // `stream: false` makes Ollama answer once, after the pull has completed.
    // With the default streaming mode the 200 status arrives as soon as the
    // pull *starts*, so a status check alone would report success too early.
    let pull_body = serde_json::json!({ "name": model, "stream": false });
    let resp = client
        .post(format!("{}/api/pull", ollama_url))
        .timeout(PULL_TIMEOUT)
        .json(&pull_body)
        .send()
        .await
        .map_err(|e| format!("Pull req virhe: {}", e))?;

    let status = resp.status();
    // Reading the whole body keeps the connection open until Ollama is done
    // and lets us inspect the final result.
    let body = resp
        .text()
        .await
        .map_err(|e| format!("Pull req virhe: {}", e))?;

    // Ollama reports failures as `{"error": "..."}`. Scan line by line so a
    // newline-delimited progress stream is handled as well as a single object.
    let pull_error = body
        .lines()
        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
        .find_map(|v| v.get("error").and_then(|e| e.as_str()).map(str::to_owned));

    match (status.is_success(), pull_error) {
        (true, None) => Ok(()),
        (true, Some(msg)) => Err(format!("Ollama pull epäonnistui: {}", msg)),
        (false, Some(msg)) => Err(format!("Ollama pull epäonnistui: {} ({})", status, msg)),
        (false, None) => Err(format!("Ollama pull epäonnistui: {}", status)),
    }
}