kpn load: laitteiston VRAM/RAM tarkistus, liian isot mallit merkitään
Hub: uusi GET /api/v1/hardware palauttaa natiivisolmun GPU/RAM-tiedot. Frontend: kpn load hakee laitteistotiedon ja näyttää mallit, joihin laite riittää. Liian isot mallit näkyvät yliviivattuina + varoitus.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -385,6 +385,7 @@ async fn main() {
|
|||||||
.route("/api/stats", get(api_stats))
|
.route("/api/stats", get(api_stats))
|
||||||
.route("/api/v1/chat/completions", axum::routing::post(api_chat_completions))
|
.route("/api/v1/chat/completions", axum::routing::post(api_chat_completions))
|
||||||
.route("/api/v1/model", axum::routing::post(api_change_model))
|
.route("/api/v1/model", axum::routing::post(api_change_model))
|
||||||
|
.route("/api/v1/hardware", get(api_hardware))
|
||||||
.route("/admin", get(admin_page))
|
.route("/admin", get(admin_page))
|
||||||
.nest_service("/", {
|
.nest_service("/", {
|
||||||
let static_dir = std::env::var("STATIC_DIR").unwrap_or_else(|_| "../static".to_string());
|
let static_dir = std::env::var("STATIC_DIR").unwrap_or_else(|_| "../static".to_string());
|
||||||
@@ -959,6 +960,33 @@ struct ChatCompletionResponse {
|
|||||||
tokens_generated: u64,
|
tokens_generated: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn api_hardware(
|
||||||
|
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
|
||||||
|
) -> axum::response::Response {
|
||||||
|
// Etsitään natiivisolmun GPU-tiedot sessiosta
|
||||||
|
let sessions = state.db.get_sessions(50);
|
||||||
|
let native = sessions.iter().find(|s| {
|
||||||
|
s.get("node_type").and_then(|v| v.as_str()) == Some("native")
|
||||||
|
});
|
||||||
|
|
||||||
|
let (vram_mb, gpu_name, ram_mb) = if let Some(s) = native {
|
||||||
|
let gpus = s.get("gpus").and_then(|v| v.as_array());
|
||||||
|
let gpu = gpus.and_then(|g| g.first());
|
||||||
|
let vram = gpu.and_then(|g| g.get("vram_total_mb")).and_then(|v| v.as_u64()).unwrap_or(0);
|
||||||
|
let name = gpu.and_then(|g| g.get("name")).and_then(|v| v.as_str()).unwrap_or("?");
|
||||||
|
let ram = s.get("system").and_then(|v| v.get("ram_total_mb")).and_then(|v| v.as_u64()).unwrap_or(0);
|
||||||
|
(vram, name.to_string(), ram)
|
||||||
|
} else {
|
||||||
|
(0, "ei natiivisolmua".to_string(), 0)
|
||||||
|
};
|
||||||
|
|
||||||
|
axum::Json(serde_json::json!({
|
||||||
|
"gpu_name": gpu_name,
|
||||||
|
"vram_mb": vram_mb,
|
||||||
|
"ram_mb": ram_mb,
|
||||||
|
})).into_response()
|
||||||
|
}
|
||||||
|
|
||||||
async fn api_change_model(
|
async fn api_change_model(
|
||||||
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
|
axum::extract::State(state): axum::extract::State<Arc<AppState>>,
|
||||||
axum::Json(payload): axum::Json<serde_json::Value>,
|
axum::Json(payload): axum::Json<serde_json::Value>,
|
||||||
|
|||||||
@@ -2373,19 +2373,41 @@ Files: ${Object.keys(generatedFiles).join(', ')}`;
|
|||||||
if (sub === 'load') {
|
if (sub === 'load') {
|
||||||
const arg = parts[2];
|
const arg = parts[2];
|
||||||
const ollamaModels = [
|
const ollamaModels = [
|
||||||
{ id: '1', name: 'qwen2.5-coder:0.5b', size: '~400 MB', type: 'selain + Ollama' },
|
{ id: '1', name: 'qwen2.5-coder:0.5b', size: '~400 MB', vram_mb: 0, type: 'selain + Ollama' },
|
||||||
{ id: '2', name: 'qwen2.5-coder:1.5b', size: '~1 GB', type: 'Ollama GPU' },
|
{ id: '2', name: 'qwen2.5-coder:1.5b', size: '~1 GB', vram_mb: 1500, type: 'Ollama GPU' },
|
||||||
{ id: '3', name: 'qwen2.5-coder:7b', size: '~4.7 GB', type: 'Ollama GPU', default: true },
|
{ id: '3', name: 'qwen2.5-coder:7b', size: '~4.7 GB', vram_mb: 5500, type: 'Ollama GPU', default: true },
|
||||||
{ id: '4', name: 'qwen2.5-coder:14b', size: '~9 GB', type: 'Ollama GPU' },
|
{ id: '4', name: 'qwen2.5-coder:14b', size: '~9 GB', vram_mb: 10000, type: 'Ollama GPU' },
|
||||||
{ id: '5', name: 'qwen2.5-coder:32b', size: '~20 GB', type: 'Ollama GPU' },
|
{ id: '5', name: 'qwen2.5-coder:32b', size: '~20 GB', vram_mb: 21000, type: 'Ollama GPU' },
|
||||||
];
|
];
|
||||||
if (!arg) {
|
if (!arg) {
|
||||||
termLog(' Mallit:', '#c9d1d9');
|
// Haetaan laitteistotiedot ja näytetään sopivat mallit
|
||||||
for (const m of ollamaModels) {
|
fetch('/api/v1/hardware').then(r => r.json()).then(hw => {
|
||||||
const active = m.default ? ' <span style="color:#3fb950">← aktiivinen</span>' : '';
|
const vram = hw.vram_mb || 0;
|
||||||
termLog(` <span style="color:#58a6ff">${m.id}</span> ${m.name} <span style="color:#8b949e">${m.size} | ${m.type}</span>${active}`);
|
const ram = hw.ram_mb || 0;
|
||||||
}
|
const gpu = hw.gpu_name || '?';
|
||||||
termLog(' Käyttö: kpn load <numero>', '#8b949e');
|
const available = vram || ram; // CPU-fallback käyttää RAM:ia
|
||||||
|
if (vram > 0) {
|
||||||
|
termLog(` <span style="color:#8b949e">GPU: ${gpu} | VRAM: ${Math.round(vram/1024)} GB | RAM: ${Math.round(ram/1024)} GB</span>`);
|
||||||
|
} else if (ram > 0) {
|
||||||
|
termLog(` <span style="color:#8b949e">Ei GPU:ta | RAM: ${Math.round(ram/1024)} GB (CPU-moodi)</span>`);
|
||||||
|
}
|
||||||
|
termLog(' Mallit:', '#c9d1d9');
|
||||||
|
for (const m of ollamaModels) {
|
||||||
|
const fits = m.vram_mb === 0 || m.vram_mb < available;
|
||||||
|
const active = m.default ? ' <span style="color:#3fb950">← aktiivinen</span>' : '';
|
||||||
|
const icon = fits ? `<span style="color:#58a6ff">${m.id}</span>` : `<span style="color:#8b949e;text-decoration:line-through">${m.id}</span>`;
|
||||||
|
const warn = !fits ? ' <span style="color:#f85149">⚠ ei mahdu</span>' : '';
|
||||||
|
termLog(` ${icon} ${fits ? '' : '<span style="color:#8b949e">'}${m.name} ${m.size} | ${m.type}${fits ? '' : '</span>'}${active}${warn}`);
|
||||||
|
}
|
||||||
|
termLog(' Käyttö: kpn load <numero>', '#8b949e');
|
||||||
|
}).catch(() => {
|
||||||
|
termLog(' Mallit:', '#c9d1d9');
|
||||||
|
for (const m of ollamaModels) {
|
||||||
|
const active = m.default ? ' <span style="color:#3fb950">← aktiivinen</span>' : '';
|
||||||
|
termLog(` <span style="color:#58a6ff">${m.id}</span> ${m.name} <span style="color:#8b949e">${m.size} | ${m.type}</span>${active}`);
|
||||||
|
}
|
||||||
|
termLog(' Käyttö: kpn load <numero>', '#8b949e');
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const selected = ollamaModels.find(m => m.id === arg || m.name === arg);
|
const selected = ollamaModels.find(m => m.id === arg || m.name === arg);
|
||||||
|
|||||||
Reference in New Issue
Block a user