deployment kokonaan uusiksi

This commit is contained in:
Jaakko Vanhala
2026-04-12 11:41:09 +03:00
parent 4983217ee0
commit 5f147b774f
24 changed files with 119 additions and 1162 deletions

View File

@@ -1,63 +0,0 @@
#!/bin/bash
# Build the kipina-node binaries for every supported platform.
#
# Artifacts are written to frontend/public/download/. A fingerprint of the
# build inputs is stored next to them so that an unchanged tree skips the
# rebuild entirely.
set -e
# Without pipefail, `cargo build ... | tail -1` reports tail's exit status, so
# a failed native build would go unnoticed and a stale binary would be copied.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
OUT="$SCRIPT_DIR/frontend/public/download"
HASH_FILE="$OUT/.build-hash"
# Paths whose changes require a rebuild.
BUILD_INPUTS=(native-node/ Cargo.toml Cargo.lock)
mkdir -p "$OUT"

# Check whether the build inputs changed since the previous successful build.
CURRENT_HASH=$(git -C "$SCRIPT_DIR" log -1 --format=%H -- "${BUILD_INPUTS[@]}")
DIRTY_STATUS=$(git -C "$SCRIPT_DIR" status --porcelain -- "${BUILD_INPUTS[@]}")
if [ -n "$DIRTY_STATUS" ]; then
    # Fingerprint the uncommitted state of the build inputs only. The status
    # listing covers untracked files and the diff covers content, so editing an
    # already-dirty file produces a new fingerprint. `git hash-object` is used
    # instead of md5sum because md5sum is not available on stock macOS.
    CURRENT_HASH="dirty-$({
        printf '%s\n' "$DIRTY_STATUS"
        git -C "$SCRIPT_DIR" diff HEAD -- "${BUILD_INPUTS[@]}"
    } | git hash-object --stdin)"
fi
if [ -f "$HASH_FILE" ] && [ "$(cat "$HASH_FILE")" = "$CURRENT_HASH" ]; then
    echo "=== Kipinä Node — ei muutoksia, ohitetaan build ==="
    ls -lh "$OUT"/kipina-node-* 2>/dev/null || true
    exit 0
fi

echo "=== Kipinä Node — Binary Build ==="

# macOS ARM (native build on the host)
echo "[1/4] macOS ARM64..."
cd "$SCRIPT_DIR"
cargo build --release -p native-node --no-default-features 2>&1 | tail -1
cp target/release/native-node "$OUT/kipina-node-macos-arm64"
echo " $(ls -lh "$OUT/kipina-node-macos-arm64" | awk '{print $5}')"

# Linux x86_64 (Docker)
echo "[2/4] Linux x86_64..."
docker run --rm \
    -v "$SCRIPT_DIR":/app -w /app \
    --platform linux/amd64 \
    rust:slim \
    bash -c "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev >/dev/null 2>&1 && cargo build --release -p native-node --no-default-features --target-dir target/docker_linux_amd64 && cp target/docker_linux_amd64/release/native-node /app/frontend/public/download/kipina-node-linux-x86_64"
echo " $(ls -lh "$OUT/kipina-node-linux-x86_64" | awk '{print $5}')"

# Linux ARM64 (Docker)
echo "[3/4] Linux ARM64..."
docker run --rm \
    -v "$SCRIPT_DIR":/app -w /app \
    --platform linux/arm64 \
    rust:slim \
    bash -c "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev >/dev/null 2>&1 && cargo build --release -p native-node --no-default-features --target-dir target/docker_linux_arm64 && cp target/docker_linux_arm64/release/native-node /app/frontend/public/download/kipina-node-linux-arm64"
echo " $(ls -lh "$OUT/kipina-node-linux-arm64" | awk '{print $5}')"

# Windows x86_64 (Docker + mingw-w64 cross toolchain)
echo "[4/4] Windows x86_64..."
docker run --rm \
    -v "$SCRIPT_DIR":/app -w /app \
    --platform linux/amd64 \
    rust:slim \
    bash -c "apt-get update -qq && apt-get install -y -qq gcc-mingw-w64-x86-64 pkg-config libssl-dev >/dev/null 2>&1 && rustup target add x86_64-pc-windows-gnu && cargo build --release -p native-node --no-default-features --target x86_64-pc-windows-gnu && cp target/x86_64-pc-windows-gnu/release/native-node.exe /app/frontend/public/download/kipina-node-windows-x86_64.exe"
echo " $(ls -lh "$OUT/kipina-node-windows-x86_64.exe" | awk '{print $5}')"

# Record the fingerprint only after every target built successfully.
echo "$CURRENT_HASH" > "$HASH_FILE"
echo ""
echo "=== Binäärit valmiina ==="
ls -lh "$OUT"/kipina-node-*

View File

@@ -1,30 +0,0 @@
#!/bin/bash
# Binaries-only update: rebuilds the kipina-node binaries (when needed) and
# copies them straight into the server's served dist/ directory, without
# rebuilding or restarting the Docker image.
set -e

SERVER="ubuntu@86.50.252.98"
REMOTE_DIR="~/code/agentic-studio/network-poc"
KEY="$HOME/.ssh/id_rsa"
SSH_OPTS="-o StrictHostKeyChecking=no -i $KEY"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
LOCAL_PUBLIC="$SCRIPT_DIR/frontend/public"
REMOTE_DIST="$REMOTE_DIR/frontend/dist"

# Run one command line on the deployment server.
on_server() { ssh $SSH_OPTS "$SERVER" "$1"; }

# Make sure the key is loaded in the agent before the first connection.
ssh-add -l 2>/dev/null | grep -q id_rsa || {
    echo "SSH-avain ei ole agentissa. Lisätään..."
    ssh-add "$KEY"
}

echo "=== Kipinä Node - Vain Binäärien Päivitys ==="

# 1. Build the binaries (build-binaries.sh skips the work when nothing changed).
"$SCRIPT_DIR/build-binaries.sh"

# 2. Copy the binaries directly into the target directory on the server.
echo ""
echo "[Vieminen uuteen kohteeseen...]"
on_server "mkdir -p $REMOTE_DIST/download"
scp $SSH_OPTS "$LOCAL_PUBLIC/download"/kipina-node-* "$SERVER:$REMOTE_DIST/download/"
scp $SSH_OPTS "$LOCAL_PUBLIC"/kipina-node "$SERVER:$REMOTE_DIST/"

# 3. Make the uploaded binaries executable.
on_server "chmod +x $REMOTE_DIST/download/kipina-node-*"

echo "=== Valmis! Binäärit ovat nyt asennettu livenä ja ladattavissa kipina.studiosta ==="

View File

@@ -1,28 +0,0 @@
#!/bin/bash
# Quick deploy: updates only the frontend (no container restart).
# To update the hub binary use deploy.sh or deploy-light.sh instead.
set -e
# pipefail makes `npm run build ... | tail -1` fail when the build fails;
# otherwise the script would carry on and sync a stale or empty dist/.
set -o pipefail

SERVER="ubuntu@86.50.252.98"
REMOTE_DIR="~/code/agentic-studio/network-poc"
SSH_OPTS="-o StrictHostKeyChecking=no"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DIST_DIR="$SCRIPT_DIR/frontend/dist"

echo "=== Kipinä Studio — Frontend Deploy ==="

# 1. Build the frontend locally.
echo "[1/2] Rakennetaan frontend..."
cd "$SCRIPT_DIR/frontend"
[ -d node_modules ] || npm install --silent
npm run build --silent 2>&1 | tail -1

# `rsync --delete` mirrors the local directory, so an empty dist/ would wipe
# the live site. Refuse to continue in that case.
if [ -z "$(ls -A "$DIST_DIR" 2>/dev/null)" ]; then
    echo "Virhe: frontend/dist on tyhjä — synkkaus keskeytetty" >&2
    exit 1
fi

# 2. Sync dist/ to the server (only changed files are transferred).
echo "[2/2] Synkataan dist/ → palvelin..."
ssh $SSH_OPTS "$SERVER" "mkdir -p $REMOTE_DIR/frontend/dist"
rsync -az --delete -e "ssh $SSH_OPTS" "$DIST_DIR/" "$SERVER:$REMOTE_DIR/frontend/dist/"

echo ""
echo "=== Valmis! Frontend päivitetty — ei uudelleenkäynnistystä ==="
echo " https://kipina.studio"
echo ""
echo "Huom: Jos Rust-koodi (hub/) muuttui, aja: ./deploy.sh"

View File

@@ -1,33 +0,0 @@
#!/bin/bash
# Light deploy: only the source code is sent; the server builds the image itself.
set -e

SERVER="ubuntu@86.50.252.98"
REMOTE_DIR="~/code/agentic-studio/network-poc"
SSH_OPTS="-o StrictHostKeyChecking=no"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "=== Kipinä Studio Deploy (remote build) ==="

# 1. Sync the code to the server (only changed files are transferred).
#    rsync is given the same ssh options as the plain ssh calls below, so the
#    transfer and the remote commands connect with identical settings.
echo "[1/3] Synkataan koodi..."
rsync -az --delete \
    -e "ssh $SSH_OPTS" \
    --exclude 'target/' \
    --exclude 'node_modules/' \
    --exclude 'dist/' \
    --exclude '.astro/' \
    --exclude 'temp/' \
    --exclude '*.db' \
    --exclude '.git/' \
    "$SCRIPT_DIR/" "$SERVER:$REMOTE_DIR/"

# 2. Build the image on the server.
echo "[2/3] Rakennetaan image palvelimella..."
ssh $SSH_OPTS "$SERVER" "cd $REMOTE_DIR && docker build -f Dockerfile.prod -t kipina-agentic:latest ."

# 3. Restart the stack with the freshly built image.
echo "[3/3] Käynnistetään..."
ssh $SSH_OPTS "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d"

echo "=== Valmis! https://kipina.studio ==="

56
network-poc/deploy-local.sh Executable file
View File

@@ -0,0 +1,56 @@
#!/bin/bash
# Kipinä Studio — local development environment.
# Builds the frontend, then starts the hub and, when Ollama is reachable,
# a native-node connected to it.
# Usage: ./deploy-local.sh
set -e
# pipefail: a failing `npm run build | tail -1` must stop the script.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

# Initialised up front so the trap can run safely at any point.
HUB_PID=""
NODE_PID=""
cleanup() {
    echo ""
    echo "Pysäytetään..."
    kill $HUB_PID $NODE_PID 2>/dev/null || true
    exit 0
}
trap cleanup INT TERM

# Free port 3000.
lsof -ti:3000 | xargs kill -9 2>/dev/null || true

# Frontend
echo "[1/3] Frontend..."
cd "$SCRIPT_DIR/frontend"
[ -d node_modules ] || npm install --silent
npm run build 2>&1 | tail -1
cd "$SCRIPT_DIR"

# Hub
echo "[2/3] Hub..."
STATIC_DIR="$SCRIPT_DIR/frontend/dist" cargo run -p hub 2>&1 &
HUB_PID=$!
# Wait until the hub answers, but give up if the process has died (for
# example on a compile error) instead of polling forever.
until curl -sf http://localhost:3000 >/dev/null 2>&1; do
    if ! kill -0 "$HUB_PID" 2>/dev/null; then
        echo "Hubin käynnistys epäonnistui" >&2
        exit 1
    fi
    sleep 1
done

# Native-node
if curl -sf http://localhost:11434/api/tags >/dev/null 2>&1; then
    # Pick a model: a 7b coder model first, then any coder model, then the
    # first model listed. `|| true` keeps a failed probe (no python3, bad JSON)
    # from aborting the script; MODEL is then empty and reported below.
    MODEL=$(curl -s http://localhost:11434/api/tags | python3 -c "
import sys,json
ms=json.load(sys.stdin).get('models',[])
for m in ms:
    n=m['name']
    if '7b' in n and 'coder' in n: print(n); exit()
for m in ms:
    if 'coder' in m['name']: print(m['name']); exit()
if ms: print(ms[0]['name'])
" 2>/dev/null) || true
    if [ -n "$MODEL" ]; then
        echo "[3/3] Native-node ($MODEL)..."
        HUB_URL=ws://localhost:3000/ws OLLAMA_MODEL="$MODEL" \
            cargo run -p native-node --no-default-features 2>&1 &
        NODE_PID=$!
    else
        echo "[3/3] Ollama: ei malleja (ollama pull qwen2.5-coder:7b)"
    fi
else
    echo "[3/3] Ei Ollamaa — Wasm-fallback selaimessa"
fi

echo ""
echo "=== http://localhost:3000 === Ctrl+C pysäyttää"
open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || true

# Block until the hub exits, then stop the native-node so it is not left
# running in the background. The hub's exit status is passed through.
HUB_STATUS=0
wait "$HUB_PID" || HUB_STATUS=$?
[ -z "$NODE_PID" ] || kill "$NODE_PID" 2>/dev/null || true
exit "$HUB_STATUS"

58
network-poc/deploy-remote.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/bin/bash
# Kipinä Studio — production deploy to kipina.studio.
# Builds the Docker image (frontend + hub + wasm) locally and ships it to the server.
# Usage: ./deploy-remote.sh
#
# Optional environment:
#   DISCORD_WEBHOOK_URL  Discord webhook that receives the release notification.
#                        When unset, no notification is sent.
set -e
# pipefail: if `docker save` fails, `docker save | gzip` must fail too;
# otherwise a truncated archive would be uploaded and loaded on the server.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

SERVER="ubuntu@86.50.252.98"
REMOTE_DIR="~/code/agentic-studio/network-poc"
KEY="$HOME/.ssh/id_rsa"
SSH_OPTS="-o StrictHostKeyChecking=no -i $KEY"

# SSH key: load it into the agent if it is not there yet.
if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then
    echo "SSH-avain puuttuu agentista..."
    ssh-add "$KEY"
fi

# Auto-commit: uncommitted or untracked changes are committed before deploying.
if ! git diff --quiet HEAD 2>/dev/null || \
    [ -n "$(git ls-files --others --exclude-standard 2>/dev/null)" ]; then
    echo "Uncommitted muutoksia — commitoidaan..."
    read -rp " Commit-viesti: " msg
    [ -z "$msg" ] && msg="Deploy $(date +%Y-%m-%d\ %H:%M)"
    git add -A && git commit -m "$msg"
fi

echo "=== Kipinä Studio Deploy → kipina.studio ==="

# 1. Docker image
echo "[1/4] Docker build..."
docker build --platform linux/amd64 -f Dockerfile.prod -t kipina-agentic:latest .

# 2. Pack the image
echo "[2/4] Pakataan..."
docker save kipina-agentic:latest | gzip > /tmp/kipina-agentic.tar.gz
echo " $(du -h /tmp/kipina-agentic.tar.gz | cut -f1)"

# 3. Transfer
echo "[3/4] Siirretään..."
scp $SSH_OPTS /tmp/kipina-agentic.tar.gz "$SERVER:/tmp/"
scp $SSH_OPTS docker-compose.prod.yml Caddyfile.prod "$SERVER:$REMOTE_DIR/"

# 4. Load the image on the server and restart the stack
echo "[4/4] Käynnistetään..."
ssh $SSH_OPTS "$SERVER" "gunzip -c /tmp/kipina-agentic.tar.gz | docker load && rm /tmp/kipina-agentic.tar.gz"
ssh $SSH_OPTS "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d"

# Discord notification (best effort).
# The webhook URL is a credential: anyone who knows it can post to the channel.
# It is therefore read from the environment rather than stored in the repo.
# NOTE(review): the URL that used to be hard-coded here is in git history and
# should be rotated in Discord.
WEBHOOK="${DISCORD_WEBHOOK_URL:-}"
if [ -n "$WEBHOOK" ]; then
    HASH=$(git log -1 --pretty=format:"%h" 2>/dev/null || echo "?")
    MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "?")
    # json.dumps handles quoting of arbitrary commit subjects. A failed
    # notification must not fail a deploy that has already gone live.
    PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({'content':sys.argv[1]}))" \
        "🚀 **Kipinä Studio julkaistu!** \`${HASH}\` ${MSG} https://kipina.studio") || PAYLOAD=""
    if [ -n "$PAYLOAD" ]; then
        curl -sf -H "Content-Type: application/json" -d "$PAYLOAD" "$WEBHOOK" >/dev/null || true
    fi
fi

echo "=== Valmis! https://kipina.studio ==="

View File

@@ -1,14 +0,0 @@
#!/bin/bash
# Deploy preceded by a native-node binary build (the build is skipped when
# nothing changed).
set -e

HERE="$(cd "$(dirname "$0")" && pwd)"
cd "$HERE"

echo "=== Kipinä Studio Deploy (+ native binäärit) ==="

# Step 1: build the native-node binaries; build-binaries.sh returns early on
# its own when there are no changes.
./build-binaries.sh

# Step 2: hand over to the regular deploy (replaces this shell process).
exec ./deploy.sh

View File

@@ -1,68 +0,0 @@
#!/bin/bash
# Kipinä Studio deploy.
#   ./deploy.sh local   run the stack locally in Docker
#   ./deploy.sh         build the image, ship it to the server and restart
#
# Optional environment:
#   DISCORD_WEBHOOK_URL  Discord webhook that receives the release notification.
#                        When unset, no notification is sent.
set -e
# pipefail: if `docker save` fails, `docker save | gzip` must fail too;
# otherwise a truncated archive would be uploaded and loaded on the server.
set -o pipefail

if [ "${1:-}" = "local" ]; then
    echo "=== Kipinä Studio Local Development ==="
    echo "Käynnistetään kokonaisuus puhtaasti Docker-kontissa..."
    docker compose up agentic-poc
    exit 0
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

SERVER="ubuntu@86.50.252.98"
REMOTE_DIR="~/code/agentic-studio/network-poc"
KEY="$HOME/.ssh/id_rsa"
SSH_OPTS="-o StrictHostKeyChecking=no -i $KEY"

# Load the SSH key into the agent if it is not there yet.
if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then
    echo "SSH-avain ei ole agentissa. Lisätään..."
    ssh-add "$KEY"
fi

echo "=== Kipinä Studio Deploy ==="

# 0. Commit any uncommitted or untracked changes before deploying.
if ! git diff --quiet HEAD 2>/dev/null || \
    [ -n "$(git ls-files --others --exclude-standard 2>/dev/null)" ]; then
    echo "[0] Uncommitted muutoksia havaittu — commitoidaan..."
    read -rp " Commit-viesti: " DEPLOY_MSG
    if [ -z "$DEPLOY_MSG" ]; then
        DEPLOY_MSG="Deploy $(date +%Y-%m-%d\ %H:%M)"
    fi
    git add -A
    git commit -m "$DEPLOY_MSG"
    echo " Commitoitu: $DEPLOY_MSG"
fi

# 1. Docker image (frontend + hub + wasm)
echo "[1/4] Rakennetaan image..."
docker build --platform linux/amd64 -f Dockerfile.prod -t kipina-agentic:latest .

# 2. Pack the image
echo "[2/4] Pakataan image..."
docker save kipina-agentic:latest | gzip > /tmp/kipina-agentic.tar.gz
echo " Koko: $(du -h /tmp/kipina-agentic.tar.gz | cut -f1)"

# 3. Transfer
echo "[3/4] Siirretään palvelimelle..."
scp $SSH_OPTS /tmp/kipina-agentic.tar.gz "$SERVER:/tmp/"
scp $SSH_OPTS docker-compose.prod.yml Caddyfile.prod "$SERVER:$REMOTE_DIR/"

# 4. Load the image on the server and restart the stack
echo "[4/4] Käynnistetään palvelimella..."
ssh $SSH_OPTS "$SERVER" "gunzip -c /tmp/kipina-agentic.tar.gz | docker load && rm /tmp/kipina-agentic.tar.gz"
ssh $SSH_OPTS "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d"

echo "=== Valmis! https://kipina.studio ==="

# Discord notification (best effort).
# The webhook URL is a credential: anyone who knows it can post to the channel.
# It is therefore read from the environment rather than stored in the repo.
# NOTE(review): the URL that used to be hard-coded here is in git history and
# should be rotated in Discord.
DISCORD_WEBHOOK="${DISCORD_WEBHOOK_URL:-}"
if [ -n "$DISCORD_WEBHOOK" ]; then
    COMMIT_HASH=$(git log -1 --pretty=format:"%h" 2>/dev/null || echo "?")
    COMMIT_MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "?")
    # json.dumps handles quoting of arbitrary commit subjects. A failed
    # notification must not fail a deploy that has already gone live.
    PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({'content': sys.argv[1]}))" \
        "🚀 **Kipinä Studio julkaistu!**
> \`${COMMIT_HASH}\` ${COMMIT_MSG}
> https://kipina.studio") || PAYLOAD=""
    if [ -n "$PAYLOAD" ]; then
        curl -s -H "Content-Type: application/json" -d "$PAYLOAD" "$DISCORD_WEBHOOK" > /dev/null || true
    fi
fi

View File

@@ -613,7 +613,7 @@ OUTPUT FORMAT:
// === Terminal commands ===
const kpnCommands = {
'kpn': ['help','run','project','pipeline','stop','load','status','models','clear'],
'kpn run': ['coder','coder-3b','manager','tester','qa','qwen-coder','smollm-135m'],
'kpn run': ['coder','coder-3b','manager','tester','qa','qwen-coder'],
'kpn load': ['1','2'],
'kpn project': ['"'],
'kpn pipeline': ['"'],
@@ -703,9 +703,8 @@ OUTPUT FORMAT:
if (btn && btn.textContent.includes('Valmis')) { termLog(' ✓ Malli jo ladattu', '#3fb950'); }
else { btn?.click(); }
} else if (sub === 'models') {
termLog(' <span style="color:var(--accent)">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB</span>');
termLog(' <span style="color:var(--accent)">2</span> qwen-coder-3b Qwen2.5-Coder:3B <span style="color:#8b949e">~6.2 GB</span>');
termLog(' <span style="color:var(--accent)">3</span> smollm-135m SmolLM 135M <span style="color:#8b949e">~270 MB</span>');
termLog(' <span style="color:var(--accent)">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB (selain)</span>');
termLog(' <span style="color:var(--accent)">2</span> qwen-coder-3b Qwen2.5-Coder:3B <span style="color:#8b949e">~6.2 GB (Ollama)</span>');
} else if (sub === 'status') {
termLog(` Hub: ${document.getElementById('hub-label').textContent} | Laskenta: ${document.getElementById('compute-label').textContent}`, '#a5d6ff');
} else if (sub === 'run') {

View File

@@ -196,7 +196,7 @@ async function load() {
].map(s => `<div class="stat-card"><div class="val">${s.v}</div><div class="label">${s.l}</div></div>`).join('');
// Sessions — lajittelu: 1) aktiiviset nodet (online + ei viewer), 2) katsojat (online + viewer), 3) offline
const taskNames = {'tokenize':'Tokenisaatio','smollm-135m':'SmolLM 135M','qwen-05b':'Qwen2.5 0.5B','phi3-mini':'Phi-3 Mini','qwen-coder-05b':'Coder 0.5B','qwen-coder-3b':'Coder 3B','viewer':'Katsoja','codelab-viewer':'Koodilabra'};
const taskNames = {'tokenize':'Tokenisaatio','qwen-05b':'Qwen2.5 0.5B','qwen-coder-05b':'Coder 0.5B','qwen-coder-3b':'Coder 3B','viewer':'Katsoja','codelab-viewer':'Koodilabra'};
sessions.sort((a, b) => {
const aOnline = !a.disconnected_at;
const bOnline = !b.disconnected_at;
@@ -419,9 +419,7 @@ async fn main() {
// Vapaa node -> lähetetään oikea tehtävä
let msg = match task.as_str() {
"tokenize" => Some(serde_json::json!({ "type": "pair_task", "en": en, "fi": fi })),
"smollm-135m" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "smollm-135m" })),
"qwen-05b" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "qwen-05b" })),
"phi3-mini" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "phi3-mini" })),
_ => None, // Coder ja viewer ei saa auto-tehtäviä
};

View File

@@ -1,59 +0,0 @@
#!/bin/bash
# Kipinä Agentic Studio — installation script (Debian/Ubuntu).
# Installs the base packages, Rust, Node.js and Ollama when they are missing,
# then pulls the language model.
set -e

echo "=== Kipinä Agentic Studio — Asennus ==="
echo ""

# Operating system check: warn and ask for confirmation on non-Debian systems.
if [ ! -f /etc/debian_version ]; then
    echo "⚠ Tämä skripti on suunniteltu Debian/Ubuntu-järjestelmille."
    echo " Muilla jakeluilla voit asentaa riippuvuudet manuaalisesti."
    read -p " Jatketaanko? (k/e) " -n 1 -r; echo
    # Only a single "k" or "K" continues.
    case "$REPLY" in
        [Kk]) ;;
        *) exit 1 ;;
    esac
fi

echo "[1/6] Päivitetään pakettilistaus..."
sudo apt-get update -qq

echo "[2/6] Asennetaan peruspaketteja..."
sudo apt-get install -y -qq curl git build-essential pkg-config libssl-dev

# Rust
if ! command -v rustc &>/dev/null; then
    echo "[3/6] Asennetaan Rust..."
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
    # Make cargo/rustc available in this shell session.
    source "$HOME/.cargo/env"
else
    echo "[3/6] Rust löytyi: $(rustc --version)"
fi

# Node.js (needed by the Astro frontend)
if ! command -v node &>/dev/null; then
    echo "[4/6] Asennetaan Node.js 22..."
    curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash -
    sudo apt-get install -y -qq nodejs
else
    echo "[4/6] Node.js löytyi: $(node --version)"
fi

# Ollama
if ! command -v ollama &>/dev/null; then
    echo "[5/6] Asennetaan Ollama..."
    curl -fsSL https://ollama.ai/install.sh | sh
else
    echo "[5/6] Ollama löytyi"
fi

# Model
echo "[6/6] Ladataan kielimalli (qwen2.5-coder:3b)..."
ollama pull qwen2.5-coder:3b

echo ""
echo "=== Asennus valmis! ==="
echo ""
echo "Käynnistä:"
echo " cd $(pwd)"
echo " ./network-poc/local.sh"
echo ""
echo "Avaa selaimessa: http://localhost:3000"

View File

@@ -1,68 +0,0 @@
#!/bin/bash
# Kipinä Studio local development: builds the frontend, starts the hub and,
# when Ollama is running, a native-node connected to it.
set -e
# pipefail: a failing `npm run build | tail -1` must stop the script.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

# The trap is installed before anything is started in the background, so that
# Ctrl+C during startup also stops the processes started so far.
HUB_PID=""
NODE_PID=""
trap 'echo ""; echo "Pysäytetään..."; kill $HUB_PID $NODE_PID 2>/dev/null; exit 0' INT TERM

echo "=== Kipinä Studio Local Development ==="

# Kill old processes listening on port 3000.
if lsof -ti:3000 >/dev/null 2>&1; then
    echo "[0] Vapautetaan portti 3000..."
    lsof -ti:3000 | xargs kill -9 2>/dev/null || true
    sleep 1
fi

# Frontend
echo "[1/3] Rakennetaan frontend..."
cd "$SCRIPT_DIR/frontend"
[ -d node_modules ] || npm install --silent
npm run build 2>&1 | tail -1
cd "$SCRIPT_DIR"

# Hub
echo "[2/3] Käynnistetään hub..."
STATIC_DIR="$SCRIPT_DIR/frontend/dist" cargo run -p hub 2>&1 &
HUB_PID=$!

# Wait (at most about 10 seconds) for the hub to answer.
for i in $(seq 1 10); do
    if curl -s -o /dev/null http://localhost:3000 2>/dev/null; then break; fi
    sleep 1
done

# Native-node (only when Ollama is running)
if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then
    # `|| true` keeps a failed probe (no python3, bad JSON) from aborting the
    # script; MODEL is then empty and reported below.
    MODEL=$(curl -s http://localhost:11434/api/tags | python3 -c "
import sys, json
models = json.load(sys.stdin).get('models', [])
# Priority: a 7b coder model > the first coder model > the first model listed
best = None
for m in models:
    name = m['name']
    if '7b' in name and 'coder' in name: best = name; break
    if 'coder' in name and not best: best = name
if not best and models: best = models[0]['name']
if best: print(best)
" 2>/dev/null) || true
    if [ -n "$MODEL" ]; then
        echo "[3/3] Ollama: $MODEL — käynnistetään native-node..."
        HUB_URL=ws://localhost:3000/ws OLLAMA_MODEL="$MODEL" cargo run -p native-node --no-default-features 2>&1 &
        NODE_PID=$!
    else
        echo "[3/3] Ollama käynnissä mutta ei malleja — asenna: ollama pull qwen2.5-coder:7b"
    fi
else
    echo "[3/3] Ollama ei käynnissä — käytetään selaimen Wasm-laskentaa"
fi

echo ""
echo "=== http://localhost:3000 ==="
echo " Ctrl+C pysäyttää"
open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || true

wait $HUB_PID

View File

@@ -1,118 +0,0 @@
use burn::module::{Module, Param};
use burn::tensor::{backend::Backend, Tensor};
use super::rope::RoPE;
use super::config::SmolLMConfig;
/// Keys and values carried between successive `Attention::forward` calls.
///
/// `Attention::forward` concatenates the newly computed keys/values onto these
/// tensors along dimension 2 and returns the result as the updated cache.
#[derive(Clone, Debug)]
pub struct KVCache<B: Backend> {
/// Cached keys, `[batch, num_kv_heads, cached_len, head_dim]`, stored after RoPE
/// and before the KV heads are repeated for grouped-query attention.
pub k: Tensor<B, 4>,
/// Cached values, same layout as `k` (RoPE is not applied to values).
pub v: Tensor<B, 4>,
}
/// Self-attention layer with rotary position embeddings, grouped-query
/// key/value heads and an incremental KV cache.
///
/// Projection weights are stored as `[in, out]` matrices so the forward pass
/// can compute `x @ W` directly.
#[derive(Module, Debug)]
pub struct Attention<B: Backend> {
    pub q_proj: Param<Tensor<B, 2>>, // [hidden, num_heads * head_dim]
    pub k_proj: Param<Tensor<B, 2>>, // [hidden, num_kv_heads * head_dim]
    pub v_proj: Param<Tensor<B, 2>>, // [hidden, num_kv_heads * head_dim]
    pub o_proj: Param<Tensor<B, 2>>, // [num_heads * head_dim, hidden]
    num_heads: usize,
    num_kv_heads: usize,
    head_dim: usize,
    rope: RoPE<B>,
}

impl<B: Backend> Attention<B> {
    /// Creates the layer with zero-initialised projection weights.
    ///
    /// `head_dim` is derived as `hidden_size / num_attention_heads`.
    pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self {
        let head_dim = config.hidden_size / config.num_attention_heads;
        let q_dim = config.num_attention_heads * head_dim;
        let kv_dim = config.num_key_value_heads * head_dim;
        Self {
            q_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, q_dim], device)),
            k_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, kv_dim], device)),
            v_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, kv_dim], device)),
            o_proj: Param::from_tensor(Tensor::zeros([q_dim, config.hidden_size], device)),
            num_heads: config.num_attention_heads,
            num_kv_heads: config.num_key_value_heads,
            head_dim,
            rope: RoPE::new(head_dim, config.max_position_embeddings, config.rope_theta, device),
        }
    }

    /// Expands `[batch, kv_heads, len, head_dim]` to
    /// `[batch, num_heads, len, head_dim]` by repeating every KV head
    /// `num_heads / num_kv_heads` times in place (head 0 repeated first, then
    /// head 1, ...). Returns the input unchanged when no repetition is needed.
    fn repeat_kv(&self, t: Tensor<B, 4>) -> Tensor<B, 4> {
        let num_reps = self.num_heads / self.num_kv_heads;
        if num_reps <= 1 {
            return t;
        }
        let [b, kv_h, s, hd] = t.dims();
        t.reshape([b, kv_h, 1, s, hd])
            .repeat_dim(2, num_reps)
            .reshape([b, self.num_heads, s, hd])
    }

    /// Runs attention over `x` and returns the output together with the
    /// updated KV cache.
    ///
    /// * `x` - input of shape `[batch, seq, hidden]`.
    /// * `offset` - position of the first token of `x`; passed to RoPE and used
    ///   to place the causal mask relative to the cached keys.
    /// * `cache` - keys/values from earlier calls, or `None` on the first call.
    ///
    /// The returned tensor has shape `[batch, seq, hidden]`.
    pub fn forward(
        &self,
        x: Tensor<B, 3>,
        offset: usize,
        cache: Option<KVCache<B>>,
    ) -> (Tensor<B, 3>, KVCache<B>) {
        let [batch, seq_len, _] = x.dims();

        // Project Q, K, V: x @ W -> [batch, seq, proj_dim]
        let q = x.clone().matmul(self.q_proj.val().unsqueeze());
        let k = x.clone().matmul(self.k_proj.val().unsqueeze());
        let v = x.matmul(self.v_proj.val().unsqueeze());

        // Reshape: [batch, seq, heads, head_dim] -> [batch, heads, seq, head_dim]
        let q = q.reshape([batch, seq_len, self.num_heads, self.head_dim]).swap_dims(1, 2);
        let k = k.reshape([batch, seq_len, self.num_kv_heads, self.head_dim]).swap_dims(1, 2);
        let v = v.reshape([batch, seq_len, self.num_kv_heads, self.head_dim]).swap_dims(1, 2);

        // Apply RoPE to queries and keys only.
        let q = self.rope.forward(q, offset);
        let k = self.rope.forward(k, offset);

        // KV cache: append the new keys/values along the sequence dimension.
        let (k, v) = match cache {
            Some(c) => (Tensor::cat(vec![c.k, k], 2), Tensor::cat(vec![c.v, v], 2)),
            None => (k, v),
        };
        // The cache keeps the un-repeated KV heads.
        let new_cache = KVCache { k: k.clone(), v: v.clone() };
        let kv_len = k.dims()[2];

        // GQA: [batch, kv_heads, kv_len, hd] -> [batch, num_heads, kv_len, hd]
        let k = self.repeat_kv(k);
        let v = self.repeat_kv(v);

        // Attention scores: Q @ K^T / sqrt(d) -> [batch, heads, seq_len, kv_len]
        let scale = 1.0 / (self.head_dim as f64).sqrt();
        let scores = q.matmul(k.swap_dims(2, 3)).mul_scalar(scale);

        // Causal mask, only needed when several query positions are processed
        // at once (prefill). Query i sits at absolute position offset + i and
        // must not see keys at later positions.
        let scores = if seq_len > 1 {
            let mask_data: Vec<f32> = (0..seq_len)
                .flat_map(|i| {
                    (0..kv_len).map(move |j| {
                        if j > offset + i { f32::NEG_INFINITY } else { 0.0 }
                    })
                })
                .collect();
            let mask = Tensor::<B, 2>::from_data(
                burn::tensor::TensorData::new(mask_data, [seq_len, kv_len]),
                &scores.device(),
            )
            .reshape([1, 1, seq_len, kv_len]);
            scores + mask
        } else {
            scores
        };

        let attn_weights = burn::tensor::activation::softmax(scores, 3);
        let context = attn_weights.matmul(v);

        // [batch, heads, seq, hd] -> [batch, seq, heads*hd]
        let context = context
            .swap_dims(1, 2)
            .reshape([batch, seq_len, self.num_heads * self.head_dim]);
        let output = context.matmul(self.o_proj.val().unsqueeze());
        (output, new_cache)
    }
}

View File

@@ -1,28 +0,0 @@
/// Hyper-parameters describing the transformer built by `LlamaModel`.
#[derive(Clone, Debug)]
pub struct SmolLMConfig {
    /// Width of the hidden state.
    pub hidden_size: usize,
    /// Inner width of the MLP.
    pub intermediate_size: usize,
    /// Number of entries in the token embedding table.
    pub vocab_size: usize,
    /// Number of transformer blocks.
    pub num_hidden_layers: usize,
    /// Number of query heads.
    pub num_attention_heads: usize,
    /// Number of key/value heads.
    pub num_key_value_heads: usize,
    /// Epsilon used by RMSNorm.
    pub rms_norm_eps: f64,
    /// Base of the rotary position embedding frequencies.
    pub rope_theta: f32,
    /// Number of positions the rotary tables are built for.
    pub max_position_embeddings: usize,
}

impl Default for SmolLMConfig {
    /// Returns the built-in default configuration
    /// (presumably the SmolLM-135M checkpoint — confirm against the model card).
    fn default() -> Self {
        // Attention geometry
        let num_attention_heads = 9;
        let num_key_value_heads = 3;
        // Layer sizes
        let hidden_size = 576;
        let intermediate_size = 1536;

        SmolLMConfig {
            vocab_size: 49152,
            hidden_size,
            intermediate_size,
            num_hidden_layers: 30,
            num_attention_heads,
            num_key_value_heads,
            max_position_embeddings: 2048,
            rope_theta: 10000.0,
            rms_norm_eps: 1e-5,
        }
    }
}

View File

@@ -1,90 +0,0 @@
use burn::tensor::{backend::Backend, Tensor, TensorData};
use candle_core::safetensors;
use candle_core::Device as CandleDevice;
use burn::module::Param;
use super::model::LlamaModel;
use super::config::SmolLMConfig;
/// Looks up `name` in the checkpoint and returns its values as a flat
/// `Vec<f32>` in the tensor's stored (row-major) order.
///
/// # Errors
/// Returns an error string when the tensor is missing, when the conversion to
/// `f32` fails, or when the tensor does not hold exactly `expected_len`
/// elements. Checking the length here turns a mismatching checkpoint into an
/// error at load time instead of a panic or a mis-shaped weight later on.
fn tensor_to_f32_vec(
    tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
    name: &str,
    expected_len: usize,
) -> Result<Vec<f32>, String> {
    let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?;
    let values = t
        .to_dtype(candle_core::DType::F32)
        .and_then(|t| t.flatten_all())
        .and_then(|t| t.to_vec1::<f32>())
        .map_err(|e| format!("Tensorin {} muunnos epäonnistui: {}", name, e))?;
    if values.len() != expected_len {
        return Err(format!(
            "Tensorin {} koko ei täsmää: odotettiin {} alkiota, löytyi {}",
            name,
            expected_len,
            values.len()
        ));
    }
    Ok(values)
}

/// Loads a 2-D weight stored as `[out, in]` and returns it transposed to
/// `[in, out]`, the layout the forward passes multiply with (`x @ W`).
///
/// `shape_out_in` is the shape of the tensor as stored in the checkpoint.
///
/// # Errors
/// See [`tensor_to_f32_vec`].
fn load_tensor_2d<B: Backend>(
    tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
    name: &str,
    device: &B::Device,
    shape_out_in: [usize; 2],
) -> Result<Param<Tensor<B, 2>>, String> {
    let values = tensor_to_f32_vec(tensors_map, name, shape_out_in[0] * shape_out_in[1])?;
    let t_burn = Tensor::<B, 2>::from_data(burn::tensor::TensorData::new(values, shape_out_in), device);
    // transpose from [out, in] to [in, out]
    Ok(Param::from_tensor(t_burn.transpose()))
}

/// Loads a 1-D weight (for example a norm scale) of length `shape[0]`.
///
/// # Errors
/// See [`tensor_to_f32_vec`].
fn load_tensor_1d<B: Backend>(
    tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
    name: &str,
    device: &B::Device,
    shape: [usize; 1],
) -> Result<Param<Tensor<B, 1>>, String> {
    let values = tensor_to_f32_vec(tensors_map, name, shape[0])?;
    Ok(Param::from_tensor(Tensor::<B, 1>::from_floats(values.as_slice(), device)))
}

/// Loads a 2-D table in its stored layout, without transposing.
///
/// Used for the embedding table, which stays `[vocab, hidden]`.
///
/// # Errors
/// See [`tensor_to_f32_vec`].
fn load_embed<B: Backend>(
    tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
    name: &str,
    device: &B::Device,
    shape: [usize; 2],
) -> Result<Param<Tensor<B, 2>>, String> {
    let values = tensor_to_f32_vec(tensors_map, name, shape[0] * shape[1])?;
    // The embedding is not transposed like the projections; it stays [vocab, hidden].
    Ok(Param::from_tensor(Tensor::<B, 2>::from_data(burn::tensor::TensorData::new(values, shape), device)))
}
/// Builds a [`LlamaModel`] and fills its weights from an in-memory
/// safetensors file.
///
/// * `buffer` - raw bytes of the `.safetensors` file.
/// * `config` - model dimensions; they determine both the model that is built
///   and the shapes the checkpoint tensors are read with.
/// * `device` - device the burn tensors are created on.
///
/// Tensor names follow the `model.layers.{i}.…` scheme visible below. When the
/// checkpoint has no `lm_head.weight`, the token embedding is used for the
/// output head as well (tied embeddings).
///
/// # Errors
/// Returns an error string when the buffer cannot be parsed or when any
/// required tensor cannot be loaded.
pub fn load_safetensors_to_model<B: Backend>(
    buffer: &[u8],
    config: &SmolLMConfig,
    device: &B::Device,
) -> Result<LlamaModel<B>, String> {
    let mut model = LlamaModel::new(config, device);
    let tensors_map = safetensors::load_buffer(buffer, &CandleDevice::Cpu)
        .map_err(|e| format!("Virhe Safetensors luennassa: {}", e))?;

    // Dimensions shared by every layer.
    let hidden = config.hidden_size;
    let intermediate = config.intermediate_size;
    let head_dim = config.hidden_size / config.num_attention_heads;
    let q_dim = config.num_attention_heads * head_dim;
    let kv_dim = config.num_key_value_heads * head_dim;
    let embed_shape = [config.vocab_size, hidden];

    // Embeddings
    model.embed_tokens = load_embed(&tensors_map, "model.embed_tokens.weight", device, embed_shape)?;
    model.norm.weight = load_tensor_1d(&tensors_map, "model.norm.weight", device, [hidden])?;

    // Fall back to the tied embedding only when the checkpoint really has no
    // separate output head; any other problem with `lm_head.weight` is
    // reported instead of being papered over by the fallback.
    let lm_head_name = if tensors_map.contains_key("lm_head.weight") {
        "lm_head.weight"
    } else {
        "model.embed_tokens.weight"
    };
    model.lm_head = load_embed(&tensors_map, lm_head_name, device, embed_shape)?;

    for (i, layer) in model.layers.iter_mut().enumerate() {
        let key = |suffix: &str| format!("model.layers.{}.{}", i, suffix);

        // Norms
        layer.input_layernorm.weight =
            load_tensor_1d(&tensors_map, &key("input_layernorm.weight"), device, [hidden])?;
        layer.post_attention_layernorm.weight =
            load_tensor_1d(&tensors_map, &key("post_attention_layernorm.weight"), device, [hidden])?;

        // Attention (checkpoint layout is [out, in])
        layer.self_attn.q_proj =
            load_tensor_2d(&tensors_map, &key("self_attn.q_proj.weight"), device, [q_dim, hidden])?;
        layer.self_attn.k_proj =
            load_tensor_2d(&tensors_map, &key("self_attn.k_proj.weight"), device, [kv_dim, hidden])?;
        layer.self_attn.v_proj =
            load_tensor_2d(&tensors_map, &key("self_attn.v_proj.weight"), device, [kv_dim, hidden])?;
        layer.self_attn.o_proj =
            load_tensor_2d(&tensors_map, &key("self_attn.o_proj.weight"), device, [hidden, q_dim])?;

        // MLP
        layer.mlp.gate_proj =
            load_tensor_2d(&tensors_map, &key("mlp.gate_proj.weight"), device, [intermediate, hidden])?;
        layer.mlp.up_proj =
            load_tensor_2d(&tensors_map, &key("mlp.up_proj.weight"), device, [intermediate, hidden])?;
        layer.mlp.down_proj =
            load_tensor_2d(&tensors_map, &key("mlp.down_proj.weight"), device, [hidden, intermediate])?;
    }

    Ok(model)
}

View File

@@ -1,6 +0,0 @@
pub mod attention;
pub mod config;
pub mod loader;
pub mod model;
pub mod modules;
pub mod rope;

View File

@@ -1,96 +0,0 @@
use burn::module::{Module, Param};
use burn::tensor::{backend::Backend, Tensor, Int};
use super::modules::{RmsNorm, Mlp};
use super::attention::{Attention, KVCache};
use super::config::SmolLMConfig;
/// One transformer block: pre-norm self-attention followed by a pre-norm MLP,
/// each wrapped in a residual connection.
#[derive(Module, Debug)]
pub struct LlamaBlock<B: Backend> {
    pub self_attn: Attention<B>,
    pub mlp: Mlp<B>,
    pub input_layernorm: RmsNorm<B>,
    pub post_attention_layernorm: RmsNorm<B>,
}

impl<B: Backend> LlamaBlock<B> {
    /// Builds the block's sub-modules from `config` on `device`.
    pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self {
        let make_norm = || RmsNorm::new(config.hidden_size, config.rms_norm_eps, device);
        let self_attn = Attention::new(config, device);
        let mlp = Mlp::new(config.hidden_size, config.intermediate_size, device);
        let input_layernorm = make_norm();
        let post_attention_layernorm = make_norm();
        Self {
            self_attn,
            mlp,
            input_layernorm,
            post_attention_layernorm,
        }
    }

    /// Applies the block to `x` and returns the new hidden state together
    /// with the attention layer's updated KV cache.
    ///
    /// `offset` and `cache` are passed through to the attention layer.
    pub fn forward(
        &self,
        x: Tensor<B, 3>,
        offset: usize,
        cache: Option<KVCache<B>>,
    ) -> (Tensor<B, 3>, KVCache<B>) {
        // Attention sub-layer: x + attn(norm(x))
        let normed = self.input_layernorm.forward(x.clone());
        let (attn_out, updated_cache) = self.self_attn.forward(normed, offset, cache);
        let after_attn = x + attn_out;

        // MLP sub-layer: h + mlp(norm(h))
        let normed = self.post_attention_layernorm.forward(after_attn.clone());
        let after_mlp = after_attn + self.mlp.forward(normed);

        (after_mlp, updated_cache)
    }
}
/// Decoder-only transformer: token embedding, a stack of [`LlamaBlock`]s,
/// a final RMSNorm and an output projection to vocabulary logits.
#[derive(Module, Debug)]
pub struct LlamaModel<B: Backend> {
    pub embed_tokens: Param<Tensor<B, 2>>,
    pub layers: Vec<LlamaBlock<B>>,
    pub norm: RmsNorm<B>,
    pub lm_head: Param<Tensor<B, 2>>, // For tie_word_embeddings this can point to embed_tokens
}

impl<B: Backend> LlamaModel<B> {
    /// Creates a model with zero-initialised embedding and output head and
    /// `config.num_hidden_layers` freshly constructed blocks.
    pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self {
        let table_shape = [config.vocab_size, config.hidden_size];
        let layers: Vec<LlamaBlock<B>> = (0..config.num_hidden_layers)
            .map(|_| LlamaBlock::new(config, device))
            .collect();
        Self {
            embed_tokens: Param::from_tensor(Tensor::zeros(table_shape, device)),
            layers,
            norm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device),
            lm_head: Param::from_tensor(Tensor::zeros(table_shape, device)),
        }
    }

    /// Runs the model on `input_ids` and returns the logits.
    ///
    /// * `offset` - forwarded to every block (position of the first token).
    /// * `caches` - one slot per layer; each slot is taken, passed to its
    ///   layer and replaced with the layer's updated cache. Must hold at
    ///   least as many slots as there are layers (indexing panics otherwise).
    pub fn forward(
        &self,
        input_ids: Tensor<B, 2, Int>,
        offset: usize,
        caches: &mut Vec<Option<KVCache<B>>>,
    ) -> Tensor<B, 3> {
        let mut hidden = burn::tensor::module::embedding(self.embed_tokens.val(), input_ids);

        for (idx, block) in self.layers.iter().enumerate() {
            let previous = caches[idx].take();
            let (next_hidden, updated) = block.forward(hidden, offset, previous);
            caches[idx] = Some(updated);
            hidden = next_hidden;
        }

        let normed = self.norm.forward(hidden);
        // lm_head is stored as [vocab_size, hidden_size], so it is swapped to
        // [hidden_size, vocab_size] before the matmul that produces the logits.
        let head: Tensor<B, 3> = self.lm_head.val().swap_dims(0, 1).unsqueeze();
        normed.matmul(head)
    }
}

View File

@@ -1,59 +0,0 @@
use burn::module::{Module, Param};
use burn::tensor::{backend::Backend, Tensor};
/// Root-mean-square layer normalisation with a learned per-channel scale.
#[derive(Module, Debug)]
pub struct RmsNorm<B: Backend> {
    pub weight: Param<Tensor<B, 1>>,
    epsilon: f64,
}

impl<B: Backend> RmsNorm<B> {
    /// Creates a norm over `size` channels with the scale initialised to ones.
    pub fn new(size: usize, epsilon: f64, device: &B::Device) -> Self {
        Self {
            weight: Param::from_tensor(Tensor::ones([size], device)),
            epsilon,
        }
    }

    /// Normalises `x` (`[batch, seq_len, dim]`) over its last dimension:
    /// `x / sqrt(mean(x^2) + eps) * weight`.
    pub fn forward(&self, x: Tensor<B, 3>) -> Tensor<B, 3> {
        let [_, _, dim] = x.dims();

        // mean(x^2) over the last dimension; sum_dim keeps that dimension
        // (size 1), so the result broadcasts against x.
        let mean_sq = x
            .clone()
            .powf_scalar(2.0)
            .sum_dim(2)
            .div_scalar(dim as f32);
        let rms = mean_sq.add_scalar(self.epsilon).sqrt();

        // View the scale as [1, 1, dim] so it broadcasts over batch and sequence.
        let scale = self.weight.val().reshape([1, 1, dim]);
        x.div(rms) * scale
    }
}
/// Gated feed-forward block: `down(silu(x @ gate) * (x @ up))`.
///
/// All three weights are stored in `[in, out]` orientation, i.e. they are
/// right-multiplied onto the activations without a transpose.
#[derive(Module, Debug)]
pub struct Mlp<B: Backend> {
// Projection whose output is passed through SiLU and used as the gate.
pub gate_proj: Param<Tensor<B, 2>>, // [in, intermediate]
// Projection whose output is multiplied element-wise by the activated gate.
pub up_proj: Param<Tensor<B, 2>>, // [in, intermediate]
// Projection from the intermediate width back to the hidden width.
pub down_proj: Param<Tensor<B, 2>>, // [intermediate, out]
}
impl<B: Backend> Mlp<B> {
pub fn new(hidden_size: usize, intermediate_size: usize, device: &B::Device) -> Self {
Self {
gate_proj: Param::from_tensor(Tensor::zeros([hidden_size, intermediate_size], device)),
up_proj: Param::from_tensor(Tensor::zeros([hidden_size, intermediate_size], device)),
down_proj: Param::from_tensor(Tensor::zeros([intermediate_size, hidden_size], device)),
}
}
pub fn forward(&self, x: Tensor<B, 3>) -> Tensor<B, 3> {
// x: [batch, seq, hidden]
// gate = x @ gate_proj -> [batch, seq, intermediate]
let gate = x.clone().matmul(self.gate_proj.val().unsqueeze());
let up = x.matmul(self.up_proj.val().unsqueeze());
// SiLU(gate) * up
let silu = gate.clone() * burn::tensor::activation::sigmoid(gate);
let intermediate = silu * up;
// intermediate @ down_proj -> [batch, seq, hidden]
intermediate.matmul(self.down_proj.val().unsqueeze())
}
}

View File

@@ -1,59 +0,0 @@
use burn::module::Module;
use burn::tensor::{backend::Backend, Tensor};
/// Rotary position embedding with precomputed cosine / sine lookup tables.
#[derive(Module, Debug)]
pub struct RoPE<B: Backend> {
/// `cos(position * inv_freq)`, shape `[max_seq_len, head_dim / 2]` as built by `new`.
cos_cache: Tensor<B, 2>,
/// `sin(position * inv_freq)`, same shape as `cos_cache`.
sin_cache: Tensor<B, 2>,
}
impl<B: Backend> RoPE<B> {
    /// Precomputes the cos / sin tables for positions `0..max_seq_len`.
    ///
    /// The inverse frequency for pair index `i` is
    /// `1 / theta^(2i / head_dim)`, for `i` in `0..head_dim / 2`.
    pub fn new(head_dim: usize, max_seq_len: usize, theta: f32, device: &B::Device) -> Self {
        let half_dim = head_dim / 2;

        let mut inv_freq_values: Vec<f32> = Vec::with_capacity(half_dim);
        for i in 0..half_dim {
            let exponent = (2 * i) as f32 / head_dim as f32;
            inv_freq_values.push(1.0 / theta.powf(exponent));
        }
        // Row vector: [1, half_dim]
        let inv_freq =
            Tensor::<B, 1>::from_floats(inv_freq_values.as_slice(), device).unsqueeze::<2>();

        let position_values: Vec<f32> = (0..max_seq_len).map(|pos| pos as f32).collect();
        // Column vector: [max_seq_len, 1]
        let positions = Tensor::<B, 1>::from_floats(position_values.as_slice(), device)
            .unsqueeze::<2>()
            .transpose();

        // Outer product -> rotation angle per (position, frequency): [max_seq_len, half_dim]
        let angles = positions.matmul(inv_freq);
        Self {
            cos_cache: angles.clone().cos(),
            sin_cache: angles.sin(),
        }
    }

    /// Rotates `x` (`[batch, heads, seq_len, head_dim]`) using the table rows
    /// `offset..offset + seq_len`.
    ///
    /// The last dimension is split into a lower and an upper half which are
    /// rotated against each other; the halves are concatenated back in the
    /// same order.
    pub fn forward(&self, x: Tensor<B, 4>, offset: usize) -> Tensor<B, 4> {
        let [batch, heads, seq_len, head_dim] = x.dims();
        let half_dim = head_dim / 2;

        // Split the feature axis into the two halves that are rotated together.
        let lower = x
            .clone()
            .slice([0..batch, 0..heads, 0..seq_len, 0..half_dim]);
        let upper = x.slice([0..batch, 0..heads, 0..seq_len, half_dim..head_dim]);

        // Table rows for the current positions, viewed as
        // [1, 1, seq_len, half_dim] so they broadcast over batch and heads.
        let table = |cache: &Tensor<B, 2>| -> Tensor<B, 4> {
            cache
                .clone()
                .slice([offset..offset + seq_len, 0..half_dim])
                .reshape([1, 1, seq_len, half_dim])
        };
        let cos = table(&self.cos_cache);
        let sin = table(&self.sin_cache);

        // lower' = lower * cos - upper * sin
        let rotated_lower = lower.clone().mul(cos.clone()) - upper.clone().mul(sin.clone());
        // upper' = upper * cos + lower * sin
        let rotated_upper = upper.mul(cos) + lower.mul(sin);

        Tensor::cat(vec![rotated_lower, rotated_upper], 3)
    }
}

View File

@@ -8,11 +8,8 @@ use burn::backend::{Wgpu, NdArray};
pub mod storage;
pub mod sampling;
pub mod smollm;
pub mod qwen;
pub mod qwen_coder;
pub mod phi3;
pub mod burn_smollm;
#[macro_export]
macro_rules! console_log {
@@ -246,7 +243,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
HAS_WEBGPU.store(has_webgpu, Ordering::SeqCst);
SELECTED_TASK.store(task_id, Ordering::SeqCst);
let backend_name = if has_webgpu { "WebGPU" } else { "CPU (NdArray)" };
let task_names = ["tokenize", "smollm-135m", "qwen-05b", "phi3-mini", "qwen-coder-05b", "qwen-coder-3b"];
let task_names = ["tokenize", "qwen-05b", "qwen-coder-05b", "qwen-coder-3b"];
let task_name = task_names.get(task_id as usize).unwrap_or(&"tokenize");
console_log!("Kipinä Agent Node käynnistyy — backend: {} | tehtävä: {}", backend_name, task_name);
@@ -303,22 +300,6 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
}
}
} else if msg.contains("llm_prompt") && current_task == 1 && auto_on {
// Vain SmolLM-solmut, ja vain yksi inferenssi kerrallaan
if LLM_BUSY.load(Ordering::SeqCst) {
// Ohitetaan — edellinen inferenssi vielä käynnissä
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !prompt.is_empty() && model == "smollm-135m" {
LLM_BUSY.store(true, Ordering::SeqCst);
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
smollm::run_smollm_inference(prompt, ws_for_async).await;
LLM_BUSY.store(false, Ordering::SeqCst);
});
}
}
} else if msg.contains("llm_prompt") && current_task == 2 && auto_on {
// Qwen2.5-0.5B
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
@@ -333,21 +314,6 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
});
}
}
} else if msg.contains("llm_prompt") && current_task == 3 && auto_on {
// Phi-3 Mini
if LLM_BUSY.load(Ordering::SeqCst) {
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
if !prompt.is_empty() && model.starts_with("phi3-mini") {
LLM_BUSY.store(true, Ordering::SeqCst);
let ws_for_async = ws_clone.clone();
wasm_bindgen_futures::spawn_local(async move {
phi3::run_phi3_inference(prompt, ws_for_async).await;
LLM_BUSY.store(false, Ordering::SeqCst);
});
}
}
} else if msg.contains("llm_prompt") {
console_log!("[DEBUG] llm_prompt vastaanotettu! current_task={}, busy={}", current_task, LLM_BUSY.load(Ordering::SeqCst));
if current_task == 4 || current_task == 5 {

View File

@@ -1,36 +0,0 @@
use candle_core::{Device, Tensor, DType};
use candle_nn::VarBuilder;
use candle_transformers::models::phi3::{Config as Phi3Config, Model as Phi3Model};
use wasm_bindgen::JsCast;
use std::cell::RefCell;
use std::rc::Rc;
use web_sys::WebSocket;
use crate::storage;
/// Formats its arguments like `format!` and writes the resulting string to the
/// browser console through `web_sys::console::log_1`.
macro_rules! console_log {
($($t:tt)*) => (web_sys::console::log_1(&format_args!($($t)*).to_string().into()))
}
// Hugging Face URL of the Phi-3 Mini 4k-instruct safetensors index file
// (`model.safetensors.index.json`).
// NOTE(review): not referenced by the placeholder `run_phi3_inference` below.
const MODEL_URL: &str = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors.index.json";
// Hugging Face URL of the matching `tokenizer.json`.
// NOTE(review): likewise not referenced by the placeholder below.
const TOKENIZER_URL: &str = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.json";
// Phi-3 Mini is large (7.6 GB) — a more heavily quantized version is to be used later.
// For now this is a placeholder that reports the size and skips inference.
/// Placeholder Phi-3 handler: logs why the model is not run in the browser and
/// sends an `llm_done` message with zeroed statistics over `ws`.
///
/// No model or tokenizer is downloaded. A failure to send the message is ignored.
pub async fn run_phi3_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
    for line in [
        "[Phi-3] Phi-3 Mini 3.8B on liian suuri selaimessa ajettavaksi (~7.6 GB).",
        "[Phi-3] Käytä SmolLM 135M tai Qwen2.5 0.5B selaininferenssiin.",
        "[Phi-3] Phi-3 tuetaan native-node:lla (Docker + GPU).",
    ] {
        console_log!("{}", line);
    }

    let payload = serde_json::json!({
        "type": "llm_done",
        "prompt": prompt,
        "model": "Phi-3-Mini (ei tuettu selaimessa)",
        "response": "Phi-3 Mini 3.8B on liian suuri selaimessa ajettavaksi. Käytä SmolLM 135M tai Qwen2.5 0.5B.",
        "tokens_generated": 0,
        "duration_ms": 0,
        "tokens_per_sec": 0,
        "load_time_ms": 0,
    })
    .to_string();
    let _ = ws.borrow().send_with_str(&payload);
}

View File

@@ -1,232 +0,0 @@
use candle_core::{Device, Tensor, DType};
use candle_nn::VarBuilder;
use candle_transformers::models::llama::{Llama, LlamaConfig, LlamaEosToks, Cache};
// LogitsProcessor poistettu — käytetään greedy samplingia (argmax) Wasm-yhteensopivuuden vuoksi
use wasm_bindgen::JsCast;
use std::cell::RefCell;
use std::rc::Rc;
use web_sys::WebSocket;
use crate::storage;
/// Formats its arguments like `format!` and writes the resulting string to the
/// browser console through `web_sys::console::log_1`.
macro_rules! console_log {
($($t:tt)*) => (web_sys::console::log_1(&format_args!($($t)*).to_string().into()))
}
// Hugging Face URL of the SmolLM-135M-Instruct weights (single safetensors file).
const MODEL_URL: &str = "https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct/resolve/main/model.safetensors";
// Hugging Face URL of the matching `tokenizer.json`.
const TOKENIZER_URL: &str = "https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct/resolve/main/tokenizer.json";
/// Returns the bytes stored under `key`, downloading them from `url` on a cache miss.
///
/// On a hit in IndexedDB the cached bytes are returned immediately and a 100 %
/// progress message is sent. Otherwise the response body is streamed, a
/// progress message is sent over `ws` roughly every 5 %, and the result is
/// written to IndexedDB under `key`.
///
/// # Errors
/// Returns a message when the fetch fails, the HTTP status is not OK, the
/// response has no body / reader, or reading a chunk fails. A failure to
/// *store* the download is only logged: the bytes are still returned so the
/// caller can proceed without a cache.
async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Result<Vec<u8>, String> {
    if let Ok(Some(bytes)) = storage::load_from_idb(key).await {
        console_log!("[SmolLM] {} löytyi välimuistista ({} MB)", key, bytes.len() / 1024 / 1024);
        send_progress(ws, key, 100, bytes.len(), bytes.len());
        return Ok(bytes);
    }

    console_log!("[SmolLM] Ladataan {}...", key);
    send_progress(ws, key, 0, 0, 0);

    // The Fetch API gives access to Content-Length and to a streaming reader.
    let resp = crate::worker_fetch(url).await?;
    if !resp.ok() {
        return Err(format!("HTTP {}", resp.status()));
    }

    // Total size from the Content-Length header; 0 means "unknown".
    let total_size: usize = resp
        .headers()
        .get("content-length")
        .ok()
        .flatten()
        .and_then(|s| s.parse().ok())
        .unwrap_or(0);

    let body = resp.body().ok_or("Ei bodyä")?;
    let reader: web_sys::ReadableStreamDefaultReader = body
        .get_reader()
        .dyn_into()
        .map_err(|_| "Ei ReadableStreamDefaultReader".to_string())?;

    // Content-Length comes from the server, so the pre-allocation is only a
    // hint: if the reservation cannot be satisfied the buffer simply grows
    // while chunks arrive instead of aborting on a bogus header value.
    let mut data: Vec<u8> = Vec::new();
    let _ = data.try_reserve_exact(total_size);

    let mut last_pct: u32 = 0;
    loop {
        let chunk = wasm_bindgen_futures::JsFuture::from(reader.read())
            .await
            .map_err(|e| format!("Luku epäonnistui: {:?}", e))?;
        let done = js_sys::Reflect::get(&chunk, &"done".into())
            .map_err(|_| "done-kenttä puuttuu".to_string())?
            .as_bool()
            .unwrap_or(true);
        if done {
            break;
        }
        let value = js_sys::Reflect::get(&chunk, &"value".into())
            .map_err(|_| "value-kenttä puuttuu".to_string())?;
        let array = js_sys::Uint8Array::new(&value);

        // Copy the chunk straight into the tail of `data`; no temporary
        // per-chunk buffer is needed.
        let start = data.len();
        data.resize(start + array.length() as usize, 0);
        array.copy_to(&mut data[start..]);

        // Progress update (every 5 %).
        if total_size > 0 {
            let pct = ((data.len() as f64 / total_size as f64) * 100.0) as u32;
            if pct >= last_pct + 5 || pct == 100 {
                last_pct = pct;
                console_log!("[SmolLM] {} lataus: {}% ({}/{} MB)", key, pct, data.len() / 1024 / 1024, total_size / 1024 / 1024);
                send_progress(ws, key, pct, data.len(), total_size);
            }
        }
    }

    console_log!("[SmolLM] Tallennetaan {} ({} MB) IndexedDB:hen...", key, data.len() / 1024 / 1024);
    // Caching is best-effort, but the log must not claim success when the write failed.
    if storage::save_to_idb(key, &data).await.is_err() {
        console_log!("[SmolLM] Tiedoston {} tallennus IndexedDB:hen epäonnistui — jatketaan ilman välimuistia", key);
    } else {
        console_log!("[SmolLM] {} tallennettu!", key);
    }
    send_progress(ws, key, 100, data.len(), data.len());
    Ok(data)
}
/// Sends a `download_progress` JSON message for `file` over `ws`.
///
/// `loaded` and `total` are byte counts; they are reported in whole mebibytes
/// (rounded down). A failure to send is ignored.
fn send_progress(ws: &Rc<RefCell<WebSocket>>, file: &str, pct: u32, loaded: usize, total: usize) {
    const BYTES_PER_MB: usize = 1024 * 1024;

    let payload = serde_json::json!({
        "type": "download_progress",
        "file": file,
        "pct": pct,
        "loaded_mb": loaded / BYTES_PER_MB,
        "total_mb": total / BYTES_PER_MB,
    })
    .to_string();
    let _ = ws.borrow().send_with_str(&payload);
}
/// Loads the tokenizer and the model weights (from cache or network), then
/// runs inference for `prompt` and streams the generated tokens to the hub
/// over `ws`.
///
/// Any loading error is logged to the console and ends the call without
/// sending a result.
pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
    // Timing is taken inside `run_burn_inference` via `crate::perf_now()`.
    let loaded = async {
        // 1. Tokenizer
        let tok_bytes = ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws)
            .await
            .map_err(|e| format!("[SmolLM] Tokenizer-virhe: {}", e))?;
        let tokenizer = tokenizers::Tokenizer::from_bytes(&tok_bytes)
            .map_err(|e| format!("[SmolLM] Tokenizer-parsinta epäonnistui: {}", e))?;

        // 2. Model weights
        let model_bytes = ensure_cached("smollm-model.safetensors", MODEL_URL, &ws)
            .await
            .map_err(|e| format!("[SmolLM] Malli-virhe: {}", e))?;

        Ok::<_, String>((tokenizer, model_bytes))
    }
    .await;

    let (tokenizer, model_bytes) = match loaded {
        Ok(parts) => parts,
        Err(message) => {
            console_log!("{}", message);
            return;
        }
    };

    // Burn 0.14 wgpu is not compatible with current browsers (maxInterStageShaderComponents).
    // Burn 0.21-pre.2 cubecl-runtime does not compile for Wasm (println! is missing).
    // -> NdArray until Burn 0.21 stable + Wasm support.
    console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
    run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws).await;
}
/// Builds the Burn model from `model_bytes`, greedily generates up to 32
/// tokens for `prompt`, and streams them over `ws`.
///
/// Every generated token is sent as an `llm_chunk` message; a final
/// `llm_done` message carries the full response and timing statistics.
/// Load, tokenisation and empty-input failures are logged and end the call
/// without sending `llm_done`. Send failures are ignored.
async fn run_burn_inference<B: burn::tensor::backend::Backend>(
    prompt: String,
    model_bytes: Vec<u8>,
    tokenizer: tokenizers::Tokenizer,
    ws: Rc<RefCell<WebSocket>>,
) {
    // Token id treated as end-of-sequence; also used when the sampled scalar
    // cannot be parsed, so a bad value ends generation instead of continuing.
    const EOS_TOKEN_ID: u32 = 2;
    const MAX_NEW_TOKENS: usize = 32;

    let start_load = crate::perf_now();
    let device = Default::default();
    let config = crate::burn_smollm::config::SmolLMConfig::default();
    console_log!("[SmolLM] Injektoidaan Safetensors -> Burn Params...");
    let model = match crate::burn_smollm::loader::load_safetensors_to_model::<B>(&model_bytes, &config, &device) {
        Ok(m) => m,
        Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; }
    };
    let load_time = crate::perf_now() - start_load;
    console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). Generoidaan...", load_time);

    let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
    let encoding = match tokenizer.encode(formatted_prompt.as_str(), true) {
        Ok(e) => e,
        Err(e) => { console_log!("[SmolLM] Tokenisointivirhe: {}", e); return; }
    };
    let input_ids = encoding.get_ids();
    console_log!("[SmolLM] Syöte: {} tokenia", input_ids.len());

    let start_gen = crate::perf_now();
    let mut generated_text = String::new();
    let mut tokens_generated: usize = 0;
    // One KV-cache slot per decoder layer.
    let mut caches: Vec<Option<crate::burn_smollm::attention::KVCache<B>>> = vec![None; config.num_hidden_layers];
    let mut current_offset = 0;

    // Wraps a single token id as a [1, 1] integer tensor.
    let single_token = |id: i32| {
        burn::tensor::Tensor::<B, 1, burn::tensor::Int>::from_data(
            burn::tensor::TensorData::from([id]),
            &device,
        )
        .unsqueeze::<2>()
    };

    // Prefill one token at a time: there is no causal mask, so feeding the
    // whole prompt at once would let positions attend to future tokens.
    let mut prefill_logits = None;
    for &id in input_ids {
        prefill_logits = Some(model.forward(single_token(id as i32), current_offset, &mut caches));
        current_offset += 1;
    }
    let mut logits = match prefill_logits {
        Some(l) => l,
        None => {
            // Previously an `unwrap()` panic; an empty token list is now reported instead.
            console_log!("[SmolLM] Tyhjä syöte — ei generoitavaa");
            return;
        }
    };

    // Greedy (argmax) decoding. The first iteration samples from the prefill
    // logits; each later one samples from the forward pass of the previous token.
    for step in 0..MAX_NEW_TOKENS {
        // Scalar -> string -> u32: simple cast for the backend's integer element type.
        let next_token: u32 = logits
            .argmax(2)
            .into_scalar()
            .to_string()
            .parse()
            .unwrap_or(EOS_TOKEN_ID);
        if next_token == EOS_TOKEN_ID {
            break;
        }

        if let Ok(text) = tokenizer.decode(&[next_token], true) {
            generated_text.push_str(&text);
            // NOTE(review): the label says "WebGPU", but the visible caller
            // instantiates this function with the NdArray (CPU) backend. Left
            // unchanged because consumers of the message may depend on it.
            let chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "SmolLM-135M (WebGPU)" });
            let _ = ws.borrow().send_with_str(&chunk.to_string());
        }
        // Counted even when decoding to text failed, as before.
        tokens_generated += 1;

        // No forward pass after the last permitted token.
        if step + 1 == MAX_NEW_TOKENS {
            break;
        }
        logits = model.forward(single_token(next_token as i32), current_offset, &mut caches);
        current_offset += 1;
    }

    let gen_time = crate::perf_now() - start_gen;
    let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
    let done = serde_json::json!({
        "type": "llm_done",
        "prompt": prompt,
        "model": "SmolLM-135M-Instruct (WebGPU)",
        "response": generated_text,
        "tokens_generated": tokens_generated,
        "duration_ms": (gen_time * 100.0).round() / 100.0,
        "tokens_per_sec": (tokens_per_sec * 10.0).round() / 10.0,
        "load_time_ms": (load_time * 100.0).round() / 100.0,
    });
    let _ = ws.borrow().send_with_str(&done.to_string());
}

Binary file not shown.

View File

@@ -1,33 +0,0 @@
#!/usr/bin/env bash
# Kipinä Agentic Network — quick local test.
# Starts a local Kipinä Hub in the background, then shows the native node's
# Ratatui screen in the foreground, connected to that hub automatically.
# When the Ratatui view is left (e.g. by pressing Q) the script also shuts
# down the background hub. The hub is stopped on any other exit path as well
# (Ctrl-C, TERM, early failure), so it is never left orphaned.

cd "$(dirname "$0")" || exit 1

HUB_PID=""

# Stop the background hub if it was started and is still running.
cleanup() {
  if [ -n "$HUB_PID" ] && kill -0 "$HUB_PID" 2>/dev/null; then
    kill "$HUB_PID" 2>/dev/null || true
  fi
  HUB_PID=""
}
trap cleanup EXIT
trap 'exit 130' INT TERM

echo "========================================="
echo " 🔥 Kipinä Agentic Network - Local Run 🔥"
echo "========================================="

echo "Varmistetaan portin 3000 vapaus..."
# Only kill processes LISTENING on the port. A plain `lsof -ti :3000` also
# matches clients that merely have a connection to port 3000 (e.g. a browser).
lsof -ti tcp:3000 -sTCP:LISTEN | xargs kill -9 2>/dev/null || true

echo ""
echo "Käynnistetään Kipinä Hub taustalle..."
# Hub logs go to a separate file so they do not clutter the TUI.
env STATIC_DIR=frontend/dist cargo run -p hub > hub-local.log 2>&1 &
HUB_PID=$!

# Wait until the hub listens on the port (up to ~30 s) instead of a fixed
# sleep: a cold `cargo run` may need to compile first. If the hub process
# dies while waiting, stop here. After the timeout continue anyway.
for _ in $(seq 1 60); do
  if lsof -ti tcp:3000 -sTCP:LISTEN >/dev/null 2>&1; then
    break
  fi
  if ! kill -0 "$HUB_PID" 2>/dev/null; then
    echo "Hub ei käynnistynyt — katso hub-local.log" >&2
    exit 1
  fi
  sleep 0.5
done

echo "Käynnistetään Natiivisolmu ja Ratatui-dashboard..."
# Start the TUI and force it to connect to the local hub.
env -u OLLAMA_MODEL HUB_URL=ws://127.0.0.1:3000/ws cargo run -p native-node

# Reached once the TUI program has exited.
echo ""
echo "Dashboard suljettu! Ajetaan lokaali Hub (#$HUB_PID) siististi alas..."
cleanup
echo "Kaikki sammutettu. Kiitos!"