From 5f147b774f943a4177cda84decc4c93883ddcceb Mon Sep 17 00:00:00 2001 From: Jaakko Vanhala Date: Sun, 12 Apr 2026 11:41:09 +0300 Subject: [PATCH] deployment kokonaan uusiksi --- network-poc/build-binaries.sh | 63 ----- network-poc/deploy-binaries.sh | 30 --- network-poc/deploy-fast.sh | 28 --- network-poc/deploy-light.sh | 33 --- network-poc/deploy-local.sh | 56 +++++ network-poc/deploy-remote.sh | 58 +++++ network-poc/deploy-with-native.sh | 14 -- network-poc/deploy.sh | 68 ----- network-poc/frontend/src/pages/index.astro | 7 +- network-poc/hub/src/main.rs | 4 +- network-poc/install.sh | 59 ----- network-poc/local.sh | 68 ----- network-poc/node/src/burn_smollm/attention.rs | 118 --------- network-poc/node/src/burn_smollm/config.rs | 28 --- network-poc/node/src/burn_smollm/loader.rs | 90 ------- network-poc/node/src/burn_smollm/mod.rs | 6 - network-poc/node/src/burn_smollm/model.rs | 96 -------- network-poc/node/src/burn_smollm/modules.rs | 59 ----- network-poc/node/src/burn_smollm/rope.rs | 59 ----- network-poc/node/src/lib.rs | 36 +-- network-poc/node/src/phi3.rs | 36 --- network-poc/node/src/smollm.rs | 232 ------------------ network-poc/nodes.db | Bin 40960 -> 40960 bytes network-poc/start-local.sh | 33 --- 24 files changed, 119 insertions(+), 1162 deletions(-) delete mode 100755 network-poc/build-binaries.sh delete mode 100755 network-poc/deploy-binaries.sh delete mode 100755 network-poc/deploy-fast.sh delete mode 100755 network-poc/deploy-light.sh create mode 100755 network-poc/deploy-local.sh create mode 100755 network-poc/deploy-remote.sh delete mode 100755 network-poc/deploy-with-native.sh delete mode 100755 network-poc/deploy.sh delete mode 100755 network-poc/install.sh delete mode 100755 network-poc/local.sh delete mode 100644 network-poc/node/src/burn_smollm/attention.rs delete mode 100644 network-poc/node/src/burn_smollm/config.rs delete mode 100644 network-poc/node/src/burn_smollm/loader.rs delete mode 100644 network-poc/node/src/burn_smollm/mod.rs delete mode 100644 network-poc/node/src/burn_smollm/model.rs delete mode 100644 network-poc/node/src/burn_smollm/modules.rs delete mode 100644 network-poc/node/src/burn_smollm/rope.rs delete mode 100644 network-poc/node/src/phi3.rs delete mode 100644 network-poc/node/src/smollm.rs delete mode 100755 network-poc/start-local.sh diff --git a/network-poc/build-binaries.sh b/network-poc/build-binaries.sh deleted file mode 100755 index cff3811..0000000 --- a/network-poc/build-binaries.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -# Käännä kipina-node binäärit kaikille alustoille -set -e - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -OUT="$SCRIPT_DIR/frontend/public/download" -HASH_FILE="$OUT/.build-hash" -mkdir -p "$OUT" - -# Tarkistetaan onko native-node muuttunut edellisen buildin jälkeen -CURRENT_HASH=$(git -C "$SCRIPT_DIR" log -1 --format=%H -- native-node/ Cargo.toml Cargo.lock) -if [ -n "$(git -C "$SCRIPT_DIR" status --porcelain -- native-node/ Cargo.toml Cargo.lock)" ]; then - CURRENT_HASH="dirty-$(git -C "$SCRIPT_DIR" status --porcelain | md5sum | awk '{print $1}')" -fi - -if [ -f "$HASH_FILE" ] && [ "$(cat "$HASH_FILE")" = "$CURRENT_HASH" ]; then - echo "=== Kipinä Node — ei muutoksia, ohitetaan build ===" - ls -lh "$OUT"/kipina-node-* 2>/dev/null || true - exit 0 -fi - -echo "=== Kipinä Node — Binary Build ===" - -# macOS ARM (natiivi) -echo "[1/4] macOS ARM64..." -cd "$SCRIPT_DIR" -cargo build --release -p native-node --no-default-features 2>&1 | tail -1 -cp target/release/native-node "$OUT/kipina-node-macos-arm64" -echo " $(ls -lh "$OUT/kipina-node-macos-arm64" | awk '{print $5}')" - -# Linux x86_64 (Docker) -echo "[2/4] Linux x86_64..." -docker run --rm \ - -v "$SCRIPT_DIR":/app -w /app \ - --platform linux/amd64 \ - rust:slim \ - bash -c "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev >/dev/null 2>&1 && cargo build --release -p native-node --no-default-features --target-dir target/docker_linux_amd64 && cp target/docker_linux_amd64/release/native-node /app/frontend/public/download/kipina-node-linux-x86_64" -echo " $(ls -lh "$OUT/kipina-node-linux-x86_64" | awk '{print $5}')" - -# Linux ARM64 (Docker) -echo "[3/4] Linux ARM64..." -docker run --rm \ - -v "$SCRIPT_DIR":/app -w /app \ - --platform linux/arm64 \ - rust:slim \ - bash -c "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev >/dev/null 2>&1 && cargo build --release -p native-node --no-default-features --target-dir target/docker_linux_arm64 && cp target/docker_linux_arm64/release/native-node /app/frontend/public/download/kipina-node-linux-arm64" -echo " $(ls -lh "$OUT/kipina-node-linux-arm64" | awk '{print $5}')" - -# Windows x86_64 (Docker + mingw-w64) -echo "[4/4] Windows x86_64..." -docker run --rm \ - -v "$SCRIPT_DIR":/app -w /app \ - --platform linux/amd64 \ - rust:slim \ - bash -c "apt-get update -qq && apt-get install -y -qq gcc-mingw-w64-x86-64 pkg-config libssl-dev >/dev/null 2>&1 && rustup target add x86_64-pc-windows-gnu && cargo build --release -p native-node --no-default-features --target x86_64-pc-windows-gnu && cp target/x86_64-pc-windows-gnu/release/native-node.exe /app/frontend/public/download/kipina-node-windows-x86_64.exe" -echo " $(ls -lh "$OUT/kipina-node-windows-x86_64.exe" | awk '{print $5}')" - -# Tallennetaan onnistuneen buildin hash -echo "$CURRENT_HASH" > "$HASH_FILE" - -echo "" -echo "=== Binäärit valmiina ===" -ls -lh "$OUT"/kipina-node-* diff --git a/network-poc/deploy-binaries.sh b/network-poc/deploy-binaries.sh deleted file mode 100755 index cb1316a..0000000 --- a/network-poc/deploy-binaries.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -set -e - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -SERVER="ubuntu@86.50.252.98" -REMOTE_DIR="~/code/agentic-studio/network-poc" -KEY="$HOME/.ssh/id_rsa" -SSH_OPTS="-o StrictHostKeyChecking=no -i $KEY" - -if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then - echo "SSH-avain ei ole agentissa. Lisätään..." - ssh-add "$KEY" -fi - -echo "=== Kipinä Node - Vain Binäärien Päivitys ===" - -# 1. Käännetään binäärit (hyödyntää korjattua build-binaries.sh cache logiikkaa) -"$SCRIPT_DIR/build-binaries.sh" - -# 2. Siirretään binäärit suoraan kohdekoneen hakemistoon ohittaen Docker-imagen täyden rakennuksen -echo "" -echo "[Vieminen uuteen kohteeseen...]" -ssh $SSH_OPTS "$SERVER" "mkdir -p $REMOTE_DIR/frontend/dist/download" -scp $SSH_OPTS "$SCRIPT_DIR/frontend/public/download"/kipina-node-* "$SERVER:$REMOTE_DIR/frontend/dist/download/" -scp $SSH_OPTS "$SCRIPT_DIR/frontend/public"/kipina-node "$SERVER:$REMOTE_DIR/frontend/dist/" - -# 3. Luvat kuntoon -ssh $SSH_OPTS "$SERVER" "chmod +x $REMOTE_DIR/frontend/dist/download/kipina-node-*" - -echo "=== Valmis! Binäärit ovat nyt asennettu livenä ja ladattavissa kipina.studiosta ===" diff --git a/network-poc/deploy-fast.sh b/network-poc/deploy-fast.sh deleted file mode 100755 index 788fd75..0000000 --- a/network-poc/deploy-fast.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Nopea deploy: päivittää vain frontendin (ei kontin uudelleenkäynnistystä) -# Hub-binäärin päivitys: käytä deploy.sh tai deploy-light.sh -set -e - -SERVER="ubuntu@86.50.252.98" -REMOTE_DIR="~/code/agentic-studio/network-poc" -SSH_OPTS="-o StrictHostKeyChecking=no" -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -echo "=== Kipinä Studio — Frontend Deploy ===" - -# 1. Buildaa frontend paikallisesti -echo "[1/2] Rakennetaan frontend..." -cd "$SCRIPT_DIR/frontend" -[ -d node_modules ] || npm install --silent -npm run build --silent 2>&1 | tail -1 - -# 2. Synkataan dist/ palvelimelle (vain muuttuneet tiedostot) -echo "[2/2] Synkataan dist/ → palvelin..." -ssh $SSH_OPTS $SERVER "mkdir -p $REMOTE_DIR/frontend/dist" -rsync -az --delete -e "ssh $SSH_OPTS" "$SCRIPT_DIR/frontend/dist/" "$SERVER:$REMOTE_DIR/frontend/dist/" - -echo "" -echo "=== Valmis! Frontend päivitetty — ei uudelleenkäynnistystä ===" -echo " https://kipina.studio" -echo "" -echo "Huom: Jos Rust-koodi (hub/) muuttui, aja: ./deploy.sh" diff --git a/network-poc/deploy-light.sh b/network-poc/deploy-light.sh deleted file mode 100755 index 8864d9d..0000000 --- a/network-poc/deploy-light.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# Kevyt deploy: lähetetään vain koodi, palvelin buildaa itse -set -e - -SERVER="ubuntu@86.50.252.98" -REMOTE_DIR="~/code/agentic-studio/network-poc" -SSH_OPTS="-o StrictHostKeyChecking=no" - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -echo "=== Kipinä Studio Deploy (remote build) ===" - -# 1. Synkataan koodi palvelimelle (vain muuttuneet tiedostot) -echo "[1/3] Synkataan koodi..." -rsync -az --delete \ - --exclude 'target/' \ - --exclude 'node_modules/' \ - --exclude 'dist/' \ - --exclude '.astro/' \ - --exclude 'temp/' \ - --exclude '*.db' \ - --exclude '.git/' \ - "$SCRIPT_DIR/" "$SERVER:$REMOTE_DIR/" - -# 2. Rakennetaan image palvelimella -echo "[2/3] Rakennetaan image palvelimella..." -ssh $SSH_OPTS $SERVER "cd $REMOTE_DIR && docker build -f Dockerfile.prod -t kipina-agentic:latest ." - -# 3. Käynnistetään -echo "[3/3] Käynnistetään..." -ssh $SSH_OPTS $SERVER "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d" - -echo "=== Valmis! https://kipina.studio ===" diff --git a/network-poc/deploy-local.sh b/network-poc/deploy-local.sh new file mode 100755 index 0000000..625676f --- /dev/null +++ b/network-poc/deploy-local.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Kipinä Studio — paikallinen kehitysympäristö +# Buildaa frontendin, käynnistää hubin ja native-noden (Ollama) +# Käyttö: ./deploy-local.sh +set -e +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +cleanup() { echo ""; echo "Pysäytetään..."; kill $HUB_PID $NODE_PID 2>/dev/null; exit 0; } +trap cleanup INT TERM + +# Portti vapaaksi +lsof -ti:3000 | xargs kill -9 2>/dev/null || true + +# Frontend +echo "[1/3] Frontend..." +cd "$SCRIPT_DIR/frontend" +[ -d node_modules ] || npm install --silent +npm run build 2>&1 | tail -1 +cd "$SCRIPT_DIR" + +# Hub +echo "[2/3] Hub..." +STATIC_DIR="$SCRIPT_DIR/frontend/dist" cargo run -p hub 2>&1 & +HUB_PID=$! +until curl -sf http://localhost:3000 >/dev/null 2>&1; do sleep 1; done + +# Native-node +NODE_PID="" +if curl -sf http://localhost:11434/api/tags >/dev/null 2>&1; then + MODEL=$(curl -s http://localhost:11434/api/tags | python3 -c " +import sys,json +ms=json.load(sys.stdin).get('models',[]) +for m in ms: + n=m['name'] + if '7b' in n and 'coder' in n: print(n); exit() +for m in ms: + if 'coder' in m['name']: print(m['name']); exit() +if ms: print(ms[0]['name']) +" 2>/dev/null) + if [ -n "$MODEL" ]; then + echo "[3/3] Native-node ($MODEL)..." + HUB_URL=ws://localhost:3000/ws OLLAMA_MODEL="$MODEL" \ + cargo run -p native-node --no-default-features 2>&1 & + NODE_PID=$! + else + echo "[3/3] Ollama: ei malleja (ollama pull qwen2.5-coder:7b)" + fi +else + echo "[3/3] Ei Ollamaa — Wasm-fallback selaimessa" +fi + +echo "" +echo "=== http://localhost:3000 === Ctrl+C pysäyttää" +open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || true +wait $HUB_PID diff --git a/network-poc/deploy-remote.sh b/network-poc/deploy-remote.sh new file mode 100755 index 0000000..f63b2e7 --- /dev/null +++ b/network-poc/deploy-remote.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Kipinä Studio — tuotanto-deploy kipina.studioon +# Buildaa Docker-imagen (frontend + hub + wasm) ja vie palvelimelle +# Käyttö: ./deploy-remote.sh +set -e +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +SERVER="ubuntu@86.50.252.98" +REMOTE_DIR="~/code/agentic-studio/network-poc" +KEY="$HOME/.ssh/id_rsa" +SSH_OPTS="-o StrictHostKeyChecking=no -i $KEY" + +# SSH-avain +if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then + echo "SSH-avain puuttuu agentista..." + ssh-add "$KEY" +fi + +# Auto-commit +if ! git diff --quiet HEAD 2>/dev/null || \ + [ -n "$(git ls-files --others --exclude-standard 2>/dev/null)" ]; then + echo "Uncommitted muutoksia — commitoidaan..." + read -rp " Commit-viesti: " msg + [ -z "$msg" ] && msg="Deploy $(date +%Y-%m-%d\ %H:%M)" + git add -A && git commit -m "$msg" +fi + +echo "=== Kipinä Studio Deploy → kipina.studio ===" + +# 1. Docker-image +echo "[1/4] Docker build..." +docker build --platform linux/amd64 -f Dockerfile.prod -t kipina-agentic:latest . + +# 2. Pakkaus +echo "[2/4] Pakataan..." +docker save kipina-agentic:latest | gzip > /tmp/kipina-agentic.tar.gz +echo " $(du -h /tmp/kipina-agentic.tar.gz | cut -f1)" + +# 3. Siirto +echo "[3/4] Siirretään..." +scp $SSH_OPTS /tmp/kipina-agentic.tar.gz "$SERVER:/tmp/" +scp $SSH_OPTS docker-compose.prod.yml Caddyfile.prod "$SERVER:$REMOTE_DIR/" + +# 4. Käynnistys +echo "[4/4] Käynnistetään..." +ssh $SSH_OPTS "$SERVER" "gunzip -c /tmp/kipina-agentic.tar.gz | docker load && rm /tmp/kipina-agentic.tar.gz" +ssh $SSH_OPTS "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d" + +# Discord +WEBHOOK="https://discord.com/api/webhooks/1489504066898755687/8U02d0wug-3MkVax0xMmRoj0s_-V1psnNLPWdSOjnGnKRBUpPjaU6XiX9Iu8DgJI69AP" +HASH=$(git log -1 --pretty=format:"%h" 2>/dev/null || echo "?") +MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "?") +PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({'content':sys.argv[1]}))" \ + "🚀 **Kipinä Studio julkaistu!** \`${HASH}\` ${MSG} https://kipina.studio") +curl -sf -H "Content-Type: application/json" -d "$PAYLOAD" "$WEBHOOK" >/dev/null || true + +echo "=== Valmis! https://kipina.studio ===" diff --git a/network-poc/deploy-with-native.sh b/network-poc/deploy-with-native.sh deleted file mode 100755 index da8d4a5..0000000 --- a/network-poc/deploy-with-native.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -# Deploy + native-node-binäärien käännös (jos muutoksia) -set -e - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -cd "$SCRIPT_DIR" - -echo "=== Kipinä Studio Deploy (+ native binäärit) ===" - -# 1. Käännetään native-node-binäärit (ohittaa automaattisesti jos ei muutoksia) -./build-binaries.sh - -# 2. Ajetaan normaali deploy -exec ./deploy.sh diff --git a/network-poc/deploy.sh b/network-poc/deploy.sh deleted file mode 100755 index 30746eb..0000000 --- a/network-poc/deploy.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash -set -e - -if [ "$1" == "local" ]; then - echo "=== Kipinä Studio Local Development ===" - echo "Käynnistetään kokonaisuus puhtaasti Docker-kontissa..." - docker compose up agentic-poc - exit 0 -fi - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -cd "$SCRIPT_DIR" - -SERVER="ubuntu@86.50.252.98" -REMOTE_DIR="~/code/agentic-studio/network-poc" -KEY="$HOME/.ssh/id_rsa" -SSH_OPTS="-o StrictHostKeyChecking=no -i $KEY" - -if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then - echo "SSH-avain ei ole agentissa. Lisätään..." - ssh-add "$KEY" -fi - -echo "=== Kipinä Studio Deploy ===" - -# 0. Commitoidaan uncommitted muutokset ennen deployta -if ! git diff --quiet HEAD 2>/dev/null || \ - [ -n "$(git ls-files --others --exclude-standard 2>/dev/null)" ]; then - echo "[0] Uncommitted muutoksia havaittu — commitoidaan..." - read -rp " Commit-viesti: " DEPLOY_MSG - if [ -z "$DEPLOY_MSG" ]; then - DEPLOY_MSG="Deploy $(date +%Y-%m-%d\ %H:%M)" - fi - git add -A - git commit -m "$DEPLOY_MSG" - echo " Commitoitu: $DEPLOY_MSG" -fi - -# 1. Docker-image (frontend + hub + wasm) -echo "[1/4] Rakennetaan image..." -docker build --platform linux/amd64 -f Dockerfile.prod -t kipina-agentic:latest . - -# 2. Pakataan -echo "[2/4] Pakataan image..." -docker save kipina-agentic:latest | gzip > /tmp/kipina-agentic.tar.gz -echo " Koko: $(du -h /tmp/kipina-agentic.tar.gz | cut -f1)" - -# 3. Siirretään -echo "[3/4] Siirretään palvelimelle..." -scp $SSH_OPTS /tmp/kipina-agentic.tar.gz $SERVER:/tmp/ -scp $SSH_OPTS docker-compose.prod.yml Caddyfile.prod $SERVER:$REMOTE_DIR/ - -# 4. Käynnistetään -echo "[4/4] Käynnistetään palvelimella..." -ssh $SSH_OPTS $SERVER "gunzip -c /tmp/kipina-agentic.tar.gz | docker load && rm /tmp/kipina-agentic.tar.gz" -ssh $SSH_OPTS $SERVER "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d" - -echo "=== Valmis! https://kipina.studio ===" - -# Discord-notifikaatio -DISCORD_WEBHOOK="https://discord.com/api/webhooks/1489504066898755687/8U02d0wug-3MkVax0xMmRoj0s_-V1psnNLPWdSOjnGnKRBUpPjaU6XiX9Iu8DgJI69AP" -COMMIT_HASH=$(git log -1 --pretty=format:"%h" 2>/dev/null || echo "?") -COMMIT_MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "?") -PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({'content': sys.argv[1]}))" \ - "🚀 **Kipinä Studio julkaistu!** -> \`${COMMIT_HASH}\` ${COMMIT_MSG} -> https://kipina.studio") -curl -s -H "Content-Type: application/json" -d "$PAYLOAD" "$DISCORD_WEBHOOK" > /dev/null diff --git a/network-poc/frontend/src/pages/index.astro b/network-poc/frontend/src/pages/index.astro index bf50d73..0206d2c 100644 --- a/network-poc/frontend/src/pages/index.astro +++ b/network-poc/frontend/src/pages/index.astro @@ -613,7 +613,7 @@ OUTPUT FORMAT: // === Terminal commands === const kpnCommands = { 'kpn': ['help','run','project','pipeline','stop','load','status','models','clear'], - 'kpn run': ['coder','coder-3b','manager','tester','qa','qwen-coder','smollm-135m'], + 'kpn run': ['coder','coder-3b','manager','tester','qa','qwen-coder'], 'kpn load': ['1','2'], 'kpn project': ['"'], 'kpn pipeline': ['"'], @@ -703,9 +703,8 @@ OUTPUT FORMAT: if (btn && btn.textContent.includes('Valmis')) { termLog(' ✓ Malli jo ladattu', '#3fb950'); } else { btn?.click(); } } else if (sub === 'models') { - termLog(' 1 qwen-coder Qwen2.5-Coder:0.5B ~990 MB'); - termLog(' 2 qwen-coder-3b Qwen2.5-Coder:3B ~6.2 GB'); - termLog(' 3 smollm-135m SmolLM 135M ~270 MB'); + termLog(' 1 qwen-coder Qwen2.5-Coder:0.5B ~990 MB (selain)'); + termLog(' 2 qwen-coder-3b Qwen2.5-Coder:3B ~6.2 GB (Ollama)'); } else if (sub === 'status') { termLog(` Hub: ${document.getElementById('hub-label').textContent} | Laskenta: ${document.getElementById('compute-label').textContent}`, '#a5d6ff'); } else if (sub === 'run') { diff --git a/network-poc/hub/src/main.rs b/network-poc/hub/src/main.rs index 78c2217..69e860e 100644 --- a/network-poc/hub/src/main.rs +++ b/network-poc/hub/src/main.rs @@ -196,7 +196,7 @@ async function load() { ].map(s => `
${s.v}
${s.l}
`).join(''); // Sessions — lajittelu: 1) aktiiviset nodet (online + ei viewer), 2) katsojat (online + viewer), 3) offline - const taskNames = {'tokenize':'Tokenisaatio','smollm-135m':'SmolLM 135M','qwen-05b':'Qwen2.5 0.5B','phi3-mini':'Phi-3 Mini','qwen-coder-05b':'Coder 0.5B','qwen-coder-3b':'Coder 3B','viewer':'Katsoja','codelab-viewer':'Koodilabra'}; + const taskNames = {'tokenize':'Tokenisaatio','qwen-05b':'Qwen2.5 0.5B','qwen-coder-05b':'Coder 0.5B','qwen-coder-3b':'Coder 3B','viewer':'Katsoja','codelab-viewer':'Koodilabra'}; sessions.sort((a, b) => { const aOnline = !a.disconnected_at; const bOnline = !b.disconnected_at; @@ -419,9 +419,7 @@ async fn main() { // Vapaa node -> lähetetään oikea tehtävä let msg = match task.as_str() { "tokenize" => Some(serde_json::json!({ "type": "pair_task", "en": en, "fi": fi })), - "smollm-135m" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "smollm-135m" })), "qwen-05b" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "qwen-05b" })), - "phi3-mini" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "phi3-mini" })), _ => None, // Coder ja viewer ei saa auto-tehtäviä }; diff --git a/network-poc/install.sh b/network-poc/install.sh deleted file mode 100755 index 71986ab..0000000 --- a/network-poc/install.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -# Kipinä Agentic Studio — asennusskripti (Debian/Ubuntu) -set -e - -echo "=== Kipinä Agentic Studio — Asennus ===" -echo "" - -# Tarkistetaan käyttöjärjestelmä -if [ ! -f /etc/debian_version ]; then - echo "⚠ Tämä skripti on suunniteltu Debian/Ubuntu-järjestelmille." - echo " Muilla jakeluilla voit asentaa riippuvuudet manuaalisesti." - read -p " Jatketaanko? (k/e) " -n 1 -r; echo - [[ $REPLY =~ ^[Kk]$ ]] || exit 1 -fi - -echo "[1/6] Päivitetään pakettilistaus..." -sudo apt-get update -qq - -echo "[2/6] Asennetaan peruspaketteja..." -sudo apt-get install -y -qq curl git build-essential pkg-config libssl-dev - -# Rust -if command -v rustc &>/dev/null; then - echo "[3/6] Rust löytyi: $(rustc --version)" -else - echo "[3/6] Asennetaan Rust..." - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - source "$HOME/.cargo/env" -fi - -# Node.js (Astro-frontend vaatii) -if command -v node &>/dev/null; then - echo "[4/6] Node.js löytyi: $(node --version)" -else - echo "[4/6] Asennetaan Node.js 22..." - curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - - sudo apt-get install -y -qq nodejs -fi - -# Ollama -if command -v ollama &>/dev/null; then - echo "[5/6] Ollama löytyi" -else - echo "[5/6] Asennetaan Ollama..." - curl -fsSL https://ollama.ai/install.sh | sh -fi - -# Malli -echo "[6/6] Ladataan kielimalli (qwen2.5-coder:3b)..." -ollama pull qwen2.5-coder:3b - -echo "" -echo "=== Asennus valmis! ===" -echo "" -echo "Käynnistä:" -echo " cd $(pwd)" -echo " ./network-poc/local.sh" -echo "" -echo "Avaa selaimessa: http://localhost:3000" diff --git a/network-poc/local.sh b/network-poc/local.sh deleted file mode 100755 index bcb1e90..0000000 --- a/network-poc/local.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash -set -e - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -cd "$SCRIPT_DIR" - -echo "=== Kipinä Studio Local Development ===" - -# Tapetaan vanhat prosessit portissa 3000 -if lsof -ti:3000 >/dev/null 2>&1; then - echo "[0] Vapautetaan portti 3000..." - lsof -ti:3000 | xargs kill -9 2>/dev/null || true - sleep 1 -fi - -# Frontend -echo "[1/3] Rakennetaan frontend..." -cd "$SCRIPT_DIR/frontend" -[ -d node_modules ] || npm install --silent -npm run build 2>&1 | tail -1 -cd "$SCRIPT_DIR" - -# Hub -echo "[2/3] Käynnistetään hub..." -STATIC_DIR="$SCRIPT_DIR/frontend/dist" cargo run -p hub 2>&1 & -HUB_PID=$! - -# Odotetaan että hub on pystyssä -for i in $(seq 1 10); do - if curl -s -o /dev/null http://localhost:3000 2>/dev/null; then break; fi - sleep 1 -done - -# Native-node (jos Ollama on käynnissä) -NODE_PID="" -if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then - MODEL=$(curl -s http://localhost:11434/api/tags | python3 -c " -import sys, json -models = json.load(sys.stdin).get('models', []) -# Priorisoi: 7b > 3b > mikä tahansa coder > mikä tahansa -best = None -for m in models: - name = m['name'] - if '7b' in name and 'coder' in name: best = name; break - if 'coder' in name and not best: best = name -if not best and models: best = models[0]['name'] -if best: print(best) -" 2>/dev/null) - - if [ -n "$MODEL" ]; then - echo "[3/3] Ollama: $MODEL — käynnistetään native-node..." - HUB_URL=ws://localhost:3000/ws OLLAMA_MODEL="$MODEL" cargo run -p native-node --no-default-features 2>&1 & - NODE_PID=$! - else - echo "[3/3] Ollama käynnissä mutta ei malleja — asenna: ollama pull qwen2.5-coder:7b" - fi -else - echo "[3/3] Ollama ei käynnissä — käytetään selaimen Wasm-laskentaa" -fi - -echo "" -echo "=== http://localhost:3000 ===" -echo " Ctrl+C pysäyttää" - -open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || true - -trap 'echo ""; echo "Pysäytetään..."; kill $HUB_PID $NODE_PID 2>/dev/null; exit 0' INT TERM -wait $HUB_PID diff --git a/network-poc/node/src/burn_smollm/attention.rs b/network-poc/node/src/burn_smollm/attention.rs deleted file mode 100644 index b69acfb..0000000 --- a/network-poc/node/src/burn_smollm/attention.rs +++ /dev/null @@ -1,118 +0,0 @@ -use burn::module::{Module, Param}; -use burn::tensor::{backend::Backend, Tensor}; -use super::rope::RoPE; -use super::config::SmolLMConfig; - -#[derive(Clone, Debug)] -pub struct KVCache { - pub k: Tensor, - pub v: Tensor, -} - -#[derive(Module, Debug)] -pub struct Attention { - pub q_proj: Param>, // [hidden, num_heads * head_dim] - pub k_proj: Param>, // [hidden, num_kv_heads * head_dim] - pub v_proj: Param>, // [hidden, num_kv_heads * head_dim] - pub o_proj: Param>, // [num_heads * head_dim, hidden] - - num_heads: usize, - num_kv_heads: usize, - head_dim: usize, - - rope: RoPE, -} - -impl Attention { - pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self { - let head_dim = config.hidden_size / config.num_attention_heads; - - Self { - q_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, config.num_attention_heads * head_dim], device)), - k_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, config.num_key_value_heads * head_dim], device)), - v_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, config.num_key_value_heads * head_dim], device)), - o_proj: Param::from_tensor(Tensor::zeros([config.num_attention_heads * head_dim, config.hidden_size], device)), - - num_heads: config.num_attention_heads, - num_kv_heads: config.num_key_value_heads, - head_dim, - - rope: RoPE::new(head_dim, config.max_position_embeddings, config.rope_theta, device), - } - } - - pub fn forward( - &self, - x: Tensor, - offset: usize, - cache: Option> - ) -> (Tensor, KVCache) { - let [batch, seq_len, hidden_dim] = x.dims(); - - // Project Q, K, V: x @ W -> [batch, seq, proj_dim] - let q = x.clone().matmul(self.q_proj.val().unsqueeze()); - let k = x.clone().matmul(self.k_proj.val().unsqueeze()); - let v = x.matmul(self.v_proj.val().unsqueeze()); - - // Reshape: [batch, seq, heads, head_dim] -> [batch, heads, seq, head_dim] - let q = q.reshape([batch, seq_len, self.num_heads, self.head_dim]).swap_dims(1, 2); - let k = k.reshape([batch, seq_len, self.num_kv_heads, self.head_dim]).swap_dims(1, 2); - let v = v.reshape([batch, seq_len, self.num_kv_heads, self.head_dim]).swap_dims(1, 2); - - // Apply RoPE - let q = self.rope.forward(q, offset); - let k = self.rope.forward(k, offset); - - // KV cache - let (k, v) = if let Some(c) = cache { - (Tensor::cat(vec![c.k, k], 2), Tensor::cat(vec![c.v, v], 2)) - } else { - (k, v) - }; - - let new_cache = KVCache { k: k.clone(), v: v.clone() }; - let kv_len = k.dims()[2]; - - // GQA: repeat K,V heads — [batch, kv_heads, kv_len, hd] -> [batch, num_heads, kv_len, hd] - let num_reps = self.num_heads / self.num_kv_heads; - let k = if num_reps > 1 { - let [b, kv_h, s, hd] = k.dims(); - k.reshape([b, kv_h, 1, s, hd]).repeat_dim(2, num_reps).reshape([b, self.num_heads, s, hd]) - } else { k }; - let v = if num_reps > 1 { - let [b, kv_h, s, hd] = v.dims(); - v.reshape([b, kv_h, 1, s, hd]).repeat_dim(2, num_reps).reshape([b, self.num_heads, s, hd]) - } else { v }; - - // Attention: Q @ K^T / sqrt(d) - let scale = 1.0 / (self.head_dim as f64).sqrt(); - let scores = q.matmul(k.swap_dims(2, 3)).mul_scalar(scale); - // scores: [batch, heads, seq_len, kv_len] - - // Causal mask for prefill (seq_len > 1) - let scores = if seq_len > 1 { - let mask_data: Vec = (0..seq_len).flat_map(|i| { - (0..kv_len).map(move |j| { - if j > offset + i { f32::NEG_INFINITY } else { 0.0 } - }) - }).collect(); - let mask = Tensor::::from_data( - burn::tensor::TensorData::new(mask_data, [seq_len, kv_len]), - &scores.device() - ).reshape([1, 1, seq_len, kv_len]); - scores + mask - } else { - scores - }; - - let attn_weights = burn::tensor::activation::softmax(scores, 3); - - let context = attn_weights.matmul(v); - // [batch, heads, seq, hd] -> [batch, seq, heads*hd] - let context = context.swap_dims(1, 2).reshape([batch, seq_len, self.num_heads * self.head_dim]); - - let output = context.matmul(self.o_proj.val().unsqueeze()); - - (output, new_cache) - } -} diff --git a/network-poc/node/src/burn_smollm/config.rs b/network-poc/node/src/burn_smollm/config.rs deleted file mode 100644 index ac0b263..0000000 --- a/network-poc/node/src/burn_smollm/config.rs +++ /dev/null @@ -1,28 +0,0 @@ -#[derive(Clone, Debug)] -pub struct SmolLMConfig { - pub hidden_size: usize, - pub intermediate_size: usize, - pub vocab_size: usize, - pub num_hidden_layers: usize, - pub num_attention_heads: usize, - pub num_key_value_heads: usize, - pub rms_norm_eps: f64, - pub rope_theta: f32, - pub max_position_embeddings: usize, -} - -impl Default for SmolLMConfig { - fn default() -> Self { - Self { - hidden_size: 576, - intermediate_size: 1536, - vocab_size: 49152, - num_hidden_layers: 30, - num_attention_heads: 9, - num_key_value_heads: 3, - rms_norm_eps: 1e-5, - rope_theta: 10000.0, - max_position_embeddings: 2048, - } - } -} diff --git a/network-poc/node/src/burn_smollm/loader.rs b/network-poc/node/src/burn_smollm/loader.rs deleted file mode 100644 index 6c1fcd3..0000000 --- a/network-poc/node/src/burn_smollm/loader.rs +++ /dev/null @@ -1,90 +0,0 @@ -use burn::tensor::{backend::Backend, Tensor, TensorData}; -use candle_core::safetensors; -use candle_core::Device as CandleDevice; -use burn::module::Param; -use super::model::LlamaModel; -use super::config::SmolLMConfig; - -fn load_tensor_2d( - tensors_map: &std::collections::HashMap, - name: &str, - device: &B::Device, - shape_out_in: [usize; 2] -) -> Result>, String> { - let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?; - let t = t.to_dtype(candle_core::DType::F32).unwrap(); - let vec = t.flatten_all().unwrap().to_vec1::().unwrap(); - let t_burn = Tensor::::from_data(burn::tensor::TensorData::new(vec, shape_out_in), device); - // transpose from [out, in] to [in, out] - Ok(Param::from_tensor(t_burn.transpose())) -} - -fn load_tensor_1d( - tensors_map: &std::collections::HashMap, - name: &str, - device: &B::Device, - _shape: [usize; 1] -) -> Result>, String> { - let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?; - let t = t.to_dtype(candle_core::DType::F32).unwrap(); - let vec = t.flatten_all().unwrap().to_vec1::().unwrap(); - Ok(Param::from_tensor(Tensor::::from_floats(vec.as_slice(), device))) -} - -fn load_embed( - tensors_map: &std::collections::HashMap, - name: &str, - device: &B::Device, - shape: [usize; 2] -) -> Result>, String> { - let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?; - let t = t.to_dtype(candle_core::DType::F32).unwrap(); - let vec = t.flatten_all().unwrap().to_vec1::().unwrap(); - // Embed ei transponoi samalla tavalla, se pysyy [vocab, hidden] - Ok(Param::from_tensor(Tensor::::from_data(burn::tensor::TensorData::new(vec, shape), device))) -} - -pub fn load_safetensors_to_model( - buffer: &[u8], - config: &SmolLMConfig, - device: &B::Device -) -> Result, String> { - - let mut model = LlamaModel::new(config, device); - let tensors_map = safetensors::load_buffer(buffer, &CandleDevice::Cpu) - .map_err(|e| format!("Virhe Safetensors luennassa: {}", e))?; - - // Embeddings - model.embed_tokens = load_embed(&tensors_map, "model.embed_tokens.weight", device, [config.vocab_size, config.hidden_size])?; - model.norm.weight = load_tensor_1d(&tensors_map, "model.norm.weight", device, [config.hidden_size])?; - model.lm_head = load_embed(&tensors_map, "lm_head.weight", device, [config.vocab_size, config.hidden_size]).or_else(|_| { - load_embed(&tensors_map, "model.embed_tokens.weight", device, [config.vocab_size, config.hidden_size]) - })?; - - let head_dim = config.hidden_size / config.num_attention_heads; - - for i in 0..config.num_hidden_layers { - let prefix = format!("model.layers.{}", i); - - let layer = &mut model.layers[i]; - - // Norms - layer.input_layernorm.weight = load_tensor_1d(&tensors_map, &format!("{}.input_layernorm.weight", prefix), device, [config.hidden_size])?; - layer.post_attention_layernorm.weight = load_tensor_1d(&tensors_map, &format!("{}.post_attention_layernorm.weight", prefix), device, [config.hidden_size])?; - - // Attention - let num_heads = config.num_attention_heads; - let num_kv_heads = config.num_key_value_heads; - layer.self_attn.q_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.q_proj.weight", prefix), device, [num_heads * head_dim, config.hidden_size])?; - layer.self_attn.k_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.k_proj.weight", prefix), device, [num_kv_heads * head_dim, config.hidden_size])?; - layer.self_attn.v_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.v_proj.weight", prefix), device, [num_kv_heads * head_dim, config.hidden_size])?; - layer.self_attn.o_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.o_proj.weight", prefix), device, [config.hidden_size, num_heads * head_dim])?; - - // MLP - layer.mlp.gate_proj = load_tensor_2d(&tensors_map, &format!("{}.mlp.gate_proj.weight", prefix), device, [config.intermediate_size, config.hidden_size])?; - layer.mlp.up_proj = load_tensor_2d(&tensors_map, &format!("{}.mlp.up_proj.weight", prefix), device, [config.intermediate_size, config.hidden_size])?; - layer.mlp.down_proj = load_tensor_2d(&tensors_map, &format!("{}.mlp.down_proj.weight", prefix), device, [config.hidden_size, config.intermediate_size])?; - } - - Ok(model) -} diff --git a/network-poc/node/src/burn_smollm/mod.rs b/network-poc/node/src/burn_smollm/mod.rs deleted file mode 100644 index 3664e61..0000000 --- a/network-poc/node/src/burn_smollm/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub mod attention; -pub mod config; -pub mod loader; -pub mod model; -pub mod modules; -pub mod rope; diff --git a/network-poc/node/src/burn_smollm/model.rs b/network-poc/node/src/burn_smollm/model.rs deleted file mode 100644 index 9a4f485..0000000 --- a/network-poc/node/src/burn_smollm/model.rs +++ /dev/null @@ -1,96 +0,0 @@ -use burn::module::{Module, Param}; -use burn::tensor::{backend::Backend, Tensor, Int}; -use super::modules::{RmsNorm, Mlp}; -use super::attention::{Attention, KVCache}; -use super::config::SmolLMConfig; - -#[derive(Module, Debug)] -pub struct LlamaBlock { - pub self_attn: Attention, - pub mlp: Mlp, - pub input_layernorm: RmsNorm, - pub post_attention_layernorm: RmsNorm, -} - -impl LlamaBlock { - pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self { - Self { - self_attn: Attention::new(config, device), - mlp: Mlp::new(config.hidden_size, config.intermediate_size, device), - input_layernorm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device), - post_attention_layernorm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device), - } - } - - pub fn forward( - &self, - x: Tensor, - offset: usize, - cache: Option> - ) -> (Tensor, KVCache) { - let residual = x.clone(); - let x_norm = self.input_layernorm.forward(x); - - let (attn_out, new_cache) = self.self_attn.forward(x_norm, offset, cache); - - let x = residual + attn_out; - - let residual = x.clone(); - let x_norm = self.post_attention_layernorm.forward(x); - let mlp_out = self.mlp.forward(x_norm); - - let x = residual + mlp_out; - (x, new_cache) - } -} - -#[derive(Module, Debug)] -pub struct LlamaModel { - pub embed_tokens: Param>, - pub layers: Vec>, - pub norm: RmsNorm, - pub lm_head: Param>, // For tie_word_embeddings this can point to embed_tokens -} - -impl LlamaModel { - pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self { - let embed = Tensor::zeros([config.vocab_size, config.hidden_size], device); - let lm_head = Tensor::zeros([config.vocab_size, config.hidden_size], device); - - let mut layers = Vec::new(); - for _ in 0..config.num_hidden_layers { - layers.push(LlamaBlock::new(config, device)); - } - - Self { - embed_tokens: Param::from_tensor(embed), - layers, - norm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device), - lm_head: Param::from_tensor(lm_head), - } - } - - pub fn forward( - &self, - input_ids: Tensor, - offset: usize, - caches: &mut Vec>> - ) -> Tensor { - let [_batch, _seq_len] = input_ids.dims(); - - let mut x = burn::tensor::module::embedding(self.embed_tokens.val(), input_ids); - - for (i, layer) in self.layers.iter().enumerate() { - let cache = caches[i].take(); - let (out, new_cache) = layer.forward(x, offset, cache); - x = out; - caches[i] = Some(new_cache); - } - - x = self.norm.forward(x); - - // Matmul with lm_head (or embed_tokens if tied) to get logits - // Notice: lm_head is typically [vocab_size, hidden_size] in HF, so we swap dims - x.matmul(self.lm_head.val().swap_dims(0, 1).unsqueeze()) - } -} diff --git a/network-poc/node/src/burn_smollm/modules.rs b/network-poc/node/src/burn_smollm/modules.rs deleted file mode 100644 index b1dc9cb..0000000 --- a/network-poc/node/src/burn_smollm/modules.rs +++ /dev/null @@ -1,59 +0,0 @@ -use burn::module::{Module, Param}; -use burn::tensor::{backend::Backend, Tensor}; - -#[derive(Module, Debug)] -pub struct RmsNorm { - pub weight: Param>, - epsilon: f64, -} - -impl RmsNorm { - pub fn new(size: usize, epsilon: f64, device: &B::Device) -> Self { - let weight = Param::from_tensor(Tensor::ones([size], device)); - Self { weight, epsilon } - } - - pub fn forward(&self, x: Tensor) -> Tensor { - // x: [batch, seq_len, dim] - // RMSNorm: x * weight / sqrt(mean(x^2) + eps) - let x_sq = x.clone().powf_scalar(2.0); - // mean over last dim, keeping dims for broadcast - let [b, s, d] = x_sq.dims(); - let variance = x_sq.sum_dim(2).div_scalar(d as f32); - let norm = x.div(variance.add_scalar(self.epsilon).sqrt()); - - let w = self.weight.val().unsqueeze::<2>().unsqueeze::<3>().reshape([1, 1, d]); - norm * w - } -} - -#[derive(Module, Debug)] -pub struct Mlp { - pub gate_proj: Param>, // [in, intermediate] - pub up_proj: Param>, // [in, intermediate] - pub down_proj: Param>, // [intermediate, out] -} - -impl Mlp { - pub fn new(hidden_size: usize, intermediate_size: usize, device: &B::Device) -> Self { - Self { - gate_proj: Param::from_tensor(Tensor::zeros([hidden_size, intermediate_size], device)), - up_proj: Param::from_tensor(Tensor::zeros([hidden_size, intermediate_size], device)), - down_proj: Param::from_tensor(Tensor::zeros([intermediate_size, hidden_size], device)), - } - } - - pub fn forward(&self, x: Tensor) -> Tensor { - // x: [batch, seq, hidden] - // gate = x @ gate_proj -> [batch, seq, intermediate] - let gate = x.clone().matmul(self.gate_proj.val().unsqueeze()); - let up = x.matmul(self.up_proj.val().unsqueeze()); - - // SiLU(gate) * up - let silu = gate.clone() * burn::tensor::activation::sigmoid(gate); - let intermediate = silu * up; - - // intermediate @ down_proj -> [batch, seq, hidden] - intermediate.matmul(self.down_proj.val().unsqueeze()) - } -} diff --git a/network-poc/node/src/burn_smollm/rope.rs b/network-poc/node/src/burn_smollm/rope.rs deleted file mode 100644 index 2ed2993..0000000 --- a/network-poc/node/src/burn_smollm/rope.rs +++ /dev/null @@ -1,59 +0,0 @@ -use burn::module::Module; -use burn::tensor::{backend::Backend, Tensor}; - -#[derive(Module, Debug)] -pub struct RoPE { - cos_cache: Tensor, - sin_cache: Tensor, -} - -impl RoPE { - pub fn new(head_dim: usize, max_seq_len: usize, theta: f32, device: &B::Device) -> Self { - // (head_dim / 2) values - let half_dim = head_dim / 2; - let inv_freq: Vec = (0..half_dim) - .map(|i| 1.0 / theta.powf((2 * i) as f32 / head_dim as f32)) - .collect(); - - let inv_freq = Tensor::::from_floats(inv_freq.as_slice(), device).unsqueeze::<2>(); - let t_floats: Vec = (0..max_seq_len).map(|v| v as f32).collect(); - let t = Tensor::::from_floats(t_floats.as_slice(), device).unsqueeze::<2>().transpose(); - // t shape: [max_seq_len, 1] - // inv_freq shape: [1, half_dim] - - // freqs shape: [max_seq_len, half_dim] - let freqs = t.matmul(inv_freq); - - let cos_cache = freqs.clone().cos(); - let sin_cache = freqs.sin(); - - Self { - cos_cache, - sin_cache, - } - } - - pub fn forward(&self, x: Tensor, offset: usize) -> Tensor { - let [batch, heads, seq_len, head_dim] = x.dims(); - let half_dim = head_dim / 2; - - // x shape: [batch, heads, seq_len, head_dim] - // valitaan viipaleet (x1 ja x2) jotta saadaan pyöritettyä rotaatiot - let x1 = x.clone().slice([0..batch, 0..heads, 0..seq_len, 0..half_dim]); - let x2 = x.clone().slice([0..batch, 0..heads, 0..seq_len, half_dim..head_dim]); - - // haetaan vastaava seq offsetista alkaen - let cos = self.cos_cache.clone().slice([offset..offset+seq_len, 0..half_dim]) - .unsqueeze::<4>() // [seq, half_dim, 1] - .reshape([1, 1, seq_len, half_dim]); - let sin = self.sin_cache.clone().slice([offset..offset+seq_len, 0..half_dim]) - .reshape([1, 1, seq_len, half_dim]); - - // x1 * cos - x2 * sin - let o1 = x1.clone().mul(cos.clone()) - x2.clone().mul(sin.clone()); - // x2 * cos + x1 * sin - let o2 = x2.mul(cos) + x1.mul(sin); - - Tensor::cat(vec![o1, o2], 3) - } -} diff --git a/network-poc/node/src/lib.rs b/network-poc/node/src/lib.rs index 12a6363..7c0a584 100644 --- a/network-poc/node/src/lib.rs +++ b/network-poc/node/src/lib.rs @@ -8,11 +8,8 @@ use burn::backend::{Wgpu, NdArray}; pub mod storage; pub mod sampling; -pub mod smollm; pub mod qwen; pub mod qwen_coder; -pub mod phi3; -pub mod burn_smollm; #[macro_export] macro_rules! console_log { @@ -246,7 +243,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso HAS_WEBGPU.store(has_webgpu, Ordering::SeqCst); SELECTED_TASK.store(task_id, Ordering::SeqCst); let backend_name = if has_webgpu { "WebGPU" } else { "CPU (NdArray)" }; - let task_names = ["tokenize", "smollm-135m", "qwen-05b", "phi3-mini", "qwen-coder-05b", "qwen-coder-3b"]; + let task_names = ["tokenize", "qwen-05b", "qwen-coder-05b", "qwen-coder-3b"]; let task_name = task_names.get(task_id as usize).unwrap_or(&"tokenize"); console_log!("Kipinä Agent Node käynnistyy — backend: {} | tehtävä: {}", backend_name, task_name); @@ -303,22 +300,6 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso } } } else if msg.contains("llm_prompt") && current_task == 1 && auto_on { - // Vain SmolLM-solmut, ja vain yksi inferenssi kerrallaan - if LLM_BUSY.load(Ordering::SeqCst) { - // Ohitetaan — edellinen inferenssi vielä käynnissä - } else if let Ok(task) = serde_json::from_str::(&msg) { - let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string(); - let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string(); - if !prompt.is_empty() && model == "smollm-135m" { - LLM_BUSY.store(true, Ordering::SeqCst); - let ws_for_async = ws_clone.clone(); - wasm_bindgen_futures::spawn_local(async move { - smollm::run_smollm_inference(prompt, ws_for_async).await; - LLM_BUSY.store(false, Ordering::SeqCst); - }); - } - } - } else if msg.contains("llm_prompt") && current_task == 2 && auto_on { // Qwen2.5-0.5B if LLM_BUSY.load(Ordering::SeqCst) { } else if let Ok(task) = serde_json::from_str::(&msg) { @@ -333,21 +314,6 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso }); } } - } else if msg.contains("llm_prompt") && current_task == 3 && auto_on { - // Phi-3 Mini - if LLM_BUSY.load(Ordering::SeqCst) { - } else if let Ok(task) = serde_json::from_str::(&msg) { - let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string(); - let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string(); - if !prompt.is_empty() && model.starts_with("phi3-mini") { - LLM_BUSY.store(true, Ordering::SeqCst); - let ws_for_async = ws_clone.clone(); - wasm_bindgen_futures::spawn_local(async move { - phi3::run_phi3_inference(prompt, ws_for_async).await; - LLM_BUSY.store(false, Ordering::SeqCst); - }); - } - } } else if msg.contains("llm_prompt") { console_log!("[DEBUG] llm_prompt vastaanotettu! current_task={}, busy={}", current_task, LLM_BUSY.load(Ordering::SeqCst)); if current_task == 4 || current_task == 5 { diff --git a/network-poc/node/src/phi3.rs b/network-poc/node/src/phi3.rs deleted file mode 100644 index b956973..0000000 --- a/network-poc/node/src/phi3.rs +++ /dev/null @@ -1,36 +0,0 @@ -use candle_core::{Device, Tensor, DType}; -use candle_nn::VarBuilder; -use candle_transformers::models::phi3::{Config as Phi3Config, Model as Phi3Model}; -use wasm_bindgen::JsCast; -use std::cell::RefCell; -use std::rc::Rc; -use web_sys::WebSocket; - -use crate::storage; - -macro_rules! console_log { - ($($t:tt)*) => (web_sys::console::log_1(&format_args!($($t)*).to_string().into())) -} - -const MODEL_URL: &str = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors.index.json"; -const TOKENIZER_URL: &str = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.json"; - -// Phi-3 Mini on iso (7.6 GB) — käytetään kvantisoidumpaa versiota myöhemmin -// Tällä hetkellä: placeholder joka raportoi koon ja jättää inferenssin väliin -pub async fn run_phi3_inference(prompt: String, ws: Rc>) { - console_log!("[Phi-3] Phi-3 Mini 3.8B on liian suuri selaimessa ajettavaksi (~7.6 GB)."); - console_log!("[Phi-3] Käytä SmolLM 135M tai Qwen2.5 0.5B selaininferenssiin."); - console_log!("[Phi-3] Phi-3 tuetaan native-node:lla (Docker + GPU)."); - - let done = serde_json::json!({ - "type": "llm_done", - "prompt": prompt, - "model": "Phi-3-Mini (ei tuettu selaimessa)", - "response": "Phi-3 Mini 3.8B on liian suuri selaimessa ajettavaksi. Käytä SmolLM 135M tai Qwen2.5 0.5B.", - "tokens_generated": 0, - "duration_ms": 0, - "tokens_per_sec": 0, - "load_time_ms": 0, - }); - let _ = ws.borrow().send_with_str(&done.to_string()); -} diff --git a/network-poc/node/src/smollm.rs b/network-poc/node/src/smollm.rs deleted file mode 100644 index 6417cb2..0000000 --- a/network-poc/node/src/smollm.rs +++ /dev/null @@ -1,232 +0,0 @@ -use candle_core::{Device, Tensor, DType}; -use candle_nn::VarBuilder; -use candle_transformers::models::llama::{Llama, LlamaConfig, LlamaEosToks, Cache}; -// LogitsProcessor poistettu — käytetään greedy samplingia (argmax) Wasm-yhteensopivuuden vuoksi -use wasm_bindgen::JsCast; -use std::cell::RefCell; -use std::rc::Rc; -use web_sys::WebSocket; - -use crate::storage; - -macro_rules! console_log { - ($($t:tt)*) => (web_sys::console::log_1(&format_args!($($t)*).to_string().into())) -} - -const MODEL_URL: &str = "https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct/resolve/main/model.safetensors"; -const TOKENIZER_URL: &str = "https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct/resolve/main/tokenizer.json"; - -/// Lataa tiedosto HuggingFacesta streaming-latauksella (progress-ilmoitukset) ja tallentaa IndexedDB:hen -async fn ensure_cached(key: &str, url: &str, ws: &Rc>) -> Result, String> { - if let Ok(Some(bytes)) = storage::load_from_idb(key).await { - console_log!("[SmolLM] {} löytyi välimuistista ({} MB)", key, bytes.len() / 1024 / 1024); - send_progress(ws, key, 100, bytes.len(), bytes.len()); - return Ok(bytes); - } - - console_log!("[SmolLM] Ladataan {}...", key); - send_progress(ws, key, 0, 0, 0); - - // Fetch API:lla saadaan Content-Length ja streaming-luku - let resp = crate::worker_fetch(url).await?; - - if !resp.ok() { - return Err(format!("HTTP {}", resp.status())); - } - - // Kokonaiskoko Content-Length-headerista - let total_size: usize = resp.headers() - .get("content-length").ok().flatten() - .and_then(|s| s.parse().ok()) - .unwrap_or(0); - - let body = resp.body().ok_or("Ei bodyä")?; - let reader = body.get_reader(); - let reader: web_sys::ReadableStreamDefaultReader = reader.dyn_into().map_err(|_| "Ei ReadableStreamDefaultReader".to_string())?; - - let mut data: Vec = Vec::with_capacity(total_size); - let mut last_pct: u32 = 0; - - loop { - let chunk = wasm_bindgen_futures::JsFuture::from(reader.read()) - .await.map_err(|e| format!("Luku epäonnistui: {:?}", e))?; - - let done = js_sys::Reflect::get(&chunk, &"done".into()) - .map_err(|_| "done-kenttä puuttuu".to_string())? - .as_bool().unwrap_or(true); - - if done { break; } - - let value = js_sys::Reflect::get(&chunk, &"value".into()) - .map_err(|_| "value-kenttä puuttuu".to_string())?; - let array = js_sys::Uint8Array::new(&value); - let mut buf = vec![0u8; array.length() as usize]; - array.copy_to(&mut buf); - data.extend_from_slice(&buf); - - // Progress-päivitys (joka 5%) - if total_size > 0 { - let pct = ((data.len() as f64 / total_size as f64) * 100.0) as u32; - if pct >= last_pct + 5 || pct == 100 { - last_pct = pct; - console_log!("[SmolLM] {} lataus: {}% ({}/{} MB)", key, pct, data.len() / 1024 / 1024, total_size / 1024 / 1024); - send_progress(ws, key, pct, data.len(), total_size); - } - } - } - - console_log!("[SmolLM] Tallennetaan {} ({} MB) IndexedDB:hen...", key, data.len() / 1024 / 1024); - let _ = storage::save_to_idb(key, &data).await; - console_log!("[SmolLM] {} tallennettu!", key); - send_progress(ws, key, 100, data.len(), data.len()); - - Ok(data) -} - -fn send_progress(ws: &Rc>, file: &str, pct: u32, loaded: usize, total: usize) { - let msg = serde_json::json!({ - "type": "download_progress", - "file": file, - "pct": pct, - "loaded_mb": loaded / 1024 / 1024, - "total_mb": total / 1024 / 1024, - }); - let _ = ws.borrow().send_with_str(&msg.to_string()); -} - -/// Lataa malli ja tokenizer, suorita inferenssi ja streamaa tokenit hubille -pub async fn run_smollm_inference(prompt: String, ws: Rc>) { - // performance via crate::perf_now() - - // 1. Lataa tokenizer - let tok_bytes = match ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws).await { - Ok(b) => b, - Err(e) => { console_log!("[SmolLM] Tokenizer-virhe: {}", e); return; } - }; - - let tokenizer = match tokenizers::Tokenizer::from_bytes(&tok_bytes) { - Ok(t) => t, - Err(e) => { console_log!("[SmolLM] Tokenizer-parsinta epäonnistui: {}", e); return; } - }; - - // 2. Lataa mallin painot - let model_bytes = match ensure_cached("smollm-model.safetensors", MODEL_URL, &ws).await { - Ok(b) => b, - Err(e) => { console_log!("[SmolLM] Malli-virhe: {}", e); return; } - }; - - // Burn 0.14 wgpu ei yhteensopiva nykyisten selainten kanssa (maxInterStageShaderComponents) - // Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu) - // → NdArray kunnes Burn 0.21 stable + Wasm-tuki - console_log!("[SmolLM] Burn NdArray (CPU) inferenssi..."); - run_burn_inference::(prompt, model_bytes, tokenizer, ws).await; -} - -async fn run_burn_inference( - prompt: String, - model_bytes: Vec, - tokenizer: tokenizers::Tokenizer, - ws: Rc>, -) { - let start_load = crate::perf_now(); - - let device = Default::default(); - let config = crate::burn_smollm::config::SmolLMConfig::default(); - - console_log!("[SmolLM] Injektoidaan Safetensors -> Burn Params..."); - let model = match crate::burn_smollm::loader::load_safetensors_to_model::(&model_bytes, &config, &device) { - Ok(m) => m, - Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; } - }; - - let load_time = crate::perf_now() - start_load; - console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). Generoidaan...", load_time); - - let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt); - let encoding = match tokenizer.encode(formatted_prompt.as_str(), true) { - Ok(e) => e, - Err(e) => { console_log!("[SmolLM] Tokenisointivirhe: {}", e); return; } - }; - - let mut input_ids: Vec = encoding.get_ids().to_vec(); - let input_len = input_ids.len(); - console_log!("[SmolLM] Syöte: {} tokenia", input_len); - - let start_gen = crate::perf_now(); - let max_new_tokens = 32; - let mut generated_text = String::new(); - let mut tokens_generated: usize = 0; - - // KV-välimuistin taulukko kerroksittain - let mut caches: Vec>> = vec![None; config.num_hidden_layers]; - let mut current_offset = 0; - - // Prefill: yksitellen, vältetään future token leakage koska ei causal maskia - let input_ids_i32: Vec = input_ids.iter().map(|&x| x as i32).collect(); - let mut last_logits = None; - - for &id in &input_ids_i32 { - let input_tensor = burn::tensor::Tensor::::from_data( - burn::tensor::TensorData::from([id]), - &device - ).unsqueeze::<2>(); // [1, 1] - - last_logits = Some(model.forward(input_tensor, current_offset, &mut caches)); - current_offset += 1; - } - - let mut logits = last_logits.unwrap(); - - // Argmax sämpläys - let next_token_tensor = logits.clone().argmax(2); - let mut next_token: u32 = next_token_tensor.into_scalar().to_string().parse().unwrap_or(2); // Yksinkertainen cast koska int scalar - - if next_token != 2 { - if let Ok(text) = tokenizer.decode(&[next_token], true) { - generated_text.push_str(&text); - let chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "SmolLM-135M (WebGPU)" }); - let _ = ws.borrow().send_with_str(&chunk.to_string()); - } - tokens_generated += 1; - } - - // Autoregressiivinen luuppi - for _ in 1..max_new_tokens { - if next_token == 2 { break; } - - let mut input_tensor = burn::tensor::Tensor::::from_data( - burn::tensor::TensorData::from([next_token as i32]), - &device - ).unsqueeze::<2>(); - - logits = model.forward(input_tensor, current_offset, &mut caches); - current_offset += 1; - - let next_token_tensor = logits.argmax(2); - next_token = next_token_tensor.into_scalar().to_string().parse().unwrap_or(2); - - if next_token == 2 { break; } - - if let Ok(text) = tokenizer.decode(&[next_token], true) { - generated_text.push_str(&text); - let chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "SmolLM-135M (WebGPU)" }); - let _ = ws.borrow().send_with_str(&chunk.to_string()); - } - tokens_generated += 1; - } - - let gen_time = crate::perf_now() - start_gen; - let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 }; - - let done = serde_json::json!({ - "type": "llm_done", - "prompt": prompt, - "model": "SmolLM-135M-Instruct (WebGPU)", - "response": generated_text, - "tokens_generated": tokens_generated, - "duration_ms": (gen_time * 100.0).round() / 100.0, - "tokens_per_sec": (tokens_per_sec * 10.0).round() / 10.0, - "load_time_ms": (load_time * 100.0).round() / 100.0, - }); - let _ = ws.borrow().send_with_str(&done.to_string()); -} diff --git a/network-poc/nodes.db b/network-poc/nodes.db index ee0f37a7180336fd19ceb9b917a3af174294b642..ca046f5fde1a55a825ce0b73954984f7bce8f55e 100644 GIT binary patch delta 241 zcmZoTz|?SnX@WH4v57LyjK?-6%(rK>-{im;z{awVL5%tGWcCT_Y^>hQmst`w3ku9< zW@+TMlboE`BsTfsR5@M?D-#1NBTGGVQ&UTW$sgN1xR|2tB^emyrF|JDXLsoHffbmT z>X{lCnVB0*PMjys!ePVE3o?M2lY?opgR(THA*@xlk_?=a1J%SP&+nJfgIHp&XK86+ yWNe{rU|?lnU<5Qz*T6*A&?v+J;Rq8WGl(6H+~zPx7PGQ9a$A6ToAakl_Nl; delta 73 zcmZoTz|?SnX@WH4fr&ECj0ZL*%(rK>+2p_&z{X+2Ak2DvGW!Je&4L0mSy>vn%_S!% cHi=DsJ6Ue>|EZy@tc~0jV3E!F(/dev/null || true - -echo "" -echo "Käynnistetään Kipinä Hub taustalle..." -# Ohjataan Hubin logit erilliseen tiedostoon jottei se sotke näkymää! -env STATIC_DIR=frontend/dist cargo run -p hub > hub-local.log 2>&1 & -HUB_PID=$! - -# Odotellaan, että Hub saa portit kuunteluun -sleep 2 - -echo "Käynnistetään Natiivisolmu ja Ratatui-dashboard..." -# Käynnistetään TUI ja pakotetaan yhdistämään lokaaliin Hubiin. -env -u OLLAMA_MODEL HUB_URL=ws://127.0.0.1:3000/ws cargo run -p native-node - -# Kun TUI ohjelmasta on poistuttu -echo "" -echo "Dashboard suljettu! Ajetaan lokaali Hub (#$HUB_PID) siististi alas..." -kill $HUB_PID -echo "Kaikki sammutettu. Kiitos!"