deployment kokonaan uusiksi
This commit is contained in:
@@ -1,63 +0,0 @@
|
||||
#!/bin/bash
# Build the kipina-node binaries for every supported platform.
#
# Artifacts are written to frontend/public/download/. The build is skipped
# when the build inputs (native-node/, Cargo.toml, Cargo.lock) are unchanged
# since the last successful run, as recorded in .build-hash.
set -e
# Without pipefail, `cargo build ... | tail -1` reports tail's status and a
# failed build would go unnoticed.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
OUT="$SCRIPT_DIR/frontend/public/download"
HASH_FILE="$OUT/.build-hash"
BUILD_INPUTS=(native-node/ Cargo.toml Cargo.lock)
mkdir -p "$OUT"

# Prints the size column of `ls -lh` for the file given as $1.
print_size() {
  echo " $(ls -lh "$1" | awk '{print $5}')"
}

# Runs a command line inside a rust:slim container with the repo mounted at /app.
# Arguments: $1 - docker platform (e.g. linux/amd64), $2 - bash command line
docker_build() {
  docker run --rm \
    -v "$SCRIPT_DIR":/app -w /app \
    --platform "$1" \
    rust:slim \
    bash -c "$2"
}

# Check whether native-node has changed since the previous build.
CURRENT_HASH=$(git -C "$SCRIPT_DIR" log -1 --format=%H -- "${BUILD_INPUTS[@]}")
if [ -n "$(git -C "$SCRIPT_DIR" status --porcelain -- "${BUILD_INPUTS[@]}")" ]; then
  # Dirty tree: fingerprint the status AND the diff content, so that further
  # edits to an already-modified file produce a new hash. git itself does the
  # hashing, which avoids depending on md5sum being installed.
  # Note: the content of untracked files is not part of the fingerprint.
  CURRENT_HASH="dirty-$(
    {
      git -C "$SCRIPT_DIR" status --porcelain -- "${BUILD_INPUTS[@]}"
      git -C "$SCRIPT_DIR" diff HEAD -- "${BUILD_INPUTS[@]}"
    } | git hash-object --stdin
  )"
fi

if [ -f "$HASH_FILE" ] && [ "$(cat "$HASH_FILE")" = "$CURRENT_HASH" ]; then
  echo "=== Kipinä Node — ei muutoksia, ohitetaan build ==="
  ls -lh "$OUT"/kipina-node-* 2>/dev/null || true
  exit 0
fi

echo "=== Kipinä Node — Binary Build ==="

# macOS ARM (native build on the host)
echo "[1/4] macOS ARM64..."
cd "$SCRIPT_DIR"
cargo build --release -p native-node --no-default-features 2>&1 | tail -1
cp target/release/native-node "$OUT/kipina-node-macos-arm64"
print_size "$OUT/kipina-node-macos-arm64"

# Linux x86_64 (Docker)
echo "[2/4] Linux x86_64..."
docker_build linux/amd64 \
  "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev >/dev/null 2>&1 && cargo build --release -p native-node --no-default-features --target-dir target/docker_linux_amd64 && cp target/docker_linux_amd64/release/native-node /app/frontend/public/download/kipina-node-linux-x86_64"
print_size "$OUT/kipina-node-linux-x86_64"

# Linux ARM64 (Docker)
echo "[3/4] Linux ARM64..."
docker_build linux/arm64 \
  "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev >/dev/null 2>&1 && cargo build --release -p native-node --no-default-features --target-dir target/docker_linux_arm64 && cp target/docker_linux_arm64/release/native-node /app/frontend/public/download/kipina-node-linux-arm64"
print_size "$OUT/kipina-node-linux-arm64"

# Windows x86_64 (Docker + mingw-w64 cross compile)
echo "[4/4] Windows x86_64..."
docker_build linux/amd64 \
  "apt-get update -qq && apt-get install -y -qq gcc-mingw-w64-x86-64 pkg-config libssl-dev >/dev/null 2>&1 && rustup target add x86_64-pc-windows-gnu && cargo build --release -p native-node --no-default-features --target x86_64-pc-windows-gnu && cp target/x86_64-pc-windows-gnu/release/native-node.exe /app/frontend/public/download/kipina-node-windows-x86_64.exe"
print_size "$OUT/kipina-node-windows-x86_64.exe"

# Record the fingerprint only after every build succeeded.
echo "$CURRENT_HASH" > "$HASH_FILE"

echo ""
echo "=== Binäärit valmiina ==="
ls -lh "$OUT"/kipina-node-*
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/bin/bash
# Binary-only update: builds the kipina-node binaries and copies them straight
# into the server's frontend/dist directory, skipping the full Docker image
# build.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SERVER="ubuntu@86.50.252.98"
# Kept with a literal "~": it is expanded on the remote side.
REMOTE_DIR="~/code/agentic-studio/network-poc"
KEY="$HOME/.ssh/id_rsa"
# An array keeps the key path a single argument even if $HOME contains spaces.
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
SSH_OPTS=(-o StrictHostKeyChecking=no -i "$KEY")

if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then
  echo "SSH-avain ei ole agentissa. Lisätään..."
  ssh-add "$KEY"
fi

echo "=== Kipinä Node - Vain Binäärien Päivitys ==="

# 1. Build the binaries (build-binaries.sh skips the work when nothing changed).
"$SCRIPT_DIR/build-binaries.sh"

# 2. Copy the binaries directly into the target directory on the server.
echo ""
echo "[Vieminen uuteen kohteeseen...]"
ssh "${SSH_OPTS[@]}" "$SERVER" "mkdir -p $REMOTE_DIR/frontend/dist/download"
scp "${SSH_OPTS[@]}" "$SCRIPT_DIR/frontend/public/download"/kipina-node-* "$SERVER:$REMOTE_DIR/frontend/dist/download/"
scp "${SSH_OPTS[@]}" "$SCRIPT_DIR/frontend/public"/kipina-node "$SERVER:$REMOTE_DIR/frontend/dist/"

# 3. Make the uploaded binaries executable.
ssh "${SSH_OPTS[@]}" "$SERVER" "chmod +x $REMOTE_DIR/frontend/dist/download/kipina-node-*"

echo "=== Valmis! Binäärit ovat nyt asennettu livenä ja ladattavissa kipina.studiosta ==="
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
# Fast deploy: updates only the frontend (no container restart).
# To update the hub binary use deploy.sh or deploy-light.sh.
set -e
# The build output is piped through `tail`; without pipefail a failed build
# would be ignored and `rsync --delete` would then publish a stale or
# half-written dist/.
set -o pipefail

SERVER="ubuntu@86.50.252.98"
# Kept with a literal "~": it is expanded on the remote side.
REMOTE_DIR="~/code/agentic-studio/network-poc"
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
SSH_OPTS=(-o StrictHostKeyChecking=no)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "=== Kipinä Studio — Frontend Deploy ==="

# 1. Build the frontend locally.
echo "[1/2] Rakennetaan frontend..."
cd "$SCRIPT_DIR/frontend"
[ -d node_modules ] || npm install --silent
npm run build --silent 2>&1 | tail -1

# 2. Sync dist/ to the server (only changed files are transferred).
echo "[2/2] Synkataan dist/ → palvelin..."
ssh "${SSH_OPTS[@]}" "$SERVER" "mkdir -p $REMOTE_DIR/frontend/dist"
rsync -az --delete -e "ssh ${SSH_OPTS[*]}" "$SCRIPT_DIR/frontend/dist/" "$SERVER:$REMOTE_DIR/frontend/dist/"

echo ""
echo "=== Valmis! Frontend päivitetty — ei uudelleenkäynnistystä ==="
echo " https://kipina.studio"
echo ""
echo "Huom: Jos Rust-koodi (hub/) muuttui, aja: ./deploy.sh"
|
||||
@@ -1,33 +0,0 @@
|
||||
#!/bin/bash
# Light deploy: only the source code is sent; the server builds the image itself.
set -e

SERVER="ubuntu@86.50.252.98"
# Kept with a literal "~": it is expanded on the remote side.
REMOTE_DIR="~/code/agentic-studio/network-poc"
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
SSH_OPTS=(-o StrictHostKeyChecking=no)

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "=== Kipinä Studio Deploy (remote build) ==="

# 1. Sync the code to the server (only changed files are transferred).
# rsync now uses the same SSH options as the ssh calls below, so all three
# connections behave identically on a host that is not yet in known_hosts.
echo "[1/3] Synkataan koodi..."
rsync -az --delete \
  -e "ssh ${SSH_OPTS[*]}" \
  --exclude 'target/' \
  --exclude 'node_modules/' \
  --exclude 'dist/' \
  --exclude '.astro/' \
  --exclude 'temp/' \
  --exclude '*.db' \
  --exclude '.git/' \
  "$SCRIPT_DIR/" "$SERVER:$REMOTE_DIR/"

# 2. Build the image on the server.
echo "[2/3] Rakennetaan image palvelimella..."
ssh "${SSH_OPTS[@]}" "$SERVER" "cd $REMOTE_DIR && docker build -f Dockerfile.prod -t kipina-agentic:latest ."

# 3. Restart the stack.
echo "[3/3] Käynnistetään..."
ssh "${SSH_OPTS[@]}" "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d"

echo "=== Valmis! https://kipina.studio ==="
|
||||
56
network-poc/deploy-local.sh
Executable file
56
network-poc/deploy-local.sh
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/bin/bash
# Kipinä Studio — local development environment
# Builds the frontend, then starts the hub and (when Ollama is reachable)
# a native-node.
# Usage: ./deploy-local.sh
set -e
# Without pipefail `npm run build | tail -1` hides a failed frontend build.
set -o pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

HUB_PID=""
NODE_PID=""

# Terminates whichever background processes have been started so far.
stop_children() {
  kill ${HUB_PID:+"$HUB_PID"} ${NODE_PID:+"$NODE_PID"} 2>/dev/null || true
}

# Signal handler for Ctrl+C / TERM: stop the children and exit cleanly.
cleanup() { echo ""; echo "Pysäytetään..."; stop_children; exit 0; }
trap cleanup INT TERM
# Also stop the children on any other exit (e.g. the hub dying on its own),
# so a native-node is never left running in the background.
trap stop_children EXIT

# Free the port (lsof exits non-zero when nothing is listening).
lsof -ti:3000 | xargs kill -9 2>/dev/null || true

# Frontend
echo "[1/3] Frontend..."
cd "$SCRIPT_DIR/frontend"
[ -d node_modules ] || npm install --silent
npm run build 2>&1 | tail -1
cd "$SCRIPT_DIR"

# Hub
echo "[2/3] Hub..."
STATIC_DIR="$SCRIPT_DIR/frontend/dist" cargo run -p hub 2>&1 &
HUB_PID=$!
# Wait until the hub answers; bail out if the process died (e.g. compile
# error) instead of polling forever.
until curl -sf http://localhost:3000 >/dev/null 2>&1; do
  if ! kill -0 "$HUB_PID" 2>/dev/null; then
    echo "Hub exited before it started listening on port 3000" >&2
    exit 1
  fi
  sleep 1
done

# Native-node
if curl -sf http://localhost:11434/api/tags >/dev/null 2>&1; then
  # Model choice: first model whose name contains both "7b" and "coder",
  # otherwise the first "coder" model, otherwise the first model listed.
  MODEL=$(curl -s http://localhost:11434/api/tags | python3 -c "
import sys,json
ms=json.load(sys.stdin).get('models',[])
for m in ms:
    n=m['name']
    if '7b' in n and 'coder' in n: print(n); exit()
for m in ms:
    if 'coder' in m['name']: print(m['name']); exit()
if ms: print(ms[0]['name'])
" 2>/dev/null) || MODEL=""
  if [ -n "$MODEL" ]; then
    echo "[3/3] Native-node ($MODEL)..."
    HUB_URL=ws://localhost:3000/ws OLLAMA_MODEL="$MODEL" \
      cargo run -p native-node --no-default-features 2>&1 &
    NODE_PID=$!
  else
    echo "[3/3] Ollama: ei malleja (ollama pull qwen2.5-coder:7b)"
  fi
else
  echo "[3/3] Ei Ollamaa — Wasm-fallback selaimessa"
fi

echo ""
echo "=== http://localhost:3000 === Ctrl+C pysäyttää"
open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || true
wait "$HUB_PID"
|
||||
58
network-poc/deploy-remote.sh
Executable file
58
network-poc/deploy-remote.sh
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/bin/bash
# Kipinä Studio — production deploy to kipina.studio
# Builds the Docker image (frontend + hub + wasm) and ships it to the server.
# Usage: ./deploy-remote.sh
#
# Environment:
#   DISCORD_WEBHOOK_URL  optional; when set, a release notification is posted
#                        there. The URL is a secret and must not be committed.
#                        NOTE(review): the webhook that used to be hard-coded
#                        here is in git history and should be rotated.
set -e
# Without pipefail a failing `docker save` would still leave a (truncated)
# archive behind and the deploy would carry on.
set -o pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

SERVER="ubuntu@86.50.252.98"
# Kept with a literal "~": it is expanded on the remote side.
REMOTE_DIR="~/code/agentic-studio/network-poc"
KEY="$HOME/.ssh/id_rsa"
# An array keeps the key path a single argument even if $HOME contains spaces.
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
SSH_OPTS=(-o StrictHostKeyChecking=no -i "$KEY")

# Private scratch directory for the image archive, removed on every exit path.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "$WORK_DIR"' EXIT
IMAGE_TAR="$WORK_DIR/kipina-agentic.tar.gz"

# SSH key
if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then
  echo "SSH-avain puuttuu agentista..."
  ssh-add "$KEY"
fi

# Auto-commit tracked changes and untracked files before deploying.
if ! git diff --quiet HEAD 2>/dev/null || \
   [ -n "$(git ls-files --others --exclude-standard 2>/dev/null)" ]; then
  echo "Uncommitted muutoksia — commitoidaan..."
  read -rp " Commit-viesti: " msg
  msg="${msg:-Deploy $(date +%Y-%m-%d\ %H:%M)}"
  git add -A && git commit -m "$msg"
fi

echo "=== Kipinä Studio Deploy → kipina.studio ==="

# 1. Docker image
echo "[1/4] Docker build..."
docker build --platform linux/amd64 -f Dockerfile.prod -t kipina-agentic:latest .

# 2. Package
echo "[2/4] Pakataan..."
docker save kipina-agentic:latest | gzip > "$IMAGE_TAR"
echo " $(du -h "$IMAGE_TAR" | cut -f1)"

# 3. Transfer (the archive lands at /tmp/kipina-agentic.tar.gz on the server)
echo "[3/4] Siirretään..."
scp "${SSH_OPTS[@]}" "$IMAGE_TAR" "$SERVER:/tmp/"
scp "${SSH_OPTS[@]}" docker-compose.prod.yml Caddyfile.prod "$SERVER:$REMOTE_DIR/"

# 4. Start
echo "[4/4] Käynnistetään..."
ssh "${SSH_OPTS[@]}" "$SERVER" "gunzip -c /tmp/kipina-agentic.tar.gz | docker load && rm /tmp/kipina-agentic.tar.gz"
ssh "${SSH_OPTS[@]}" "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d"

# Discord notification: best effort, never fails the deploy.
if [ -n "${DISCORD_WEBHOOK_URL:-}" ]; then
  HASH=$(git log -1 --pretty=format:"%h" 2>/dev/null || echo "?")
  MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "?")
  # json.dumps takes care of escaping quotes/newlines in the commit subject.
  if PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({'content':sys.argv[1]}))" \
      "🚀 **Kipinä Studio julkaistu!** \`${HASH}\` ${MSG} https://kipina.studio"); then
    curl -sf -H "Content-Type: application/json" -d "$PAYLOAD" "$DISCORD_WEBHOOK_URL" >/dev/null || true
  fi
else
  echo "DISCORD_WEBHOOK_URL not set — skipping Discord notification" >&2
fi

echo "=== Valmis! https://kipina.studio ==="
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
# Deploy preceded by a native-node binary build (the build step decides on
# its own whether anything needs rebuilding).
set -e

# Work from the directory this script lives in.
cd "$(dirname "$0")"
SCRIPT_DIR="$(pwd)"

echo "=== Kipinä Studio Deploy (+ native binäärit) ==="

# Step 1: build the native-node binaries (skipped automatically when unchanged).
./build-binaries.sh

# Step 2: hand over to the regular deploy; exec replaces this shell.
exec ./deploy.sh
|
||||
@@ -1,68 +0,0 @@
|
||||
#!/bin/bash
# Kipinä Studio deploy.
#
# Usage:
#   ./deploy.sh local   run the stack locally via docker compose
#   ./deploy.sh         build the image, ship it to the server and restart
#
# Environment:
#   DISCORD_WEBHOOK_URL  optional; when set, a release notification is posted
#                        there. The URL is a secret and must not be committed.
#                        NOTE(review): the webhook that used to be hard-coded
#                        here is in git history and should be rotated.
set -e
# Without pipefail a failing `docker save` would still leave a (truncated)
# archive behind and the deploy would carry on.
set -o pipefail

# `=` is the portable comparison inside [ ]; ${1:-} tolerates a missing arg.
if [ "${1:-}" = "local" ]; then
  echo "=== Kipinä Studio Local Development ==="
  echo "Käynnistetään kokonaisuus puhtaasti Docker-kontissa..."
  docker compose up agentic-poc
  exit 0
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

SERVER="ubuntu@86.50.252.98"
# Kept with a literal "~": it is expanded on the remote side.
REMOTE_DIR="~/code/agentic-studio/network-poc"
KEY="$HOME/.ssh/id_rsa"
# An array keeps the key path a single argument even if $HOME contains spaces.
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
SSH_OPTS=(-o StrictHostKeyChecking=no -i "$KEY")

# Private scratch directory for the image archive, removed on every exit path.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "$WORK_DIR"' EXIT
IMAGE_TAR="$WORK_DIR/kipina-agentic.tar.gz"

if ! ssh-add -l 2>/dev/null | grep -q id_rsa; then
  echo "SSH-avain ei ole agentissa. Lisätään..."
  ssh-add "$KEY"
fi

echo "=== Kipinä Studio Deploy ==="

# 0. Commit tracked changes and untracked files before deploying.
if ! git diff --quiet HEAD 2>/dev/null || \
   [ -n "$(git ls-files --others --exclude-standard 2>/dev/null)" ]; then
  echo "[0] Uncommitted muutoksia havaittu — commitoidaan..."
  read -rp " Commit-viesti: " DEPLOY_MSG
  if [ -z "$DEPLOY_MSG" ]; then
    DEPLOY_MSG="Deploy $(date +%Y-%m-%d\ %H:%M)"
  fi
  git add -A
  git commit -m "$DEPLOY_MSG"
  echo " Commitoitu: $DEPLOY_MSG"
fi

# 1. Docker image (frontend + hub + wasm)
echo "[1/4] Rakennetaan image..."
docker build --platform linux/amd64 -f Dockerfile.prod -t kipina-agentic:latest .

# 2. Package
echo "[2/4] Pakataan image..."
docker save kipina-agentic:latest | gzip > "$IMAGE_TAR"
echo " Koko: $(du -h "$IMAGE_TAR" | cut -f1)"

# 3. Transfer (the archive lands at /tmp/kipina-agentic.tar.gz on the server)
echo "[3/4] Siirretään palvelimelle..."
scp "${SSH_OPTS[@]}" "$IMAGE_TAR" "$SERVER:/tmp/"
scp "${SSH_OPTS[@]}" docker-compose.prod.yml Caddyfile.prod "$SERVER:$REMOTE_DIR/"

# 4. Start
echo "[4/4] Käynnistetään palvelimella..."
ssh "${SSH_OPTS[@]}" "$SERVER" "gunzip -c /tmp/kipina-agentic.tar.gz | docker load && rm /tmp/kipina-agentic.tar.gz"
ssh "${SSH_OPTS[@]}" "$SERVER" "cd $REMOTE_DIR && docker compose -f docker-compose.prod.yml down && docker compose -f docker-compose.prod.yml up -d"

echo "=== Valmis! https://kipina.studio ==="

# Discord notification: best effort; a failed POST must not turn an otherwise
# successful deploy into a non-zero exit.
if [ -n "${DISCORD_WEBHOOK_URL:-}" ]; then
  COMMIT_HASH=$(git log -1 --pretty=format:"%h" 2>/dev/null || echo "?")
  COMMIT_MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "?")
  # json.dumps takes care of escaping quotes/newlines in the message.
  if PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({'content': sys.argv[1]}))" \
      "🚀 **Kipinä Studio julkaistu!**
> \`${COMMIT_HASH}\` ${COMMIT_MSG}
> https://kipina.studio"); then
    curl -s -H "Content-Type: application/json" -d "$PAYLOAD" "$DISCORD_WEBHOOK_URL" > /dev/null || true
  fi
else
  echo "DISCORD_WEBHOOK_URL not set — skipping Discord notification" >&2
fi
|
||||
@@ -613,7 +613,7 @@ OUTPUT FORMAT:
|
||||
// === Terminal commands ===
|
||||
const kpnCommands = {
|
||||
'kpn': ['help','run','project','pipeline','stop','load','status','models','clear'],
|
||||
'kpn run': ['coder','coder-3b','manager','tester','qa','qwen-coder','smollm-135m'],
|
||||
'kpn run': ['coder','coder-3b','manager','tester','qa','qwen-coder'],
|
||||
'kpn load': ['1','2'],
|
||||
'kpn project': ['"'],
|
||||
'kpn pipeline': ['"'],
|
||||
@@ -703,9 +703,8 @@ OUTPUT FORMAT:
|
||||
if (btn && btn.textContent.includes('Valmis')) { termLog(' ✓ Malli jo ladattu', '#3fb950'); }
|
||||
else { btn?.click(); }
|
||||
} else if (sub === 'models') {
|
||||
termLog(' <span style="color:var(--accent)">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB</span>');
|
||||
termLog(' <span style="color:var(--accent)">2</span> qwen-coder-3b Qwen2.5-Coder:3B <span style="color:#8b949e">~6.2 GB</span>');
|
||||
termLog(' <span style="color:var(--accent)">3</span> smollm-135m SmolLM 135M <span style="color:#8b949e">~270 MB</span>');
|
||||
termLog(' <span style="color:var(--accent)">1</span> qwen-coder Qwen2.5-Coder:0.5B <span style="color:#8b949e">~990 MB (selain)</span>');
|
||||
termLog(' <span style="color:var(--accent)">2</span> qwen-coder-3b Qwen2.5-Coder:3B <span style="color:#8b949e">~6.2 GB (Ollama)</span>');
|
||||
} else if (sub === 'status') {
|
||||
termLog(` Hub: ${document.getElementById('hub-label').textContent} | Laskenta: ${document.getElementById('compute-label').textContent}`, '#a5d6ff');
|
||||
} else if (sub === 'run') {
|
||||
|
||||
@@ -196,7 +196,7 @@ async function load() {
|
||||
].map(s => `<div class="stat-card"><div class="val">${s.v}</div><div class="label">${s.l}</div></div>`).join('');
|
||||
|
||||
// Sessions — lajittelu: 1) aktiiviset nodet (online + ei viewer), 2) katsojat (online + viewer), 3) offline
|
||||
const taskNames = {'tokenize':'Tokenisaatio','smollm-135m':'SmolLM 135M','qwen-05b':'Qwen2.5 0.5B','phi3-mini':'Phi-3 Mini','qwen-coder-05b':'Coder 0.5B','qwen-coder-3b':'Coder 3B','viewer':'Katsoja','codelab-viewer':'Koodilabra'};
|
||||
const taskNames = {'tokenize':'Tokenisaatio','qwen-05b':'Qwen2.5 0.5B','qwen-coder-05b':'Coder 0.5B','qwen-coder-3b':'Coder 3B','viewer':'Katsoja','codelab-viewer':'Koodilabra'};
|
||||
sessions.sort((a, b) => {
|
||||
const aOnline = !a.disconnected_at;
|
||||
const bOnline = !b.disconnected_at;
|
||||
@@ -419,9 +419,7 @@ async fn main() {
|
||||
// Vapaa node -> lähetetään oikea tehtävä
|
||||
let msg = match task.as_str() {
|
||||
"tokenize" => Some(serde_json::json!({ "type": "pair_task", "en": en, "fi": fi })),
|
||||
"smollm-135m" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "smollm-135m" })),
|
||||
"qwen-05b" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "qwen-05b" })),
|
||||
"phi3-mini" => Some(serde_json::json!({ "type": "llm_prompt", "prompt": llm_prompts[llm_idx], "model": "phi3-mini" })),
|
||||
_ => None, // Coder ja viewer ei saa auto-tehtäviä
|
||||
};
|
||||
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
#!/bin/bash
# Kipinä Agentic Studio — installer for Debian/Ubuntu.
# Installs base packages, Rust, Node.js 22 and Ollama when they are missing,
# then pulls the qwen2.5-coder:3b model.
set -e

# True when the command named by $1 is found on PATH.
have() {
  command -v "$1" &>/dev/null
}

echo "=== Kipinä Agentic Studio — Asennus ==="
echo ""

# On non-Debian systems ask for a single-key confirmation before continuing.
if [ ! -f /etc/debian_version ]; then
  echo "⚠ Tämä skripti on suunniteltu Debian/Ubuntu-järjestelmille."
  echo " Muilla jakeluilla voit asentaa riippuvuudet manuaalisesti."
  read -p " Jatketaanko? (k/e) " -n 1 -r; echo
  case "$REPLY" in
    [Kk]) ;;
    *) exit 1 ;;
  esac
fi

echo "[1/6] Päivitetään pakettilistaus..."
sudo apt-get update -qq

echo "[2/6] Asennetaan peruspaketteja..."
sudo apt-get install -y -qq curl git build-essential pkg-config libssl-dev

# Rust toolchain
if ! have rustc; then
  echo "[3/6] Asennetaan Rust..."
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
  # Bring cargo/rustc into this shell's environment.
  source "$HOME/.cargo/env"
else
  echo "[3/6] Rust löytyi: $(rustc --version)"
fi

# Node.js (needed by the Astro frontend)
if ! have node; then
  echo "[4/6] Asennetaan Node.js 22..."
  curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash -
  sudo apt-get install -y -qq nodejs
else
  echo "[4/6] Node.js löytyi: $(node --version)"
fi

# Ollama
if ! have ollama; then
  echo "[5/6] Asennetaan Ollama..."
  curl -fsSL https://ollama.ai/install.sh | sh
else
  echo "[5/6] Ollama löytyi"
fi

# Language model
echo "[6/6] Ladataan kielimalli (qwen2.5-coder:3b)..."
ollama pull qwen2.5-coder:3b

echo ""
echo "=== Asennus valmis! ==="
echo ""
echo "Käynnistä:"
echo " cd $(pwd)"
echo " ./network-poc/local.sh"
echo ""
echo "Avaa selaimessa: http://localhost:3000"
|
||||
@@ -1,68 +0,0 @@
|
||||
#!/bin/bash
# Kipinä Studio local development: builds the frontend, starts the hub and,
# when Ollama is reachable, a native-node connected to it.
set -e
# Without pipefail `npm run build | tail -1` hides a failed frontend build.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

HUB_PID=""
NODE_PID=""

# Terminates whichever background processes have been started so far.
stop_children() {
  kill ${HUB_PID:+"$HUB_PID"} ${NODE_PID:+"$NODE_PID"} 2>/dev/null || true
}

# Installed before anything is started, so Ctrl+C during startup does not
# leave the hub or the node running in the background.
trap 'echo ""; echo "Pysäytetään..."; stop_children; exit 0' INT TERM
trap stop_children EXIT

echo "=== Kipinä Studio Local Development ==="

# Kill old processes listening on port 3000.
if lsof -ti:3000 >/dev/null 2>&1; then
  echo "[0] Vapautetaan portti 3000..."
  lsof -ti:3000 | xargs kill -9 2>/dev/null || true
  sleep 1
fi

# Frontend
echo "[1/3] Rakennetaan frontend..."
cd "$SCRIPT_DIR/frontend"
[ -d node_modules ] || npm install --silent
npm run build 2>&1 | tail -1
cd "$SCRIPT_DIR"

# Hub
echo "[2/3] Käynnistetään hub..."
STATIC_DIR="$SCRIPT_DIR/frontend/dist" cargo run -p hub 2>&1 &
HUB_PID=$!

# Wait until the hub answers. A fixed 10 s budget is too short when cargo has
# to compile first, so keep polling while the process is alive and stop with
# an error if it died.
until curl -s -o /dev/null http://localhost:3000 2>/dev/null; do
  if ! kill -0 "$HUB_PID" 2>/dev/null; then
    echo "Hub exited before it started listening on port 3000" >&2
    exit 1
  fi
  sleep 1
done

# Native-node (only when Ollama is running)
if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then
  MODEL=$(curl -s http://localhost:11434/api/tags | python3 -c "
import sys, json
models = json.load(sys.stdin).get('models', [])
# Priority: a 7b coder model > the first coder model > the first model listed
best = None
for m in models:
    name = m['name']
    if '7b' in name and 'coder' in name: best = name; break
    if 'coder' in name and not best: best = name
if not best and models: best = models[0]['name']
if best: print(best)
" 2>/dev/null) || MODEL=""

  if [ -n "$MODEL" ]; then
    echo "[3/3] Ollama: $MODEL — käynnistetään native-node..."
    HUB_URL=ws://localhost:3000/ws OLLAMA_MODEL="$MODEL" cargo run -p native-node --no-default-features 2>&1 &
    NODE_PID=$!
  else
    echo "[3/3] Ollama käynnissä mutta ei malleja — asenna: ollama pull qwen2.5-coder:7b"
  fi
else
  echo "[3/3] Ollama ei käynnissä — käytetään selaimen Wasm-laskentaa"
fi

echo ""
echo "=== http://localhost:3000 ==="
echo " Ctrl+C pysäyttää"

open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || true

wait "$HUB_PID"
|
||||
@@ -1,118 +0,0 @@
|
||||
use burn::module::{Module, Param};
|
||||
use burn::tensor::{backend::Backend, Tensor};
|
||||
use super::rope::RoPE;
|
||||
use super::config::SmolLMConfig;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct KVCache<B: Backend> {
|
||||
pub k: Tensor<B, 4>,
|
||||
pub v: Tensor<B, 4>,
|
||||
}
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct Attention<B: Backend> {
|
||||
pub q_proj: Param<Tensor<B, 2>>, // [hidden, num_heads * head_dim]
|
||||
pub k_proj: Param<Tensor<B, 2>>, // [hidden, num_kv_heads * head_dim]
|
||||
pub v_proj: Param<Tensor<B, 2>>, // [hidden, num_kv_heads * head_dim]
|
||||
pub o_proj: Param<Tensor<B, 2>>, // [num_heads * head_dim, hidden]
|
||||
|
||||
num_heads: usize,
|
||||
num_kv_heads: usize,
|
||||
head_dim: usize,
|
||||
|
||||
rope: RoPE<B>,
|
||||
}
|
||||
|
||||
impl<B: Backend> Attention<B> {
|
||||
pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self {
|
||||
let head_dim = config.hidden_size / config.num_attention_heads;
|
||||
|
||||
Self {
|
||||
q_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, config.num_attention_heads * head_dim], device)),
|
||||
k_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, config.num_key_value_heads * head_dim], device)),
|
||||
v_proj: Param::from_tensor(Tensor::zeros([config.hidden_size, config.num_key_value_heads * head_dim], device)),
|
||||
o_proj: Param::from_tensor(Tensor::zeros([config.num_attention_heads * head_dim, config.hidden_size], device)),
|
||||
|
||||
num_heads: config.num_attention_heads,
|
||||
num_kv_heads: config.num_key_value_heads,
|
||||
head_dim,
|
||||
|
||||
rope: RoPE::new(head_dim, config.max_position_embeddings, config.rope_theta, device),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward(
|
||||
&self,
|
||||
x: Tensor<B, 3>,
|
||||
offset: usize,
|
||||
cache: Option<KVCache<B>>
|
||||
) -> (Tensor<B, 3>, KVCache<B>) {
|
||||
let [batch, seq_len, hidden_dim] = x.dims();
|
||||
|
||||
// Project Q, K, V: x @ W -> [batch, seq, proj_dim]
|
||||
let q = x.clone().matmul(self.q_proj.val().unsqueeze());
|
||||
let k = x.clone().matmul(self.k_proj.val().unsqueeze());
|
||||
let v = x.matmul(self.v_proj.val().unsqueeze());
|
||||
|
||||
// Reshape: [batch, seq, heads, head_dim] -> [batch, heads, seq, head_dim]
|
||||
let q = q.reshape([batch, seq_len, self.num_heads, self.head_dim]).swap_dims(1, 2);
|
||||
let k = k.reshape([batch, seq_len, self.num_kv_heads, self.head_dim]).swap_dims(1, 2);
|
||||
let v = v.reshape([batch, seq_len, self.num_kv_heads, self.head_dim]).swap_dims(1, 2);
|
||||
|
||||
// Apply RoPE
|
||||
let q = self.rope.forward(q, offset);
|
||||
let k = self.rope.forward(k, offset);
|
||||
|
||||
// KV cache
|
||||
let (k, v) = if let Some(c) = cache {
|
||||
(Tensor::cat(vec![c.k, k], 2), Tensor::cat(vec![c.v, v], 2))
|
||||
} else {
|
||||
(k, v)
|
||||
};
|
||||
|
||||
let new_cache = KVCache { k: k.clone(), v: v.clone() };
|
||||
let kv_len = k.dims()[2];
|
||||
|
||||
// GQA: repeat K,V heads — [batch, kv_heads, kv_len, hd] -> [batch, num_heads, kv_len, hd]
|
||||
let num_reps = self.num_heads / self.num_kv_heads;
|
||||
let k = if num_reps > 1 {
|
||||
let [b, kv_h, s, hd] = k.dims();
|
||||
k.reshape([b, kv_h, 1, s, hd]).repeat_dim(2, num_reps).reshape([b, self.num_heads, s, hd])
|
||||
} else { k };
|
||||
let v = if num_reps > 1 {
|
||||
let [b, kv_h, s, hd] = v.dims();
|
||||
v.reshape([b, kv_h, 1, s, hd]).repeat_dim(2, num_reps).reshape([b, self.num_heads, s, hd])
|
||||
} else { v };
|
||||
|
||||
// Attention: Q @ K^T / sqrt(d)
|
||||
let scale = 1.0 / (self.head_dim as f64).sqrt();
|
||||
let scores = q.matmul(k.swap_dims(2, 3)).mul_scalar(scale);
|
||||
// scores: [batch, heads, seq_len, kv_len]
|
||||
|
||||
// Causal mask for prefill (seq_len > 1)
|
||||
let scores = if seq_len > 1 {
|
||||
let mask_data: Vec<f32> = (0..seq_len).flat_map(|i| {
|
||||
(0..kv_len).map(move |j| {
|
||||
if j > offset + i { f32::NEG_INFINITY } else { 0.0 }
|
||||
})
|
||||
}).collect();
|
||||
let mask = Tensor::<B, 2>::from_data(
|
||||
burn::tensor::TensorData::new(mask_data, [seq_len, kv_len]),
|
||||
&scores.device()
|
||||
).reshape([1, 1, seq_len, kv_len]);
|
||||
scores + mask
|
||||
} else {
|
||||
scores
|
||||
};
|
||||
|
||||
let attn_weights = burn::tensor::activation::softmax(scores, 3);
|
||||
|
||||
let context = attn_weights.matmul(v);
|
||||
// [batch, heads, seq, hd] -> [batch, seq, heads*hd]
|
||||
let context = context.swap_dims(1, 2).reshape([batch, seq_len, self.num_heads * self.head_dim]);
|
||||
|
||||
let output = context.matmul(self.o_proj.val().unsqueeze());
|
||||
|
||||
(output, new_cache)
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/// Hyper-parameters of the SmolLM transformer.
#[derive(Clone, Debug)]
pub struct SmolLMConfig {
    /// Width of the hidden state.
    pub hidden_size: usize,
    /// Width of the MLP's intermediate projection.
    pub intermediate_size: usize,
    /// Number of entries in the token vocabulary.
    pub vocab_size: usize,
    /// Number of transformer layers.
    pub num_hidden_layers: usize,
    /// Number of query heads.
    pub num_attention_heads: usize,
    /// Number of key/value heads.
    pub num_key_value_heads: usize,
    /// Epsilon used by the RMS normalisation layers.
    pub rms_norm_eps: f64,
    /// Base frequency of the rotary position embedding.
    pub rope_theta: f32,
    /// Maximum number of positions.
    pub max_position_embeddings: usize,
}

impl Default for SmolLMConfig {
    /// Default hyper-parameters.
    /// NOTE(review): presumably these match the SmolLM-135M checkpoint — confirm.
    fn default() -> Self {
        SmolLMConfig {
            // Model dimensions
            hidden_size: 576,
            intermediate_size: 1536,
            vocab_size: 49152,
            // Depth and attention heads
            num_hidden_layers: 30,
            num_attention_heads: 9,
            num_key_value_heads: 3,
            // Normalisation and positional encoding
            rms_norm_eps: 1e-5,
            rope_theta: 10000.0,
            max_position_embeddings: 2048,
        }
    }
}
|
||||
@@ -1,90 +0,0 @@
|
||||
use burn::tensor::{backend::Backend, Tensor, TensorData};
|
||||
use candle_core::safetensors;
|
||||
use candle_core::Device as CandleDevice;
|
||||
use burn::module::Param;
|
||||
use super::model::LlamaModel;
|
||||
use super::config::SmolLMConfig;
|
||||
|
||||
fn load_tensor_2d<B: Backend>(
|
||||
tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
|
||||
name: &str,
|
||||
device: &B::Device,
|
||||
shape_out_in: [usize; 2]
|
||||
) -> Result<Param<Tensor<B, 2>>, String> {
|
||||
let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?;
|
||||
let t = t.to_dtype(candle_core::DType::F32).unwrap();
|
||||
let vec = t.flatten_all().unwrap().to_vec1::<f32>().unwrap();
|
||||
let t_burn = Tensor::<B, 2>::from_data(burn::tensor::TensorData::new(vec, shape_out_in), device);
|
||||
// transpose from [out, in] to [in, out]
|
||||
Ok(Param::from_tensor(t_burn.transpose()))
|
||||
}
|
||||
|
||||
fn load_tensor_1d<B: Backend>(
|
||||
tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
|
||||
name: &str,
|
||||
device: &B::Device,
|
||||
_shape: [usize; 1]
|
||||
) -> Result<Param<Tensor<B, 1>>, String> {
|
||||
let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?;
|
||||
let t = t.to_dtype(candle_core::DType::F32).unwrap();
|
||||
let vec = t.flatten_all().unwrap().to_vec1::<f32>().unwrap();
|
||||
Ok(Param::from_tensor(Tensor::<B, 1>::from_floats(vec.as_slice(), device)))
|
||||
}
|
||||
|
||||
fn load_embed<B: Backend>(
|
||||
tensors_map: &std::collections::HashMap<String, candle_core::Tensor>,
|
||||
name: &str,
|
||||
device: &B::Device,
|
||||
shape: [usize; 2]
|
||||
) -> Result<Param<Tensor<B, 2>>, String> {
|
||||
let t = tensors_map.get(name).ok_or_else(|| format!("Puuttuu: {}", name))?;
|
||||
let t = t.to_dtype(candle_core::DType::F32).unwrap();
|
||||
let vec = t.flatten_all().unwrap().to_vec1::<f32>().unwrap();
|
||||
// Embed ei transponoi samalla tavalla, se pysyy [vocab, hidden]
|
||||
Ok(Param::from_tensor(Tensor::<B, 2>::from_data(burn::tensor::TensorData::new(vec, shape), device)))
|
||||
}
|
||||
|
||||
pub fn load_safetensors_to_model<B: Backend>(
|
||||
buffer: &[u8],
|
||||
config: &SmolLMConfig,
|
||||
device: &B::Device
|
||||
) -> Result<LlamaModel<B>, String> {
|
||||
|
||||
let mut model = LlamaModel::new(config, device);
|
||||
let tensors_map = safetensors::load_buffer(buffer, &CandleDevice::Cpu)
|
||||
.map_err(|e| format!("Virhe Safetensors luennassa: {}", e))?;
|
||||
|
||||
// Embeddings
|
||||
model.embed_tokens = load_embed(&tensors_map, "model.embed_tokens.weight", device, [config.vocab_size, config.hidden_size])?;
|
||||
model.norm.weight = load_tensor_1d(&tensors_map, "model.norm.weight", device, [config.hidden_size])?;
|
||||
model.lm_head = load_embed(&tensors_map, "lm_head.weight", device, [config.vocab_size, config.hidden_size]).or_else(|_| {
|
||||
load_embed(&tensors_map, "model.embed_tokens.weight", device, [config.vocab_size, config.hidden_size])
|
||||
})?;
|
||||
|
||||
let head_dim = config.hidden_size / config.num_attention_heads;
|
||||
|
||||
for i in 0..config.num_hidden_layers {
|
||||
let prefix = format!("model.layers.{}", i);
|
||||
|
||||
let layer = &mut model.layers[i];
|
||||
|
||||
// Norms
|
||||
layer.input_layernorm.weight = load_tensor_1d(&tensors_map, &format!("{}.input_layernorm.weight", prefix), device, [config.hidden_size])?;
|
||||
layer.post_attention_layernorm.weight = load_tensor_1d(&tensors_map, &format!("{}.post_attention_layernorm.weight", prefix), device, [config.hidden_size])?;
|
||||
|
||||
// Attention
|
||||
let num_heads = config.num_attention_heads;
|
||||
let num_kv_heads = config.num_key_value_heads;
|
||||
layer.self_attn.q_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.q_proj.weight", prefix), device, [num_heads * head_dim, config.hidden_size])?;
|
||||
layer.self_attn.k_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.k_proj.weight", prefix), device, [num_kv_heads * head_dim, config.hidden_size])?;
|
||||
layer.self_attn.v_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.v_proj.weight", prefix), device, [num_kv_heads * head_dim, config.hidden_size])?;
|
||||
layer.self_attn.o_proj = load_tensor_2d(&tensors_map, &format!("{}.self_attn.o_proj.weight", prefix), device, [config.hidden_size, num_heads * head_dim])?;
|
||||
|
||||
// MLP
|
||||
layer.mlp.gate_proj = load_tensor_2d(&tensors_map, &format!("{}.mlp.gate_proj.weight", prefix), device, [config.intermediate_size, config.hidden_size])?;
|
||||
layer.mlp.up_proj = load_tensor_2d(&tensors_map, &format!("{}.mlp.up_proj.weight", prefix), device, [config.intermediate_size, config.hidden_size])?;
|
||||
layer.mlp.down_proj = load_tensor_2d(&tensors_map, &format!("{}.mlp.down_proj.weight", prefix), device, [config.hidden_size, config.intermediate_size])?;
|
||||
}
|
||||
|
||||
Ok(model)
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
pub mod attention;
|
||||
pub mod config;
|
||||
pub mod loader;
|
||||
pub mod model;
|
||||
pub mod modules;
|
||||
pub mod rope;
|
||||
@@ -1,96 +0,0 @@
|
||||
use burn::module::{Module, Param};
|
||||
use burn::tensor::{backend::Backend, Tensor, Int};
|
||||
use super::modules::{RmsNorm, Mlp};
|
||||
use super::attention::{Attention, KVCache};
|
||||
use super::config::SmolLMConfig;
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct LlamaBlock<B: Backend> {
|
||||
pub self_attn: Attention<B>,
|
||||
pub mlp: Mlp<B>,
|
||||
pub input_layernorm: RmsNorm<B>,
|
||||
pub post_attention_layernorm: RmsNorm<B>,
|
||||
}
|
||||
|
||||
impl<B: Backend> LlamaBlock<B> {
|
||||
pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self {
|
||||
Self {
|
||||
self_attn: Attention::new(config, device),
|
||||
mlp: Mlp::new(config.hidden_size, config.intermediate_size, device),
|
||||
input_layernorm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device),
|
||||
post_attention_layernorm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward(
|
||||
&self,
|
||||
x: Tensor<B, 3>,
|
||||
offset: usize,
|
||||
cache: Option<KVCache<B>>
|
||||
) -> (Tensor<B, 3>, KVCache<B>) {
|
||||
let residual = x.clone();
|
||||
let x_norm = self.input_layernorm.forward(x);
|
||||
|
||||
let (attn_out, new_cache) = self.self_attn.forward(x_norm, offset, cache);
|
||||
|
||||
let x = residual + attn_out;
|
||||
|
||||
let residual = x.clone();
|
||||
let x_norm = self.post_attention_layernorm.forward(x);
|
||||
let mlp_out = self.mlp.forward(x_norm);
|
||||
|
||||
let x = residual + mlp_out;
|
||||
(x, new_cache)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct LlamaModel<B: Backend> {
|
||||
pub embed_tokens: Param<Tensor<B, 2>>,
|
||||
pub layers: Vec<LlamaBlock<B>>,
|
||||
pub norm: RmsNorm<B>,
|
||||
pub lm_head: Param<Tensor<B, 2>>, // For tie_word_embeddings this can point to embed_tokens
|
||||
}
|
||||
|
||||
impl<B: Backend> LlamaModel<B> {
|
||||
pub fn new(config: &SmolLMConfig, device: &B::Device) -> Self {
|
||||
let embed = Tensor::zeros([config.vocab_size, config.hidden_size], device);
|
||||
let lm_head = Tensor::zeros([config.vocab_size, config.hidden_size], device);
|
||||
|
||||
let mut layers = Vec::new();
|
||||
for _ in 0..config.num_hidden_layers {
|
||||
layers.push(LlamaBlock::new(config, device));
|
||||
}
|
||||
|
||||
Self {
|
||||
embed_tokens: Param::from_tensor(embed),
|
||||
layers,
|
||||
norm: RmsNorm::new(config.hidden_size, config.rms_norm_eps, device),
|
||||
lm_head: Param::from_tensor(lm_head),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward(
|
||||
&self,
|
||||
input_ids: Tensor<B, 2, Int>,
|
||||
offset: usize,
|
||||
caches: &mut Vec<Option<KVCache<B>>>
|
||||
) -> Tensor<B, 3> {
|
||||
let [_batch, _seq_len] = input_ids.dims();
|
||||
|
||||
let mut x = burn::tensor::module::embedding(self.embed_tokens.val(), input_ids);
|
||||
|
||||
for (i, layer) in self.layers.iter().enumerate() {
|
||||
let cache = caches[i].take();
|
||||
let (out, new_cache) = layer.forward(x, offset, cache);
|
||||
x = out;
|
||||
caches[i] = Some(new_cache);
|
||||
}
|
||||
|
||||
x = self.norm.forward(x);
|
||||
|
||||
// Matmul with lm_head (or embed_tokens if tied) to get logits
|
||||
// Notice: lm_head is typically [vocab_size, hidden_size] in HF, so we swap dims
|
||||
x.matmul(self.lm_head.val().swap_dims(0, 1).unsqueeze())
|
||||
}
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
use burn::module::{Module, Param};
|
||||
use burn::tensor::{backend::Backend, Tensor};
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct RmsNorm<B: Backend> {
|
||||
pub weight: Param<Tensor<B, 1>>,
|
||||
epsilon: f64,
|
||||
}
|
||||
|
||||
impl<B: Backend> RmsNorm<B> {
|
||||
pub fn new(size: usize, epsilon: f64, device: &B::Device) -> Self {
|
||||
let weight = Param::from_tensor(Tensor::ones([size], device));
|
||||
Self { weight, epsilon }
|
||||
}
|
||||
|
||||
pub fn forward(&self, x: Tensor<B, 3>) -> Tensor<B, 3> {
|
||||
// x: [batch, seq_len, dim]
|
||||
// RMSNorm: x * weight / sqrt(mean(x^2) + eps)
|
||||
let x_sq = x.clone().powf_scalar(2.0);
|
||||
// mean over last dim, keeping dims for broadcast
|
||||
let [b, s, d] = x_sq.dims();
|
||||
let variance = x_sq.sum_dim(2).div_scalar(d as f32);
|
||||
let norm = x.div(variance.add_scalar(self.epsilon).sqrt());
|
||||
|
||||
let w = self.weight.val().unsqueeze::<2>().unsqueeze::<3>().reshape([1, 1, d]);
|
||||
norm * w
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct Mlp<B: Backend> {
|
||||
pub gate_proj: Param<Tensor<B, 2>>, // [in, intermediate]
|
||||
pub up_proj: Param<Tensor<B, 2>>, // [in, intermediate]
|
||||
pub down_proj: Param<Tensor<B, 2>>, // [intermediate, out]
|
||||
}
|
||||
|
||||
impl<B: Backend> Mlp<B> {
|
||||
pub fn new(hidden_size: usize, intermediate_size: usize, device: &B::Device) -> Self {
|
||||
Self {
|
||||
gate_proj: Param::from_tensor(Tensor::zeros([hidden_size, intermediate_size], device)),
|
||||
up_proj: Param::from_tensor(Tensor::zeros([hidden_size, intermediate_size], device)),
|
||||
down_proj: Param::from_tensor(Tensor::zeros([intermediate_size, hidden_size], device)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward(&self, x: Tensor<B, 3>) -> Tensor<B, 3> {
|
||||
// x: [batch, seq, hidden]
|
||||
// gate = x @ gate_proj -> [batch, seq, intermediate]
|
||||
let gate = x.clone().matmul(self.gate_proj.val().unsqueeze());
|
||||
let up = x.matmul(self.up_proj.val().unsqueeze());
|
||||
|
||||
// SiLU(gate) * up
|
||||
let silu = gate.clone() * burn::tensor::activation::sigmoid(gate);
|
||||
let intermediate = silu * up;
|
||||
|
||||
// intermediate @ down_proj -> [batch, seq, hidden]
|
||||
intermediate.matmul(self.down_proj.val().unsqueeze())
|
||||
}
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
use burn::module::Module;
|
||||
use burn::tensor::{backend::Backend, Tensor};
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct RoPE<B: Backend> {
|
||||
cos_cache: Tensor<B, 2>,
|
||||
sin_cache: Tensor<B, 2>,
|
||||
}
|
||||
|
||||
impl<B: Backend> RoPE<B> {
|
||||
pub fn new(head_dim: usize, max_seq_len: usize, theta: f32, device: &B::Device) -> Self {
|
||||
// (head_dim / 2) values
|
||||
let half_dim = head_dim / 2;
|
||||
let inv_freq: Vec<f32> = (0..half_dim)
|
||||
.map(|i| 1.0 / theta.powf((2 * i) as f32 / head_dim as f32))
|
||||
.collect();
|
||||
|
||||
let inv_freq = Tensor::<B, 1>::from_floats(inv_freq.as_slice(), device).unsqueeze::<2>();
|
||||
let t_floats: Vec<f32> = (0..max_seq_len).map(|v| v as f32).collect();
|
||||
let t = Tensor::<B, 1>::from_floats(t_floats.as_slice(), device).unsqueeze::<2>().transpose();
|
||||
// t shape: [max_seq_len, 1]
|
||||
// inv_freq shape: [1, half_dim]
|
||||
|
||||
// freqs shape: [max_seq_len, half_dim]
|
||||
let freqs = t.matmul(inv_freq);
|
||||
|
||||
let cos_cache = freqs.clone().cos();
|
||||
let sin_cache = freqs.sin();
|
||||
|
||||
Self {
|
||||
cos_cache,
|
||||
sin_cache,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward(&self, x: Tensor<B, 4>, offset: usize) -> Tensor<B, 4> {
|
||||
let [batch, heads, seq_len, head_dim] = x.dims();
|
||||
let half_dim = head_dim / 2;
|
||||
|
||||
// x shape: [batch, heads, seq_len, head_dim]
|
||||
// valitaan viipaleet (x1 ja x2) jotta saadaan pyöritettyä rotaatiot
|
||||
let x1 = x.clone().slice([0..batch, 0..heads, 0..seq_len, 0..half_dim]);
|
||||
let x2 = x.clone().slice([0..batch, 0..heads, 0..seq_len, half_dim..head_dim]);
|
||||
|
||||
// haetaan vastaava seq offsetista alkaen
|
||||
let cos = self.cos_cache.clone().slice([offset..offset+seq_len, 0..half_dim])
|
||||
.unsqueeze::<4>() // [seq, half_dim, 1]
|
||||
.reshape([1, 1, seq_len, half_dim]);
|
||||
let sin = self.sin_cache.clone().slice([offset..offset+seq_len, 0..half_dim])
|
||||
.reshape([1, 1, seq_len, half_dim]);
|
||||
|
||||
// x1 * cos - x2 * sin
|
||||
let o1 = x1.clone().mul(cos.clone()) - x2.clone().mul(sin.clone());
|
||||
// x2 * cos + x1 * sin
|
||||
let o2 = x2.mul(cos) + x1.mul(sin);
|
||||
|
||||
Tensor::cat(vec![o1, o2], 3)
|
||||
}
|
||||
}
|
||||
@@ -8,11 +8,8 @@ use burn::backend::{Wgpu, NdArray};
|
||||
|
||||
pub mod storage;
|
||||
pub mod sampling;
|
||||
pub mod smollm;
|
||||
pub mod qwen;
|
||||
pub mod qwen_coder;
|
||||
pub mod phi3;
|
||||
pub mod burn_smollm;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! console_log {
|
||||
@@ -246,7 +243,7 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
HAS_WEBGPU.store(has_webgpu, Ordering::SeqCst);
|
||||
SELECTED_TASK.store(task_id, Ordering::SeqCst);
|
||||
let backend_name = if has_webgpu { "WebGPU" } else { "CPU (NdArray)" };
|
||||
let task_names = ["tokenize", "smollm-135m", "qwen-05b", "phi3-mini", "qwen-coder-05b", "qwen-coder-3b"];
|
||||
let task_names = ["tokenize", "qwen-05b", "qwen-coder-05b", "qwen-coder-3b"];
|
||||
let task_name = task_names.get(task_id as usize).unwrap_or(&"tokenize");
|
||||
console_log!("Kipinä Agent Node käynnistyy — backend: {} | tehtävä: {}", backend_name, task_name);
|
||||
|
||||
@@ -303,22 +300,6 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 1 && auto_on {
|
||||
// Vain SmolLM-solmut, ja vain yksi inferenssi kerrallaan
|
||||
if LLM_BUSY.load(Ordering::SeqCst) {
|
||||
// Ohitetaan — edellinen inferenssi vielä käynnissä
|
||||
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
if !prompt.is_empty() && model == "smollm-135m" {
|
||||
LLM_BUSY.store(true, Ordering::SeqCst);
|
||||
let ws_for_async = ws_clone.clone();
|
||||
wasm_bindgen_futures::spawn_local(async move {
|
||||
smollm::run_smollm_inference(prompt, ws_for_async).await;
|
||||
LLM_BUSY.store(false, Ordering::SeqCst);
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 2 && auto_on {
|
||||
// Qwen2.5-0.5B
|
||||
if LLM_BUSY.load(Ordering::SeqCst) {
|
||||
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
@@ -333,21 +314,6 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") && current_task == 3 && auto_on {
|
||||
// Phi-3 Mini
|
||||
if LLM_BUSY.load(Ordering::SeqCst) {
|
||||
} else if let Ok(task) = serde_json::from_str::<serde_json::Value>(&msg) {
|
||||
let prompt = task.get("prompt").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
let model = task.get("model").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
if !prompt.is_empty() && model.starts_with("phi3-mini") {
|
||||
LLM_BUSY.store(true, Ordering::SeqCst);
|
||||
let ws_for_async = ws_clone.clone();
|
||||
wasm_bindgen_futures::spawn_local(async move {
|
||||
phi3::run_phi3_inference(prompt, ws_for_async).await;
|
||||
LLM_BUSY.store(false, Ordering::SeqCst);
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if msg.contains("llm_prompt") {
|
||||
console_log!("[DEBUG] llm_prompt vastaanotettu! current_task={}, busy={}", current_task, LLM_BUSY.load(Ordering::SeqCst));
|
||||
if current_task == 4 || current_task == 5 {
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
use candle_core::{Device, Tensor, DType};
|
||||
use candle_nn::VarBuilder;
|
||||
use candle_transformers::models::phi3::{Config as Phi3Config, Model as Phi3Model};
|
||||
use wasm_bindgen::JsCast;
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
use web_sys::WebSocket;
|
||||
|
||||
use crate::storage;
|
||||
|
||||
// File-local logging helper: takes `format!`-style arguments and writes the
// resulting string to the browser console.
macro_rules! console_log {
    ($($arg:tt)*) => {
        web_sys::console::log_1(&format!($($arg)*).into())
    };
}
|
||||
|
||||
const MODEL_URL: &str = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors.index.json";
|
||||
const TOKENIZER_URL: &str = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.json";
|
||||
|
||||
// Phi-3 Mini on iso (7.6 GB) — käytetään kvantisoidumpaa versiota myöhemmin
|
||||
// Tällä hetkellä: placeholder joka raportoi koon ja jättää inferenssin väliin
|
||||
pub async fn run_phi3_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
console_log!("[Phi-3] Phi-3 Mini 3.8B on liian suuri selaimessa ajettavaksi (~7.6 GB).");
|
||||
console_log!("[Phi-3] Käytä SmolLM 135M tai Qwen2.5 0.5B selaininferenssiin.");
|
||||
console_log!("[Phi-3] Phi-3 tuetaan native-node:lla (Docker + GPU).");
|
||||
|
||||
let done = serde_json::json!({
|
||||
"type": "llm_done",
|
||||
"prompt": prompt,
|
||||
"model": "Phi-3-Mini (ei tuettu selaimessa)",
|
||||
"response": "Phi-3 Mini 3.8B on liian suuri selaimessa ajettavaksi. Käytä SmolLM 135M tai Qwen2.5 0.5B.",
|
||||
"tokens_generated": 0,
|
||||
"duration_ms": 0,
|
||||
"tokens_per_sec": 0,
|
||||
"load_time_ms": 0,
|
||||
});
|
||||
let _ = ws.borrow().send_with_str(&done.to_string());
|
||||
}
|
||||
@@ -1,232 +0,0 @@
|
||||
use candle_core::{Device, Tensor, DType};
|
||||
use candle_nn::VarBuilder;
|
||||
use candle_transformers::models::llama::{Llama, LlamaConfig, LlamaEosToks, Cache};
|
||||
// LogitsProcessor poistettu — käytetään greedy samplingia (argmax) Wasm-yhteensopivuuden vuoksi
|
||||
use wasm_bindgen::JsCast;
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
use web_sys::WebSocket;
|
||||
|
||||
use crate::storage;
|
||||
|
||||
// File-local logging helper: takes `format!`-style arguments and writes the
// resulting string to the browser console.
macro_rules! console_log {
    ($($arg:tt)*) => {
        web_sys::console::log_1(&format!($($arg)*).into())
    };
}
|
||||
|
||||
const MODEL_URL: &str = "https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct/resolve/main/model.safetensors";
|
||||
const TOKENIZER_URL: &str = "https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct/resolve/main/tokenizer.json";
|
||||
|
||||
/// Lataa tiedosto HuggingFacesta streaming-latauksella (progress-ilmoitukset) ja tallentaa IndexedDB:hen
|
||||
async fn ensure_cached(key: &str, url: &str, ws: &Rc<RefCell<WebSocket>>) -> Result<Vec<u8>, String> {
|
||||
if let Ok(Some(bytes)) = storage::load_from_idb(key).await {
|
||||
console_log!("[SmolLM] {} löytyi välimuistista ({} MB)", key, bytes.len() / 1024 / 1024);
|
||||
send_progress(ws, key, 100, bytes.len(), bytes.len());
|
||||
return Ok(bytes);
|
||||
}
|
||||
|
||||
console_log!("[SmolLM] Ladataan {}...", key);
|
||||
send_progress(ws, key, 0, 0, 0);
|
||||
|
||||
// Fetch API:lla saadaan Content-Length ja streaming-luku
|
||||
let resp = crate::worker_fetch(url).await?;
|
||||
|
||||
if !resp.ok() {
|
||||
return Err(format!("HTTP {}", resp.status()));
|
||||
}
|
||||
|
||||
// Kokonaiskoko Content-Length-headerista
|
||||
let total_size: usize = resp.headers()
|
||||
.get("content-length").ok().flatten()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
|
||||
let body = resp.body().ok_or("Ei bodyä")?;
|
||||
let reader = body.get_reader();
|
||||
let reader: web_sys::ReadableStreamDefaultReader = reader.dyn_into().map_err(|_| "Ei ReadableStreamDefaultReader".to_string())?;
|
||||
|
||||
let mut data: Vec<u8> = Vec::with_capacity(total_size);
|
||||
let mut last_pct: u32 = 0;
|
||||
|
||||
loop {
|
||||
let chunk = wasm_bindgen_futures::JsFuture::from(reader.read())
|
||||
.await.map_err(|e| format!("Luku epäonnistui: {:?}", e))?;
|
||||
|
||||
let done = js_sys::Reflect::get(&chunk, &"done".into())
|
||||
.map_err(|_| "done-kenttä puuttuu".to_string())?
|
||||
.as_bool().unwrap_or(true);
|
||||
|
||||
if done { break; }
|
||||
|
||||
let value = js_sys::Reflect::get(&chunk, &"value".into())
|
||||
.map_err(|_| "value-kenttä puuttuu".to_string())?;
|
||||
let array = js_sys::Uint8Array::new(&value);
|
||||
let mut buf = vec![0u8; array.length() as usize];
|
||||
array.copy_to(&mut buf);
|
||||
data.extend_from_slice(&buf);
|
||||
|
||||
// Progress-päivitys (joka 5%)
|
||||
if total_size > 0 {
|
||||
let pct = ((data.len() as f64 / total_size as f64) * 100.0) as u32;
|
||||
if pct >= last_pct + 5 || pct == 100 {
|
||||
last_pct = pct;
|
||||
console_log!("[SmolLM] {} lataus: {}% ({}/{} MB)", key, pct, data.len() / 1024 / 1024, total_size / 1024 / 1024);
|
||||
send_progress(ws, key, pct, data.len(), total_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console_log!("[SmolLM] Tallennetaan {} ({} MB) IndexedDB:hen...", key, data.len() / 1024 / 1024);
|
||||
let _ = storage::save_to_idb(key, &data).await;
|
||||
console_log!("[SmolLM] {} tallennettu!", key);
|
||||
send_progress(ws, key, 100, data.len(), data.len());
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
fn send_progress(ws: &Rc<RefCell<WebSocket>>, file: &str, pct: u32, loaded: usize, total: usize) {
|
||||
let msg = serde_json::json!({
|
||||
"type": "download_progress",
|
||||
"file": file,
|
||||
"pct": pct,
|
||||
"loaded_mb": loaded / 1024 / 1024,
|
||||
"total_mb": total / 1024 / 1024,
|
||||
});
|
||||
let _ = ws.borrow().send_with_str(&msg.to_string());
|
||||
}
|
||||
|
||||
/// Lataa malli ja tokenizer, suorita inferenssi ja streamaa tokenit hubille
|
||||
pub async fn run_smollm_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
// performance via crate::perf_now()
|
||||
|
||||
// 1. Lataa tokenizer
|
||||
let tok_bytes = match ensure_cached("smollm-tokenizer.json", TOKENIZER_URL, &ws).await {
|
||||
Ok(b) => b,
|
||||
Err(e) => { console_log!("[SmolLM] Tokenizer-virhe: {}", e); return; }
|
||||
};
|
||||
|
||||
let tokenizer = match tokenizers::Tokenizer::from_bytes(&tok_bytes) {
|
||||
Ok(t) => t,
|
||||
Err(e) => { console_log!("[SmolLM] Tokenizer-parsinta epäonnistui: {}", e); return; }
|
||||
};
|
||||
|
||||
// 2. Lataa mallin painot
|
||||
let model_bytes = match ensure_cached("smollm-model.safetensors", MODEL_URL, &ws).await {
|
||||
Ok(b) => b,
|
||||
Err(e) => { console_log!("[SmolLM] Malli-virhe: {}", e); return; }
|
||||
};
|
||||
|
||||
// Burn 0.14 wgpu ei yhteensopiva nykyisten selainten kanssa (maxInterStageShaderComponents)
|
||||
// Burn 0.21-pre.2 cubecl-runtime ei käänny Wasmille (println! puuttuu)
|
||||
// → NdArray kunnes Burn 0.21 stable + Wasm-tuki
|
||||
console_log!("[SmolLM] Burn NdArray (CPU) inferenssi...");
|
||||
run_burn_inference::<burn::backend::NdArray>(prompt, model_bytes, tokenizer, ws).await;
|
||||
}
|
||||
|
||||
async fn run_burn_inference<B: burn::tensor::backend::Backend>(
|
||||
prompt: String,
|
||||
model_bytes: Vec<u8>,
|
||||
tokenizer: tokenizers::Tokenizer,
|
||||
ws: Rc<RefCell<WebSocket>>,
|
||||
) {
|
||||
let start_load = crate::perf_now();
|
||||
|
||||
let device = Default::default();
|
||||
let config = crate::burn_smollm::config::SmolLMConfig::default();
|
||||
|
||||
console_log!("[SmolLM] Injektoidaan Safetensors -> Burn Params...");
|
||||
let model = match crate::burn_smollm::loader::load_safetensors_to_model::<B>(&model_bytes, &config, &device) {
|
||||
Ok(m) => m,
|
||||
Err(e) => { console_log!("[SmolLM] Lataus epäonnistui: {}", e); return; }
|
||||
};
|
||||
|
||||
let load_time = crate::perf_now() - start_load;
|
||||
console_log!("[SmolLM] Burn-malli ladattu ({:.0}ms). Generoidaan...", load_time);
|
||||
|
||||
let formatted_prompt = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
|
||||
let encoding = match tokenizer.encode(formatted_prompt.as_str(), true) {
|
||||
Ok(e) => e,
|
||||
Err(e) => { console_log!("[SmolLM] Tokenisointivirhe: {}", e); return; }
|
||||
};
|
||||
|
||||
let mut input_ids: Vec<u32> = encoding.get_ids().to_vec();
|
||||
let input_len = input_ids.len();
|
||||
console_log!("[SmolLM] Syöte: {} tokenia", input_len);
|
||||
|
||||
let start_gen = crate::perf_now();
|
||||
let max_new_tokens = 32;
|
||||
let mut generated_text = String::new();
|
||||
let mut tokens_generated: usize = 0;
|
||||
|
||||
// KV-välimuistin taulukko kerroksittain
|
||||
let mut caches: Vec<Option<crate::burn_smollm::attention::KVCache<B>>> = vec![None; config.num_hidden_layers];
|
||||
let mut current_offset = 0;
|
||||
|
||||
// Prefill: yksitellen, vältetään future token leakage koska ei causal maskia
|
||||
let input_ids_i32: Vec<i32> = input_ids.iter().map(|&x| x as i32).collect();
|
||||
let mut last_logits = None;
|
||||
|
||||
for &id in &input_ids_i32 {
|
||||
let input_tensor = burn::tensor::Tensor::<B, 1, burn::tensor::Int>::from_data(
|
||||
burn::tensor::TensorData::from([id]),
|
||||
&device
|
||||
).unsqueeze::<2>(); // [1, 1]
|
||||
|
||||
last_logits = Some(model.forward(input_tensor, current_offset, &mut caches));
|
||||
current_offset += 1;
|
||||
}
|
||||
|
||||
let mut logits = last_logits.unwrap();
|
||||
|
||||
// Argmax sämpläys
|
||||
let next_token_tensor = logits.clone().argmax(2);
|
||||
let mut next_token: u32 = next_token_tensor.into_scalar().to_string().parse().unwrap_or(2); // Yksinkertainen cast koska int scalar
|
||||
|
||||
if next_token != 2 {
|
||||
if let Ok(text) = tokenizer.decode(&[next_token], true) {
|
||||
generated_text.push_str(&text);
|
||||
let chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "SmolLM-135M (WebGPU)" });
|
||||
let _ = ws.borrow().send_with_str(&chunk.to_string());
|
||||
}
|
||||
tokens_generated += 1;
|
||||
}
|
||||
|
||||
// Autoregressiivinen luuppi
|
||||
for _ in 1..max_new_tokens {
|
||||
if next_token == 2 { break; }
|
||||
|
||||
let mut input_tensor = burn::tensor::Tensor::<B, 1, burn::tensor::Int>::from_data(
|
||||
burn::tensor::TensorData::from([next_token as i32]),
|
||||
&device
|
||||
).unsqueeze::<2>();
|
||||
|
||||
logits = model.forward(input_tensor, current_offset, &mut caches);
|
||||
current_offset += 1;
|
||||
|
||||
let next_token_tensor = logits.argmax(2);
|
||||
next_token = next_token_tensor.into_scalar().to_string().parse().unwrap_or(2);
|
||||
|
||||
if next_token == 2 { break; }
|
||||
|
||||
if let Ok(text) = tokenizer.decode(&[next_token], true) {
|
||||
generated_text.push_str(&text);
|
||||
let chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "SmolLM-135M (WebGPU)" });
|
||||
let _ = ws.borrow().send_with_str(&chunk.to_string());
|
||||
}
|
||||
tokens_generated += 1;
|
||||
}
|
||||
|
||||
let gen_time = crate::perf_now() - start_gen;
|
||||
let tokens_per_sec = if gen_time > 0.0 { (tokens_generated as f64 / gen_time) * 1000.0 } else { 0.0 };
|
||||
|
||||
let done = serde_json::json!({
|
||||
"type": "llm_done",
|
||||
"prompt": prompt,
|
||||
"model": "SmolLM-135M-Instruct (WebGPU)",
|
||||
"response": generated_text,
|
||||
"tokens_generated": tokens_generated,
|
||||
"duration_ms": (gen_time * 100.0).round() / 100.0,
|
||||
"tokens_per_sec": (tokens_per_sec * 10.0).round() / 10.0,
|
||||
"load_time_ms": (load_time * 100.0).round() / 100.0,
|
||||
});
|
||||
let _ = ws.borrow().send_with_str(&done.to_string());
|
||||
}
|
||||
Binary file not shown.
@@ -1,33 +0,0 @@
|
||||
#!/usr/bin/env bash
# Kipinä Agentic Network — quick local test.
#
# Starts a local Kipinä Hub in the background, then opens the native node's
# Ratatui dashboard in the foreground, connected to that hub. When the
# dashboard exits (e.g. by pressing Q), or the script is interrupted, the
# background hub is shut down as well.

set -u

cd "$(dirname "$0")" || {
  echo "Virhe: skriptin hakemistoon siirtyminen epäonnistui" >&2
  exit 1
}

HUB_PID=""

# Stop the background hub if it was started and is still running.
stop_hub() {
  if [[ -n "$HUB_PID" ]] && kill -0 "$HUB_PID" 2>/dev/null; then
    kill "$HUB_PID" 2>/dev/null
  fi
  HUB_PID=""
}

# Print the PIDs of processes LISTENING on TCP port 3000. Restricting to
# listeners keeps clients that merely have a connection to :3000 (for example
# a browser tab) out of the kill list.
port_listeners() {
  lsof -t -iTCP:3000 -sTCP:LISTEN 2>/dev/null
}

# Never leave the hub behind, whatever the exit path.
trap stop_hub EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "========================================="
echo " 🔥 Kipinä Agentic Network - Local Run 🔥"
echo "========================================="
echo "Varmistetaan portin 3000 vapaus..."
stale_pids=$(port_listeners)
if [[ -n "$stale_pids" ]]; then
  # Ask politely first; force-kill only what still holds the port afterwards.
  # Word splitting is intentional: lsof -t prints one PID per line.
  # shellcheck disable=SC2086
  kill $stale_pids 2>/dev/null
  sleep 1
  stale_pids=$(port_listeners)
  if [[ -n "$stale_pids" ]]; then
    # shellcheck disable=SC2086
    kill -9 $stale_pids 2>/dev/null
  fi
fi

echo ""
echo "Käynnistetään Kipinä Hub taustalle..."
# Hub logs go to a separate file so they do not clutter the dashboard.
env STATIC_DIR=frontend/dist cargo run -p hub > hub-local.log 2>&1 &
HUB_PID=$!

# Give the hub a moment to start listening.
sleep 2
if ! kill -0 "$HUB_PID" 2>/dev/null; then
  echo "Virhe: Hub sammui heti käynnistyksen jälkeen — katso hub-local.log" >&2
  HUB_PID=""
  exit 1
fi

echo "Käynnistetään Natiivisolmu ja Ratatui-dashboard..."
# Start the TUI and force it to connect to the local hub.
env -u OLLAMA_MODEL HUB_URL=ws://127.0.0.1:3000/ws cargo run -p native-node

# Reached once the TUI has exited.
echo ""
echo "Dashboard suljettu! Ajetaan lokaali Hub (#$HUB_PID) siististi alas..."
stop_hub
echo "Kaikki sammutettu. Kiitos!"
|
||||
Reference in New Issue
Block a user