uusi projekti

This commit is contained in:
Jaakko Vanhala
2026-04-12 10:28:57 +03:00
parent 094b183c17
commit 2f140c8a15
16 changed files with 521 additions and 102 deletions

View File

@@ -1 +1 @@
5f005820535910a5052a33cfcfc0bd6909d11c25
dirty-3e9cdd70c60dadfb970cee47ebbd912c

View File

@@ -0,0 +1,33 @@
{
"name": "Data Analytics Pipeline",
"description": "ETL, analysis, and visualization with Docker (MariaDB + Jupyter)",
"keywords": ["data", "analytics", "csv", "etl", "visualization", "statistics", "dashboard", "jupyter", "pandas", "matplotlib"],
"files": {
"etl.py": {
"description": "Data loading, cleaning, and transformation",
"example": "import pandas as pd\nfrom pathlib import Path\nfrom sqlalchemy import create_engine\n\nDB_URL = \"mysql+pymysql://root:secret@localhost:3306/analytics\"\nengine = create_engine(DB_URL)\n\ndef load_csv(path: str) -> pd.DataFrame:\n df = pd.read_csv(path)\n print(f\"Loaded {len(df)} rows from {path}\")\n return df\n\ndef clean(df: pd.DataFrame) -> pd.DataFrame:\n df = df.dropna(subset=[\"x\", \"y\"])\n df = df[(df[\"x\"] >= 0) & (df[\"y\"] >= 0)] # Remove outliers\n df[\"timestamp\"] = pd.to_datetime(df[\"timestamp\"])\n return df.sort_values(\"timestamp\").reset_index(drop=True)\n\ndef to_database(df: pd.DataFrame, table: str):\n df.to_sql(table, engine, if_exists=\"replace\", index=False)\n print(f\"Wrote {len(df)} rows to {table}\")\n\nif __name__ == \"__main__\":\n for csv_file in sorted(Path(\"data\").glob(\"*.csv\")):\n df = load_csv(str(csv_file))\n df = clean(df)\n to_database(df, \"measurements\")",
"instructions": "Write the ETL pipeline:\n- Load CSV files from data/ directory using pandas\n- Clean: remove nulls, filter outliers, parse timestamps\n- Transform: convert units, compute derived columns\n- Load into MariaDB via SQLAlchemy\n- Make it runnable as a standalone script"
},
"analysis.py": {
"description": "Statistical analysis and metrics computation",
"example": "import pandas as pd\nfrom sqlalchemy import create_engine\n\nDB_URL = \"mysql+pymysql://root:secret@localhost:3306/analytics\"\nengine = create_engine(DB_URL)\n\ndef load_data() -> pd.DataFrame:\n return pd.read_sql(\"SELECT * FROM measurements\", engine)\n\ndef summary_stats(df: pd.DataFrame) -> dict:\n return {\n \"total_rows\": len(df),\n \"date_range\": f\"{df['timestamp'].min()} to {df['timestamp'].max()}\",\n \"unique_entities\": df[\"entity_id\"].nunique(),\n }\n\ndef hourly_distribution(df: pd.DataFrame) -> pd.DataFrame:\n df[\"hour\"] = df[\"timestamp\"].dt.hour\n return df.groupby(\"hour\").size().reset_index(name=\"count\")\n\nif __name__ == \"__main__\":\n df = load_data()\n stats = summary_stats(df)\n for k, v in stats.items():\n print(f\"{k}: {v}\")",
"instructions": "Write analysis functions:\n- Load cleaned data from MariaDB\n- Compute summary statistics (counts, date ranges, distributions)\n- Time-based analysis (hourly, daily, weekly patterns)\n- Group-level metrics (per entity, per zone)\n- Return DataFrames and dicts suitable for visualization"
},
"visualize.py": {
"description": "Charts and visualizations with matplotlib",
"example": "import matplotlib.pyplot as plt\nimport pandas as pd\nfrom analysis import load_data, hourly_distribution\n\ndef plot_heatmap(df: pd.DataFrame, title: str, output: str):\n fig, ax = plt.subplots(figsize=(12, 8))\n scatter = ax.scatter(df[\"x\"], df[\"y\"], c=df[\"density\"], cmap=\"hot\", alpha=0.5, s=2)\n ax.set_title(title)\n ax.set_xlabel(\"x\")\n ax.set_ylabel(\"y\")\n ax.invert_yaxis()\n plt.colorbar(scatter, label=\"Density\")\n plt.tight_layout()\n plt.savefig(output, dpi=150)\n print(f\"Saved {output}\")\n\ndef plot_bar(df: pd.DataFrame, x: str, y: str, title: str, output: str):\n fig, ax = plt.subplots(figsize=(10, 5))\n ax.bar(df[x], df[y], color=\"steelblue\")\n ax.set_title(title)\n ax.set_xlabel(x)\n ax.set_ylabel(y)\n plt.tight_layout()\n plt.savefig(output, dpi=150)\n\nif __name__ == \"__main__\":\n df = load_data()\n hourly = hourly_distribution(df)\n plot_bar(hourly, \"hour\", \"count\", \"Hourly Distribution\", \"output/hourly.png\")",
"instructions": "Write visualization functions:\n- Import analysis functions for data\n- Heatmaps, bar charts, line charts as appropriate\n- Save figures to output/ directory (PNG, 150 DPI)\n- Use matplotlib with clear titles, labels, colorbars\n- Make it runnable as standalone to generate all charts"
},
"docker-compose.yml": {
"description": "Docker Compose stack for database and Jupyter",
"example": "services:\n db:\n image: mariadb:11\n environment:\n MYSQL_ROOT_PASSWORD: secret\n MYSQL_DATABASE: analytics\n ports:\n - \"3306:3306\"\n volumes:\n - db_data:/var/lib/mysql\n\n jupyter:\n image: jupyter/scipy-notebook:latest\n ports:\n - \"8888:8888\"\n volumes:\n - .:/home/jovyan/work\n environment:\n JUPYTER_TOKEN: kipina\n depends_on:\n - db\n\nvolumes:\n db_data:",
"instructions": "Write docker-compose.yml:\n- MariaDB service with persistent volume\n- JupyterLab service with project mounted\n- Correct environment variables\n- Port mappings for local development\n- Write ONLY the YAML, no explanations"
},
"pyproject.toml": {
"description": "Project dependencies",
"example": "[project]\nname = \"analytics\"\nversion = \"0.1.0\"\nrequires-python = \">=3.11\"\ndependencies = [\n \"pandas\",\n \"matplotlib\",\n \"sqlalchemy\",\n \"pymysql\",\n]\n\n[project.scripts]\netl = \"python etl.py\"\nanalyze = \"python analysis.py\"\nvisualize = \"python visualize.py\"",
"instructions": "Use [project] format (PEP 621). List all data science dependencies. Add scripts for ETL, analysis, and visualization."
}
},
"order": ["etl.py", "analysis.py", "visualize.py", "docker-compose.yml", "pyproject.toml"]
}

View File

@@ -1,6 +1,7 @@
{
"name": "FastAPI CRUD",
"description": "REST API with SQLite database",
"keywords": ["api", "rest", "crud", "endpoint", "fastapi", "web", "backend", "server", "database", "sqlite"],
"files": {
"models.py": {
"description": "SQLAlchemy models, engine, and session",

View File

@@ -501,10 +501,16 @@ OUTPUT FORMAT:
// Wasm-autostart vain jos natiivisolmua ei löydy (tarkistetaan onopen:ssa)
// === Pipeline-keskeytys ===
let pipelineAbort = null; // AbortController tai null
// === kpnRun: lähettää promptin mallille ===
const activeStreams = {};
async function kpnRun(model, prompt, silent, agentOpts) {
// Tarkistetaan keskeytys
if (pipelineAbort?.signal?.aborted) return null;
const taskId = crypto.randomUUID();
const statusDiv = document.createElement('div');
statusDiv.className = 'terminal-line';
@@ -514,10 +520,6 @@ OUTPUT FORMAT:
termPanel.scrollTop = termPanel.scrollHeight;
try {
// Ei odotetaan Wasmia — lähetetään suoraan hubille.
// Jos hub löytää natiivisolmun, vastaus tulee nopeasti.
// Jos 503, käynnistetään Wasm-fallback.
if (!silent) {
const streamDiv = document.createElement('div');
streamDiv.className = 'terminal-line';
@@ -535,18 +537,18 @@ OUTPUT FORMAT:
model,
prompt,
task_id: taskId,
system_prompt: opts.systemPrompt || settings.systemPrompt || undefined,
system_prompt: opts.prompt || settings.systemPrompt || undefined,
temperature: opts.temperature ?? settings.temperature ?? undefined,
top_k: opts.topK ?? settings.topK ?? undefined,
max_tokens: opts.maxTokens ?? settings.maxTokens ?? undefined,
repeat_penalty: opts.repeatPenalty ?? settings.repeatPenalty ?? undefined,
stop: settings.stopSequences ? settings.stopSequences.split('\\n').filter(Boolean) : undefined,
};
const res = await fetch('/api/v1/chat/completions', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
signal: pipelineAbort?.signal,
});
if (res.status === 503 && !wasmNodeStarted) {
@@ -619,7 +621,7 @@ OUTPUT FORMAT:
// Example prompt strings keyed by `kpn` command prefix. Each example keeps its
// surrounding double quotes as part of the string.
// Every key is listed exactly once: a repeated key in an object literal is
// silently overridden by the later entry (and is rejected by TypeScript).
const kpnExamples = {
  'kpn run coder': ['"hello world in python"', '"fibonacci in rust"', '"quicksort in javascript"'],
  'kpn run coder-3b': ['"REST API with Flask"', '"binary search tree"'],
  'kpn project': [
    '"FastAPI + SQLite REST API"',
    '"UWB indoor positioning analytics — CSV cart tracking data, heatmaps, statistics, Docker (MariaDB + Jupyter)"',
    '"CLI tool for CSV processing"',
  ],
  'kpn pipeline': ['"todo-sovellus"', '"laskin pythonilla"'],
};
@@ -753,15 +755,30 @@ OUTPUT FORMAT:
// === Template-based project pipeline ===
// Loaded templates, keyed by each template's `name` field.
let templates = {};
// Template JSON files to fetch from /templates/.
const TEMPLATE_FILES = ['fastapi-crud.json', 'data-analytics.json'];

// Load the templates. Each file is fetched exactly once, sequentially, so the
// insertion order of `templates` follows TEMPLATE_FILES. Loading is best-effort:
// a file that cannot be fetched or parsed is skipped with a console warning
// instead of aborting the remaining files.
(async () => {
  for (const file of TEMPLATE_FILES) {
    try {
      const res = await fetch(`/templates/${file}`);
      if (!res.ok) {
        console.warn(`Template ${file} not loaded: HTTP ${res.status}`);
        continue;
      }
      const t = await res.json();
      // Without a usable name the template cannot be keyed; skip it rather
      // than storing it under "undefined".
      if (!t || typeof t.name !== 'string' || t.name === '') {
        console.warn(`Template ${file} skipped: missing "name"`);
        continue;
      }
      templates[t.name] = t;
    } catch (e) {
      console.warn(`Template ${file} failed to load`, e);
    }
  }
})();
// Pick the template whose keywords best match the client's brief.
//
// brief: requirements text; compared case-insensitively.
// Returns the entry of `templates` with the most keyword hits (substring
// match), or null when no keyword matches — null means "free mode".
// On a tie the template that comes first in `templates` wins.
function selectTemplate(brief) {
  // A missing or non-string brief cannot match anything.
  if (typeof brief !== 'string') return null;
  const haystack = brief.toLowerCase();
  let best = null;
  let bestScore = 0;
  for (const t of Object.values(templates)) {
    const keywords = Array.isArray(t?.keywords) ? t.keywords : [];
    let score = 0;
    for (const k of keywords) {
      // Skip non-strings and '' — includes('') is always true and would
      // inflate the score of any template carrying an empty keyword.
      if (typeof k !== 'string' || k === '') continue;
      // The brief is lowercased, so the keyword must be too; otherwise a
      // mixed-case keyword could never match.
      if (haystack.includes(k.toLowerCase())) score++;
    }
    if (score > bestScore) {
      bestScore = score;
      best = t;
    }
  }
  return best; // null = free mode
}
function explainStep(title, explanation) {
termLog(`\n <span style="color:#a371f7;font-size:12px">💡 ${esc(title)}</span>`);
termLog(` <span style="color:#8b949e;font-size:12px">${esc(explanation)}</span>`);
@@ -769,18 +786,11 @@ OUTPUT FORMAT:
async function kpnProject(task) {
const cli = agents.client || Object.values(agents)[0];
const mgr = agents.manager || Object.values(agents)[1];
const cdr = agents.coder || Object.values(agents)[2];
// Etsitään sopivin mallipohja
const template = Object.values(templates)[0]; // Toistaiseksi vain FastAPI CRUD
if (!template) {
termLog(' ✗ Mallipohjia ei ladattu', '#f85149');
return;
}
termLog(`<span style="color:var(--purple);font-weight:bold">━━━ ${esc(template.name)} — ${esc(task)} ━━━</span>`);
// Asiakas: jalostaa vaatimukset
termLog(`<span style="color:var(--purple);font-weight:bold">━━━ Projekti — ${esc(task)} ━━━</span>`);
termLog(`\n<span style="color:#f0883e;font-weight:bold">[0] ${esc(cli.name)}</span> — vaatimusmäärittely`);
highlightAgent('client');
explainStep('Vaatimusmäärittely', `${cli.name} muotoilee idean selkeiksi vaatimuksiksi: ominaisuudet, datamallit, rajapinnat.`);
@@ -788,37 +798,72 @@ OUTPUT FORMAT:
if (!brief) { termLog(' ✗ Vaatimusmäärittely epäonnistui', '#f85149'); return; }
termLog(` <span style="color:#8b949e">Vaatimukset valmiit → Manageri</span>`);
explainStep('Mallipohja', `Käytetään "${template.name}" -mallipohjaa jossa ${template.order.length} tiedostoa: ${template.order.join(', ')}. Jokainen tiedosto generoidaan järjestyksessä, ja aiemmat tiedostot annetaan kontekstina seuraavalle.`);
// Valitaan mallipohja automaattisesti briefin perusteella
const template = selectTemplate(brief);
// Tiedostolista: mallipohjasta tai managerin dynaamisesta suunnitelmasta
let fileOrder = [];
let fileDefs = {};
if (template) {
// Mallipohja löytyi — käytetään sen rakennetta
fileOrder = template.order;
fileDefs = template.files;
explainStep('Mallipohja', `Tunnistettiin "${template.name}" — ${fileOrder.length} tiedostoa: ${fileOrder.join(', ')}.`);
} else {
// Vapaa tila — Manageri päättää tiedostorakenteen
termLog(`\n<span style="color:#d29922;font-weight:bold">[1] ${esc(mgr.name)}</span> — tiedostorakenne`);
highlightAgent('manager');
explainStep('Vapaa tila', 'Sopivaa mallipohjaa ei löytynyt. Manageri suunnittelee tiedostorakenteen vaatimusten perusteella.');
const planPrompt = `PROJECT REQUIREMENTS:\n${brief}\n\nPlan the file structure for this project. List each file on its own line:\nfilename.ext: one-line description\n\nMaximum 6 files. List dependency files first.`;
const plan = await kpnRun(mgr.model, planPrompt, false, mgr);
if (!plan) { termLog(' ✗ Suunnittelu epäonnistui', '#f85149'); return; }
// Parsitaan managerin tuottama tiedostolista
for (const line of plan.split('\n')) {
const m = line.match(/^\s*[-*]?\s*(\S+\.\w+)\s*[:\-]\s*(.+)/);
if (m) {
const fname = m[1].replace(/^`|`$/g, '');
fileOrder.push(fname);
fileDefs[fname] = { description: m[2].trim(), instructions: m[2].trim() };
}
}
if (fileOrder.length === 0) {
termLog(' ✗ Manageri ei tuottanut tiedostolistaa', '#f85149');
return;
}
explainStep('Suunnitelma', `${fileOrder.length} tiedostoa: ${fileOrder.join(', ')}`);
}
const files = {};
for (let i = 0; i < template.order.length; i++) {
const fileName = template.order[i];
const fileDef = template.files[fileName];
for (let i = 0; i < fileOrder.length; i++) {
const fileName = fileOrder[i];
const fileDef = fileDefs[fileName];
if (!fileDef) continue;
const step = i + 1;
// Valitaan oikea agentti tiedostotyypin mukaan
const isDbFile = fileName === 'models.py' || fileName === 'database.py';
const isDbFile = fileName === 'models.py' || fileName === 'database.py' || fileName === 'etl.py';
const dataAgent = agents.data || Object.values(agents)[3];
const fileAgent = isDbFile && dataAgent ? dataAgent : cdr;
const fileAgentKey = isDbFile && dataAgent ? 'data' : 'coder';
termLog(`\n<span style="color:#3fb950;font-weight:bold">[${step}/${template.order.length}] ${esc(fileAgent.name)}</span> — ${esc(fileName)}`);
termLog(`\n<span style="color:#3fb950;font-weight:bold">[${step}/${fileOrder.length}] ${esc(fileAgent.name)}</span> — ${esc(fileName)}`);
highlightAgent(fileAgentKey);
// Opettava selitys: miksi tämä tiedosto, mitä se sisältää
explainStep(fileName, fileDef.instructions);
explainStep(fileName, fileDef.instructions || fileDef.description);
// Rakennetaan prompti: esimerkki + konteksti + ohje
// Rakennetaan prompti
let prompt = '';
// Agentin system prompt (data-agentti models.py:lle, koodari muille)
if (fileAgent.prompt) prompt += fileAgent.prompt + '\n\n';
// Esimerkki (few-shot)
prompt += `EXAMPLE of ${fileName} (for a different project, adapt to this one):\n`;
prompt += '```\n' + fileDef.example + '\n```\n\n';
// Esimerkki (vain mallipohjatilassa)
if (fileDef.example) {
prompt += `EXAMPLE of ${fileName} (for a different project, adapt to this one):\n`;
prompt += '```\n' + fileDef.example + '\n```\n\n';
}
// Aiemmin generoidut tiedostot (konteksti)
const prevFiles = Object.entries(files);
@@ -834,8 +879,8 @@ OUTPUT FORMAT:
// Tehtävä
prompt += `NOW write "${fileName}" for THIS project: ${task}\n`;
prompt += fileDef.instructions + '\n';
prompt += 'Adapt the example to match the project description. Import from already written files. Write ONLY the code, no explanations.';
if (fileDef.instructions) prompt += fileDef.instructions + '\n';
prompt += 'Adapt to the project requirements. Import from already written files. Write ONLY the code, no explanations.';
const code = await kpnRun(fileAgent.model, prompt, false, fileAgent);
if (!code) {