From 01b4fb8e221dca1c86540e4ab2cf01a96962eaa6 Mon Sep 17 00:00:00 2001 From: jaakko Date: Tue, 14 Apr 2026 10:59:39 +0300 Subject: [PATCH] =?UTF-8?q?CodeBench:=20--compact=20tiivist=C3=A4=C3=A4=20?= =?UTF-8?q?golden=20examplen=20templaatiksi?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python: 1340 → 335 tokenia (−75%) Rust: 3383 → 445 tokenia (−87%) Käyttö: node benchmark.mjs --compact --models qwen3:4b --- kipina-codebench/benchmark.mjs | 10 ++++- kipina-codebench/prompts/golden-compact-py.md | 36 ++++++++++++++++ kipina-codebench/prompts/golden-compact-rs.md | 43 +++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 kipina-codebench/prompts/golden-compact-py.md create mode 100644 kipina-codebench/prompts/golden-compact-rs.md diff --git a/kipina-codebench/benchmark.mjs b/kipina-codebench/benchmark.mjs index a8a3e3f..000ecad 100644 --- a/kipina-codebench/benchmark.mjs +++ b/kipina-codebench/benchmark.mjs @@ -33,6 +33,7 @@ const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16); const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`); const RESULTS_DIR = join(__dirname, 'results'); const THINK_MODE = args.includes('--think'); +const COMPACT_MODE = args.includes('--compact'); const LANG = arg('lang', 'python'); // python | rust const MAX_FIX_ROUNDS = 2; @@ -66,6 +67,13 @@ const LANG_CONFIG = { const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python; function loadGoldenExample() { + // --compact: käytä tiivistettyä templaattia täyden koodin sijaan + if (COMPACT_MODE) { + const compactFile = LANG === 'rust' ? 
'golden-compact-rs.md' : 'golden-compact-py.md'; + const compactPath = join(__dirname, 'prompts', compactFile); + if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n'; + } + // Täysi golden example const todoDir = join(GOLDEN_DIR, LCONF.goldenDir); if (!existsSync(todoDir)) return ''; let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`; @@ -376,7 +384,7 @@ async function main() { console.log('╔══════════════════════════════════════════════╗'); console.log('║ Kipinä CodeBench ║'); console.log('╚══════════════════════════════════════════════╝'); - console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${THINK_MODE ? ' 🧠 thinking ON' : ''}`); + console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${COMPACT_MODE ? ' (compact)' : ''}${THINK_MODE ? ' 🧠 thinking ON' : ''}`); // Haetaan mallit let models; diff --git a/kipina-codebench/prompts/golden-compact-py.md b/kipina-codebench/prompts/golden-compact-py.md new file mode 100644 index 0000000..c6272b8 --- /dev/null +++ b/kipina-codebench/prompts/golden-compact-py.md @@ -0,0 +1,36 @@ +REFERENCE PATTERNS (follow exactly): + +STACK: SQLAlchemy 2.0 + Pydantic v2 + FastAPI + SQLite + +models.py: + from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + class Base(DeclarativeBase): pass + Fields: Mapped[type] = mapped_column(SqlType, default=...) 
+ Nullable: Mapped[str | None] = mapped_column(Text, default=None) + Status: Mapped[str] = mapped_column(String(20), default="pending") + FK: Mapped[int] = mapped_column(ForeignKey("table.id")) + End: Base.metadata.create_all(bind=engine) + +schemas.py: + class EntityCreate(BaseModel): fields with defaults + class EntityResponse(EntityCreate): + id: int + model_config = ConfigDict(from_attributes=True) + +main.py: + def get_db(): yield SessionLocal(); finally close + POST /{table}/ → 201, model_dump() + GET /{table}/ → list + GET /{table}/{id} → 404 if not found + PUT /{table}/{id} → model_dump(), setattr loop + DELETE /{table}/{id} → 204 + +test_main.py: + test.db + override_get_db + TestClient + Unique Finnish data per test ("Osta maitoa", "Haettava tehtävä"...) + test_create → 201 + assert "id" in json + test_list → post first, get, assert len >= 1 + test_get_by_id → post, get by id, assert id matches + test_not_found → get /99999 → 404 + test_update → post, put with ALL required fields, assert 200 + test_delete → post, delete 204, get again → 404 diff --git a/kipina-codebench/prompts/golden-compact-rs.md b/kipina-codebench/prompts/golden-compact-rs.md new file mode 100644 index 0000000..5ce8fd5 --- /dev/null +++ b/kipina-codebench/prompts/golden-compact-rs.md @@ -0,0 +1,43 @@ +REFERENCE PATTERNS (follow exactly): + +STACK: Axum 0.8 + SQLx + SQLite + Tokio + Serde + +Cargo.toml: + edition = "2024" + deps: axum 0.8, tokio (full), serde (derive), serde_json, sqlx (sqlite, runtime-tokio), tower-http (cors) + dev: reqwest (rustls-tls) + +src/models.rs: + #[derive(Debug, Serialize, Deserialize, FromRow)] + struct Entity { id: i64, field: String, optional: Option<String> } + struct CreateEntity { field: String, optional: Option<String> } + Status fields: String with default "pending" + +src/handlers.rs: + async fn create(State(pool), Json(input)) -> (StatusCode, Json<Entity>) + POST → StatusCode::CREATED, sqlx::query("INSERT...").execute + query_as last_insert_rowid + GET list → 
query_as("SELECT * FROM table").fetch_all + GET by id → query_as.fetch_optional, return 404 if None + PUT → query("UPDATE...SET...WHERE id=?"), rows_affected == 0 → 404 + DELETE → StatusCode::NO_CONTENT, rows_affected == 0 → 404 + +src/lib.rs: + pub fn app(pool: SqlitePool) -> Router + pub async fn init_db(pool: &SqlitePool) → CREATE TABLE IF NOT EXISTS + Routes: .route("/{table}", post(create).get(list)) + .route("/{table}/{id}", get(get_one).put(update).delete(delete_one)) + +src/main.rs: + SqlitePool::connect("sqlite:./app.db"), init_db, bind 0.0.0.0:3000 + +tests/api_test.rs: + Each test: SqlitePool::connect("sqlite::memory:"), init_db, app(pool) + Spawn on random port: TcpListener::bind("127.0.0.1:0"), axum::serve + reqwest::Client for HTTP calls + Unique Finnish data ("Osta maitoa", "Haettava tehtävä"...) + test_create → 201 + assert id exists + test_list → post first, get, assert len >= 1 + test_get_by_id → post, get, assert id matches + test_not_found → 404 + test_update → post, put with ALL fields, assert 200 + test_delete → post, delete 204, get → 404