CodeBench: --compact tiivistää golden examplen templaatiksi
Python: 1340 → 335 tokenia (−75%). Rust: 3383 → 445 tokenia (−87%). Käyttö: `node benchmark.mjs --compact --models qwen3:4b`
This commit is contained in:
@@ -33,6 +33,7 @@ const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
|
|||||||
const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
|
const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
|
||||||
const RESULTS_DIR = join(__dirname, 'results');
|
const RESULTS_DIR = join(__dirname, 'results');
|
||||||
const THINK_MODE = args.includes('--think');
|
const THINK_MODE = args.includes('--think');
|
||||||
|
const COMPACT_MODE = args.includes('--compact');
|
||||||
const LANG = arg('lang', 'python'); // python | rust
|
const LANG = arg('lang', 'python'); // python | rust
|
||||||
const MAX_FIX_ROUNDS = 2;
|
const MAX_FIX_ROUNDS = 2;
|
||||||
|
|
||||||
@@ -66,6 +67,13 @@ const LANG_CONFIG = {
|
|||||||
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
|
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
|
||||||
|
|
||||||
function loadGoldenExample() {
|
function loadGoldenExample() {
|
||||||
|
// --compact: käytä tiivistettyä templaattia täyden koodin sijaan
|
||||||
|
if (COMPACT_MODE) {
|
||||||
|
const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
|
||||||
|
const compactPath = join(__dirname, 'prompts', compactFile);
|
||||||
|
if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
|
||||||
|
}
|
||||||
|
// Täysi golden example
|
||||||
const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
|
const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
|
||||||
if (!existsSync(todoDir)) return '';
|
if (!existsSync(todoDir)) return '';
|
||||||
let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
|
let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
|
||||||
@@ -376,7 +384,7 @@ async function main() {
|
|||||||
console.log('╔══════════════════════════════════════════════╗');
|
console.log('╔══════════════════════════════════════════════╗');
|
||||||
console.log('║ Kipinä CodeBench ║');
|
console.log('║ Kipinä CodeBench ║');
|
||||||
console.log('╚══════════════════════════════════════════════╝');
|
console.log('╚══════════════════════════════════════════════╝');
|
||||||
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
|
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${COMPACT_MODE ? ' (compact)' : ''}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
|
||||||
|
|
||||||
// Haetaan mallit
|
// Haetaan mallit
|
||||||
let models;
|
let models;
|
||||||
|
|||||||
36
kipina-codebench/prompts/golden-compact-py.md
Normal file
36
kipina-codebench/prompts/golden-compact-py.md
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
REFERENCE PATTERNS (follow exactly):
|
||||||
|
|
||||||
|
STACK: SQLAlchemy 2.0 + Pydantic v2 + FastAPI + SQLite
|
||||||
|
|
||||||
|
models.py:
|
||||||
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||||
|
class Base(DeclarativeBase): pass
|
||||||
|
Fields: Mapped[type] = mapped_column(SqlType, default=...)
|
||||||
|
Nullable: Mapped[str | None] = mapped_column(Text, default=None)
|
||||||
|
Status: Mapped[str] = mapped_column(String(20), default="pending")
|
||||||
|
FK: Mapped[int] = mapped_column(ForeignKey("table.id"))
|
||||||
|
End: Base.metadata.create_all(bind=engine)
|
||||||
|
|
||||||
|
schemas.py:
|
||||||
|
class EntityCreate(BaseModel): fields with defaults
|
||||||
|
class EntityResponse(EntityCreate):
|
||||||
|
id: int
|
||||||
|
model_config = ConfigDict(from_attributes=True)
|
||||||
|
|
||||||
|
main.py:
|
||||||
|
def get_db(): db = SessionLocal(); try: yield db; finally: db.close()
|
||||||
|
POST /{table}/ → 201, model_dump()
|
||||||
|
GET /{table}/ → list
|
||||||
|
GET /{table}/{id} → 404 if not found
|
||||||
|
PUT /{table}/{id} → model_dump(), setattr loop
|
||||||
|
DELETE /{table}/{id} → 204
|
||||||
|
|
||||||
|
test_main.py:
|
||||||
|
test.db + override_get_db + TestClient
|
||||||
|
Unique Finnish data per test ("Osta maitoa", "Haettava tehtävä"...)
|
||||||
|
test_create → 201 + assert "id" in json
|
||||||
|
test_list → post first, get, assert len >= 1
|
||||||
|
test_get_by_id → post, get by id, assert id matches
|
||||||
|
test_not_found → get /99999 → 404
|
||||||
|
test_update → post, put with ALL required fields, assert 200
|
||||||
|
test_delete → post, delete 204, get again → 404
|
||||||
43
kipina-codebench/prompts/golden-compact-rs.md
Normal file
43
kipina-codebench/prompts/golden-compact-rs.md
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
REFERENCE PATTERNS (follow exactly):
|
||||||
|
|
||||||
|
STACK: Axum 0.8 + SQLx + SQLite + Tokio + Serde
|
||||||
|
|
||||||
|
Cargo.toml:
|
||||||
|
edition = "2024"
|
||||||
|
deps: axum 0.8, tokio (full), serde (derive), serde_json, sqlx (sqlite, runtime-tokio), tower-http (cors)
|
||||||
|
dev: reqwest (json, rustls-tls)
|
||||||
|
|
||||||
|
src/models.rs:
|
||||||
|
#[derive(Debug, Serialize, Deserialize, FromRow)]
|
||||||
|
struct Entity { id: i64, field: String, optional: Option<String> }
|
||||||
|
struct CreateEntity { field: String, optional: Option<String> }
|
||||||
|
Status fields: String with default "pending"
|
||||||
|
|
||||||
|
src/handlers.rs:
|
||||||
|
async fn create(State(pool), Json(input)) -> (StatusCode, Json<Entity>)
|
||||||
|
POST → StatusCode::CREATED, sqlx::query("INSERT...").execute + query_as last_insert_rowid
|
||||||
|
GET list → query_as("SELECT * FROM table").fetch_all
|
||||||
|
GET by id → query_as.fetch_optional, return 404 if None
|
||||||
|
PUT → query("UPDATE...SET...WHERE id=?"), rows_affected == 0 → 404
|
||||||
|
DELETE → StatusCode::NO_CONTENT, rows_affected == 0 → 404
|
||||||
|
|
||||||
|
src/lib.rs:
|
||||||
|
pub fn app(pool: SqlitePool) -> Router
|
||||||
|
pub async fn init_db(pool: &SqlitePool) → CREATE TABLE IF NOT EXISTS
|
||||||
|
Routes: .route("/{table}", post(create).get(list))
|
||||||
|
.route("/{table}/{id}", get(get_one).put(update).delete(delete_one))
|
||||||
|
|
||||||
|
src/main.rs:
|
||||||
|
SqlitePool::connect("sqlite:./app.db?mode=rwc") (creates the file if missing), init_db, bind 0.0.0.0:3000
|
||||||
|
|
||||||
|
tests/api_test.rs:
|
||||||
|
Each test: SqlitePool::connect("sqlite::memory:"), init_db, app(pool)
|
||||||
|
Spawn on random port: TcpListener::bind("127.0.0.1:0"), axum::serve
|
||||||
|
reqwest::Client for HTTP calls
|
||||||
|
Unique Finnish data ("Osta maitoa", "Haettava tehtävä"...)
|
||||||
|
test_create → 201 + assert id exists
|
||||||
|
test_list → post first, get, assert len >= 1
|
||||||
|
test_get_by_id → post, get, assert id matches
|
||||||
|
test_not_found → 404
|
||||||
|
test_update → post, put with ALL fields, assert 200
|
||||||
|
test_delete → post, delete 204, get → 404
|
||||||
Reference in New Issue
Block a user