puristettu qwen output tiukempaan muottiin

This commit is contained in:
Jaakko Vanhala
2026-04-04 21:33:54 +03:00
parent 133ff38fa4
commit 4e49cfbbfa
2 changed files with 28 additions and 4 deletions

View File

@@ -139,7 +139,7 @@ impl LlmEngine {
}
pub fn generate(&mut self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
let formatted = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
let formatted = format!("<|im_start|>system\nYou are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
let encoding = self.tokenizer.encode(formatted.as_str(), true)
.map_err(|e| format!("Encode: {}", e))?;
@@ -218,6 +218,17 @@ impl LlmEngine {
if let Ok(text) = self.tokenizer.decode(&[next_token], true) {
generated_text.push_str(&text);
// Stop sequences: cut generation off once the model starts explaining
let lower = generated_text.to_lowercase();
if lower.contains("\n###") || lower.contains("\nexplanation") || lower.contains("\nnote:") || lower.contains("\noutput:") || lower.contains("\n```\n\n") {
for stop in &["\n###", "\nExplanation", "\nNote:", "\nOutput:", "\n```\n\n"] {
if let Some(pos) = generated_text.find(stop) {
generated_text.truncate(pos);
}
}
break;
}
}
all_tokens.push(next_token);
tokens_generated += 1;

View File

@@ -197,14 +197,14 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&prompt) {
let p = json.get("prompt").and_then(|v| v.as_str()).unwrap_or(&prompt).to_string();
let s = json.get("system").and_then(|v| v.as_str())
.unwrap_or("You are a Python coding assistant. Write only code, no explanations.").to_string();
.unwrap_or("You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.").to_string();
let m = json.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(128) as usize;
(p, s, m)
} else {
(prompt.clone(), "You are a Python coding assistant. Write only code, no explanations.".to_string(), 128)
(prompt.clone(), "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.".to_string(), 128)
}
} else {
(prompt.clone(), "You are a Python coding assistant. Write only code, no explanations.".to_string(), 128)
(prompt.clone(), "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.".to_string(), 128)
};
let formatted = format!("<|im_start|>system\n{}<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", system_msg, actual_prompt);
@@ -286,6 +286,19 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
if let Ok(text) = tokenizer.decode(&[next_token], true) {
generated_text.push_str(&text);
// Stop sequences: cut generation off once the model starts explaining
let lower = generated_text.to_lowercase();
if lower.contains("\n###") || lower.contains("\nexplanation") || lower.contains("\nnote:") || lower.contains("\noutput:") || lower.contains("\n```\n\n") {
// Trim the explanation part away
for stop in &["\n###", "\nExplanation", "\nNote:", "\nOutput:", "\n```\n\n"] {
if let Some(pos) = generated_text.find(stop) {
generated_text.truncate(pos);
}
}
break;
}
let mut chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "Qwen2.5-Coder" });
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
let _ = ws.borrow().send_with_str(&chunk.to_string());