puristettu qwen output tiukempaan muottiin
This commit is contained in:
@@ -139,7 +139,7 @@ impl LlmEngine {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn generate(&mut self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
|
pub fn generate(&mut self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
|
||||||
let formatted = format!("<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
|
let formatted = format!("<|im_start|>system\nYou are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt);
|
||||||
|
|
||||||
let encoding = self.tokenizer.encode(formatted.as_str(), true)
|
let encoding = self.tokenizer.encode(formatted.as_str(), true)
|
||||||
.map_err(|e| format!("Encode: {}", e))?;
|
.map_err(|e| format!("Encode: {}", e))?;
|
||||||
@@ -218,6 +218,17 @@ impl LlmEngine {
|
|||||||
|
|
||||||
if let Ok(text) = self.tokenizer.decode(&[next_token], true) {
|
if let Ok(text) = self.tokenizer.decode(&[next_token], true) {
|
||||||
generated_text.push_str(&text);
|
generated_text.push_str(&text);
|
||||||
|
|
||||||
|
// Stop-sekvenssit: katkaistaan kun malli alkaa selittää
|
||||||
|
let lower = generated_text.to_lowercase();
|
||||||
|
if lower.contains("\n###") || lower.contains("\nexplanation") || lower.contains("\nnote:") || lower.contains("\noutput:") || lower.contains("\n```\n\n") {
|
||||||
|
for stop in &["\n###", "\nExplanation", "\nNote:", "\nOutput:", "\n```\n\n"] {
|
||||||
|
if let Some(pos) = generated_text.find(stop) {
|
||||||
|
generated_text.truncate(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
all_tokens.push(next_token);
|
all_tokens.push(next_token);
|
||||||
tokens_generated += 1;
|
tokens_generated += 1;
|
||||||
|
|||||||
@@ -197,14 +197,14 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
|
|||||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&prompt) {
|
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&prompt) {
|
||||||
let p = json.get("prompt").and_then(|v| v.as_str()).unwrap_or(&prompt).to_string();
|
let p = json.get("prompt").and_then(|v| v.as_str()).unwrap_or(&prompt).to_string();
|
||||||
let s = json.get("system").and_then(|v| v.as_str())
|
let s = json.get("system").and_then(|v| v.as_str())
|
||||||
.unwrap_or("You are a Python coding assistant. Write only code, no explanations.").to_string();
|
.unwrap_or("You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.").to_string();
|
||||||
let m = json.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(128) as usize;
|
let m = json.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(128) as usize;
|
||||||
(p, s, m)
|
(p, s, m)
|
||||||
} else {
|
} else {
|
||||||
(prompt.clone(), "You are a Python coding assistant. Write only code, no explanations.".to_string(), 128)
|
(prompt.clone(), "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.".to_string(), 128)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
(prompt.clone(), "You are a Python coding assistant. Write only code, no explanations.".to_string(), 128)
|
(prompt.clone(), "You are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.".to_string(), 128)
|
||||||
};
|
};
|
||||||
|
|
||||||
let formatted = format!("<|im_start|>system\n{}<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", system_msg, actual_prompt);
|
let formatted = format!("<|im_start|>system\n{}<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", system_msg, actual_prompt);
|
||||||
@@ -286,6 +286,19 @@ pub async fn run_coder_inference(prompt: String, ws: Rc<RefCell<WebSocket>>, use
|
|||||||
|
|
||||||
if let Ok(text) = tokenizer.decode(&[next_token], true) {
|
if let Ok(text) = tokenizer.decode(&[next_token], true) {
|
||||||
generated_text.push_str(&text);
|
generated_text.push_str(&text);
|
||||||
|
|
||||||
|
// Stop-sekvenssit: katkaistaan kun malli alkaa selittää
|
||||||
|
let lower = generated_text.to_lowercase();
|
||||||
|
if lower.contains("\n###") || lower.contains("\nexplanation") || lower.contains("\nnote:") || lower.contains("\noutput:") || lower.contains("\n```\n\n") {
|
||||||
|
// Trimmataan selitysosuus pois
|
||||||
|
for stop in &["\n###", "\nExplanation", "\nNote:", "\nOutput:", "\n```\n\n"] {
|
||||||
|
if let Some(pos) = generated_text.find(stop) {
|
||||||
|
generated_text.truncate(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
let mut chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "Qwen2.5-Coder" });
|
let mut chunk = serde_json::json!({ "type": "llm_chunk", "token": text, "prompt": prompt, "model": "Qwen2.5-Coder" });
|
||||||
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
|
if let Some(ref tid) = task_id { chunk.as_object_mut().unwrap().insert("task_id".to_string(), serde_json::json!(tid)); }
|
||||||
let _ = ws.borrow().send_with_str(&chunk.to_string());
|
let _ = ws.borrow().send_with_str(&chunk.to_string());
|
||||||
|
|||||||
Reference in New Issue
Block a user