UI:n system prompt ja sampling-parametrit välittyvät inferenssiin asti
Frontend lähettää agentin asetukset (system_prompt, temperature, top_k, max_tokens, repeat_penalty, stop) API:lle. Hub välittää ne solmulle. Native-node ja Wasm-coder käyttävät välitettyjä arvoja hardkoodattujen sijaan.
This commit is contained in:
@@ -1,6 +1,15 @@
|
||||
use std::time::Instant;
|
||||
use std::cell::RefCell;
|
||||
|
||||
/// Per-request generation settings handed to `LlmEngine::generate`.
///
/// Every `Option` field is a caller override: when it is `None`, `generate`
/// substitutes its own built-in default before sending the request.
#[derive(Debug, Clone, PartialEq)]
pub struct GenerateOptions {
    /// Upper bound on generated tokens; sent as `num_predict` in the request options.
    pub max_tokens: usize,
    /// Optional system prompt; when present it is added to the request body
    /// under the `system` key, otherwise that key is omitted entirely.
    pub system_prompt: Option<String>,
    /// Sampling temperature; `generate` falls back to 0.7 when `None`.
    pub temperature: Option<f64>,
    /// Top-k sampling cutoff; `generate` falls back to 40 when `None`.
    pub top_k: Option<u64>,
    /// Repetition penalty; `generate` falls back to 1.15 when `None`.
    pub repeat_penalty: Option<f64>,
    /// Stop sequences; `generate` falls back to its built-in stop list when `None`.
    pub stop: Option<Vec<String>>,
}
|
||||
|
||||
pub struct LlmEngine {
|
||||
ollama_url: String,
|
||||
model: RefCell<String>,
|
||||
@@ -96,25 +105,34 @@ impl LlmEngine {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
|
||||
// System prompt tulee agentin konfiguraatiosta (frontend lähettää sen osana promptia).
|
||||
// Tässä ei yliajeta sitä — Ollama saa vain prompt-kentän.
|
||||
pub async fn generate(&self, prompt: &str, opts: &GenerateOptions) -> Result<GenerateResult, String> {
|
||||
let model = self.model.borrow().clone();
|
||||
|
||||
let default_stop: Vec<String> = vec![
|
||||
"<|im_end|>".into(), "\n###".into(), "\nExplanation".into(),
|
||||
"\nNote:".into(), "\nPlease note".into(), "\nThis is".into(),
|
||||
"\n```\n\n".into(), "\n// Example".into(), "\n# Example".into(),
|
||||
];
|
||||
|
||||
let mut body = serde_json::json!({
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": false,
|
||||
"options": {
|
||||
"num_predict": opts.max_tokens,
|
||||
"temperature": opts.temperature.unwrap_or(0.7),
|
||||
"top_k": opts.top_k.unwrap_or(40),
|
||||
"repeat_penalty": opts.repeat_penalty.unwrap_or(1.15),
|
||||
"stop": opts.stop.as_ref().unwrap_or(&default_stop),
|
||||
}
|
||||
});
|
||||
if let Some(ref sp) = opts.system_prompt {
|
||||
body.as_object_mut().unwrap().insert("system".to_string(), serde_json::json!(sp));
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
|
||||
.json(&serde_json::json!({
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": false,
|
||||
"options": {
|
||||
"num_predict": max_tokens,
|
||||
"temperature": 0.7,
|
||||
"top_k": 40,
|
||||
"repeat_penalty": 1.15,
|
||||
"stop": ["<|im_end|>", "\n###", "\nExplanation", "\nNote:", "\nPlease note", "\nThis is", "\n```\n\n", "\n// Example", "\n# Example"]
|
||||
}
|
||||
}))
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Ollama generate: {}", e))?;
|
||||
|
||||
@@ -472,7 +472,14 @@ async fn main() {
|
||||
|
||||
if !prompt.is_empty() && (msg_model.starts_with("qwen-coder") || msg_model.starts_with("qwen2.5-coder") || msg_model.starts_with("phi")) {
|
||||
if let Some(ref engine) = llm {
|
||||
let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize;
|
||||
let gen_opts = inference::GenerateOptions {
|
||||
max_tokens: task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize,
|
||||
system_prompt: task.get("system_prompt").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||
temperature: task.get("temperature").and_then(|v| v.as_f64()),
|
||||
top_k: task.get("top_k").and_then(|v| v.as_u64()),
|
||||
repeat_penalty: task.get("repeat_penalty").and_then(|v| v.as_f64()),
|
||||
stop: task.get("stop").and_then(|v| v.as_array()).map(|a| a.iter().filter_map(|s| s.as_str().map(|s| s.to_string())).collect()),
|
||||
};
|
||||
let prompt_lines = prompt.lines().count();
|
||||
let prompt_last: String = prompt.lines().last().unwrap_or("").chars().take(60).collect();
|
||||
tracing::info!("→ task_id:{} | {}r prompti | \"{}...\"", task_id, prompt_lines, prompt_last);
|
||||
@@ -480,11 +487,10 @@ async fn main() {
|
||||
let mut st = tui_state.write().await;
|
||||
st.cur_task_id = Some(task_id.to_string());
|
||||
st.cur_prompt = Some(format!("→ {} riviä | \"{}...\"", prompt_lines, prompt_last));
|
||||
// Ei login puskemista vielä tässä! Yhdistetään se valmiin lohkoon yhdelle riville.
|
||||
}
|
||||
|
||||
let model_name = engine.model_name();
|
||||
match engine.generate(prompt, max_tokens).await {
|
||||
match engine.generate(prompt, &gen_opts).await {
|
||||
Ok(result) => {
|
||||
let tokens_sec = (result.tokens_per_sec * 10.0).round() / 10.0;
|
||||
tracing::info!(
|
||||
|
||||
Reference in New Issue
Block a user