hetki ennen webgpu inferenssiä
This commit is contained in:
@@ -154,7 +154,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
} else {
|
||||
logits // jo [vocab_size]
|
||||
};
|
||||
let mut next_token = logits.argmax(0).unwrap().to_vec0::<u32>().unwrap();
|
||||
let mut next_token = crate::sampling::sample_top_k(&logits, 10, 5.0);
|
||||
console_log!("[Qwen] Ensimmäinen token: {}", next_token);
|
||||
|
||||
let eos_token = 151645u32; // <|endoftext|> for Qwen2.5
|
||||
@@ -188,7 +188,7 @@ pub async fn run_qwen_inference(prompt: String, ws: Rc<RefCell<WebSocket>>) {
|
||||
} else {
|
||||
logits
|
||||
};
|
||||
next_token = logits.argmax(0).unwrap().to_vec0::<u32>().unwrap();
|
||||
next_token = crate::sampling::sample_top_k(&logits, 10, 5.0);
|
||||
pos += 1;
|
||||
|
||||
if next_token == eos_token { break; }
|
||||
|
||||
Reference in New Issue
Block a user