Native node CPU-moodi: Candle 0.8 RMS-norm ei tue CUDA:a
candle-core 0.8 ei sisällä rms-norm CUDA-kerneliä → inferenssi epäonnistui. Vaihdettu CPU:ksi, joka on silti ~10-20× nopeampi kuin selaimen WASM.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -69,11 +69,13 @@ pub struct LlmEngine {
|
|||||||
|
|
||||||
impl LlmEngine {
|
impl LlmEngine {
|
||||||
pub fn load() -> Result<Self, String> {
|
pub fn load() -> Result<Self, String> {
|
||||||
let device = Device::cuda_if_available(0).map_err(|e| format!("Device: {}", e))?;
|
// Candle 0.8: RMS-norm ei tue CUDA:a → käytetään CPU:ta
|
||||||
let device_name = if device.is_cuda() { "CUDA" } else { "CPU" };
|
// Natiivi CPU on silti ~10-20× nopeampi kuin WASM (multi-threaded, ei browser overhead)
|
||||||
|
let device = Device::Cpu;
|
||||||
|
let device_name = "CPU (native)";
|
||||||
tracing::info!("LLM device: {}", device_name);
|
tracing::info!("LLM device: {}", device_name);
|
||||||
|
|
||||||
let dtype = if device.is_cuda() { DType::F16 } else { DType::F32 };
|
let dtype = DType::F32;
|
||||||
|
|
||||||
tracing::info!("Ladataan Qwen2.5-Coder-0.5B-Instruct...");
|
tracing::info!("Ladataan Qwen2.5-Coder-0.5B-Instruct...");
|
||||||
let api = Api::new().map_err(|e| format!("HF API: {}", e))?;
|
let api = Api::new().map_err(|e| format!("HF API: {}", e))?;
|
||||||
|
|||||||
Reference in New Issue
Block a user