// candle_transformers/models/whisper/mod.rs
pub mod audio;
14pub mod model;
15pub mod quantized_model;
16
17use serde::Deserialize;
18
19#[derive(Debug, Clone, PartialEq, Deserialize)]
22pub struct Config {
23 pub num_mel_bins: usize, pub max_source_positions: usize, pub d_model: usize, pub encoder_attention_heads: usize, pub encoder_layers: usize, pub vocab_size: usize, pub max_target_positions: usize, pub decoder_attention_heads: usize, pub decoder_layers: usize, #[serde(default)]
34 pub suppress_tokens: Vec<u32>,
35}
36
37pub const DTYPE: candle::DType = candle::DType::F32;
38
// Audio parameters.

/// Audio sample rate. NOTE(review): presumably in Hz (samples per second).
pub const SAMPLE_RATE: usize = 16_000;
/// FFT size. NOTE(review): presumably measured in samples; confirm in the audio code.
pub const N_FFT: usize = 400;
/// Hop length; `N_SAMPLES` is divided by it to obtain `N_FRAMES`.
pub const HOP_LENGTH: usize = 160;
/// Chunk length; multiplied by `SAMPLE_RATE` to obtain `N_SAMPLES`, so
/// presumably expressed in seconds.
pub const CHUNK_LENGTH: usize = 30;
/// Samples per chunk: `CHUNK_LENGTH * SAMPLE_RATE` = 480_000.
pub const N_SAMPLES: usize = CHUNK_LENGTH * SAMPLE_RATE;
/// Frames per chunk: `N_SAMPLES / HOP_LENGTH` = 3_000.
pub const N_FRAMES: usize = N_SAMPLES / HOP_LENGTH;

// Decoding thresholds.
// NOTE(review): how these are applied is not visible in this file; the
// descriptions below are inferred from the names only.

/// Presumably the no-speech probability threshold.
pub const NO_SPEECH_THRESHOLD: f64 = 0.6;
/// Presumably the log-probability threshold.
pub const LOGPROB_THRESHOLD: f64 = -1.0;
/// Six values from 0.0 to 1.0 in steps of 0.2; presumably sampling temperatures.
pub const TEMPERATURES: [f64; 6] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0];
/// Presumably the compression-ratio threshold.
pub const COMPRESSION_RATIO_THRESHOLD: f64 = 2.4;
51
// Special-token strings.
// NOTE(review): presumably these must match entries in the tokenizer's
// vocabulary; the lookup code is not visible in this file.

/// Start-of-transcript marker.
pub const SOT_TOKEN: &str = "<|startoftranscript|>";
/// Transcribe marker.
pub const TRANSCRIBE_TOKEN: &str = "<|transcribe|>";
/// Translate marker.
pub const TRANSLATE_TOKEN: &str = "<|translate|>";
/// No-timestamps marker.
pub const NO_TIMESTAMPS_TOKEN: &str = "<|notimestamps|>";
/// End-of-text marker.
pub const EOT_TOKEN: &str = "<|endoftext|>";
/// Two alternative spellings of the no-speech marker. NOTE(review): presumably
/// different tokenizer versions use different spellings; confirm at the call site.
pub const NO_SPEECH_TOKENS: [&str; 2] = ["<|nocaptions|>", "<|nospeech|>"];