candle_transformers/models/mimi/mod.rs
1//! mimi model
2//!
3//! [Mimi](https://huggingface.co/kyutai/mimi) is a state-of-the-art audio
4//! compression model using an encoder/decoder architecture with residual vector
5//! quantization. The candle implementation supports streaming, meaning that it's
6//! possible to encode or decode a stream of audio tokens on the fly to provide
7//! low-latency interaction with an audio model.
8//!
9//! - 🤗 [HuggingFace Model Card](https://huggingface.co/kyutai/mimi)
10//! - 💻 [GitHub](https://github.com/kyutai-labs/moshi)
11//!
12//!
13//! # Example
14//! ```bash
15//! # Generating some audio tokens from an audio file.
16//! wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
17//! cargo run --example mimi \
18//! --features mimi --release -- \
19//! audio-to-code bria.mp3 bria.safetensors
20//!
21//! # And decoding the audio tokens back into a sound file.
22//! cargo run --example mimi \
23//! --features mimi --release -- \
24//! code-to-audio bria.safetensors bria.wav
25//! ```
26
27// Copyright (c) Kyutai, all rights reserved.
28// This source code is licensed under the license found in the
29// LICENSE file in the root directory of this source tree.
30pub use candle;
31pub use candle_nn;
32
33pub mod conv;
34pub mod encodec;
35pub mod quantization;
36pub mod seanet;
37pub mod transformer;
38
/// Kind of normalization layer that can be selected.
///
/// The type is a plain, field-less enum: it is cheap to copy and can be
/// compared for equality and printed with `{:?}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NormType {
    /// RMS normalization.
    RmsNorm,
    /// Layer normalization.
    LayerNorm,
}
44
45pub use encodec::{load, Config, Encodec as Model};