// candle_transformers/models/llava/utils.rs
utils.rs1pub fn get_anyres_image_grid_shape(
2 image_size: (u32, u32),
3 grid_pinpoints: &[(u32, u32)],
4 patch_size: u32,
5) -> (u32, u32) {
6 let (width, height) = select_best_resolution(image_size, grid_pinpoints);
7 (width / patch_size, height / patch_size)
8}
9
/// Picks the candidate resolution that best fits an image of `original_size`.
///
/// Both `original_size` and every entry of `possible_resolutions` are `(width, height)` pairs.
/// For each candidate, the image is scaled (keeping its aspect ratio) so that it fits inside
/// the candidate. Two quantities are then compared:
///
/// * the *effective* resolution: the pixel count of the fitted image, capped at the pixel
///   count of the original image so that upscaling never counts as additional detail;
/// * the *wasted* resolution: the candidate area not covered by effective pixels.
///
/// The candidate with the largest effective resolution wins; ties are broken by the smallest
/// wasted resolution, and remaining ties by the earliest position in the slice.
///
/// Returns `(0, 0)` when `possible_resolutions` is empty.
pub fn select_best_resolution(
    original_size: (u32, u32),
    possible_resolutions: &[(u32, u32)],
) -> (u32, u32) {
    let (original_width, original_height) = original_size;
    // `f64` represents every `u32` exactly, so these conversions are lossless.
    let original_width_f = f64::from(original_width);
    let original_height_f = f64::from(original_height);
    // Areas are computed in `u64`: the product of two `u32` values can exceed `u32::MAX`.
    let original_area = u64::from(original_width) * u64::from(original_height);

    let mut best_fit = (0, 0);
    let mut max_effective_resolution = 0_u64;
    let mut min_wasted_resolution = u64::MAX;

    for &(width, height) in possible_resolutions {
        let candidate_area = u64::from(width) * u64::from(height);

        // Largest uniform scale at which the image still fits inside the candidate.
        let scale =
            (f64::from(width) / original_width_f).min(f64::from(height) / original_height_f);
        // Float-to-int `as` casts truncate toward zero, saturate at the integer bounds and map
        // NaN to 0, so degenerate (zero-sized) inputs cannot cause a panic here.
        let downscaled_width = (original_width_f * scale) as u32;
        let downscaled_height = (original_height_f * scale) as u32;
        let downscaled_area = u64::from(downscaled_width) * u64::from(downscaled_height);

        // Cap at the original area: pixels created by upscaling carry no extra information.
        let effective_resolution = downscaled_area.min(original_area);
        // The fitted image never exceeds the candidate, so this cannot underflow in practice;
        // `saturating_sub` merely keeps the arithmetic panic-free.
        let wasted_resolution = candidate_area.saturating_sub(effective_resolution);

        let is_better = effective_resolution > max_effective_resolution
            || (effective_resolution == max_effective_resolution
                && wasted_resolution < min_wasted_resolution);
        if is_better {
            best_fit = (width, height);
            max_effective_resolution = effective_resolution;
            min_wasted_resolution = wasted_resolution;
        }
    }
    best_fit
}