mtmd : support SmolVLM (version 1 and 2) (#13050)

* mtmd : support SmolVLM (version 1 and 2)

* correct chat template

* fix n_patches

* scale_factor is an int

* add more models to test
This commit is contained in:
Xuan-Son Nguyen 2025-04-22 16:24:54 +02:00 committed by GitHub
parent ab47dec3d3
commit dc39a5e7a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 279 additions and 65 deletions

View file

@@ -231,11 +231,15 @@ class Keys:
IMAGE_MEAN = "clip.vision.image_mean"
IMAGE_STD = "clip.vision.image_std"
USE_GELU = "clip.use_gelu"
USE_SILU = "clip.use_silu"
class Attention:
    """Key-name constants for the vision encoder's attention settings.

    Each attribute is the full dotted key string, all under the
    ``clip.vision.attention.`` prefix.
    """

    # Key for the attention head count.
    HEAD_COUNT = "clip.vision.attention.head_count"
    # Key for the layer-norm epsilon.
    LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"
class Projector:
    """Key-name constants for the vision projector settings.

    Each attribute is the full dotted key string under the
    ``clip.vision.projector.`` prefix.
    """

    # Key for the projector scale factor.
    # NOTE(review): presumably stored as an integer value - confirm against
    # the code that writes this key.
    SCALE_FACTOR = "clip.vision.projector.scale_factor"
#
# recommended mapping of model tensor names for storage in gguf
#
@@ -2122,6 +2126,11 @@ class GGUFValueType(IntEnum):
raise ValueError(f"Unknown type: {type(val)}")
class VisionProjectorType:
    """String identifiers for the known vision projector variants.

    These are plain class-level string constants (not an ``Enum``), so each
    attribute compares equal to its literal value and can be used anywhere
    a ``str`` is expected.
    """

    GEMMA3 = "gemma3"
    IDEFICS3 = "idefics3"
# Items here are (block size, type size)
QK_K = 256
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {