mtmd : support SmolVLM (version 1 and 2) (#13050)
* mtmd : support SmolVLM (version 1 and 2)
* correct chat template
* fix n_patches
* scale_factor is an int
* add more models to test
This commit is contained in:
parent: ab47dec3d3
commit: dc39a5e7a8

10 changed files with 279 additions and 65 deletions
@@ -231,11 +231,15 @@ class Keys:

```python
    IMAGE_MEAN = "clip.vision.image_mean"
    IMAGE_STD = "clip.vision.image_std"
    USE_GELU = "clip.use_gelu"
    USE_SILU = "clip.use_silu"

    class Attention:
        HEAD_COUNT = "clip.vision.attention.head_count"
        LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"

    class Projector:
        SCALE_FACTOR = "clip.vision.projector.scale_factor"

#
# recommended mapping of model tensor names for storage in gguf
#
```
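The new `clip.vision.projector.scale_factor` key above (presumably in gguf-py/gguf/constants.py, judging by the surrounding classes) carries the pixel-shuffle scale factor of the SmolVLM/Idefics3 projector and, per the commit note, is stored as an int. Below is a minimal sketch of how a conversion script could emit it with the gguf-py writer; the file name, the "clip" architecture string, and the value 2 are illustrative assumptions, not taken from this commit.

```python
# Sketch only: emit the new projector key with the gguf-py writer.
# The file name, arch string, and the value 2 are placeholders for illustration.
import gguf

writer = gguf.GGUFWriter("mmproj-smolvlm.gguf", arch="clip")
writer.add_uint32("clip.vision.projector.scale_factor", 2)  # stored as an int

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```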
@@ -2122,6 +2126,11 @@ class GGUFValueType(IntEnum):

```python
        raise ValueError(f"Unknown type: {type(val)}")


class VisionProjectorType:
    GEMMA3 = "gemma3"
    IDEFICS3 = "idefics3"


# Items here are (block size, type size)
QK_K = 256
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
```
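For context on the trailing lines of this hunk: each entry in `GGML_QUANT_SIZES` is a `(block size, type size)` pair, so the byte size of a quantized row is `n_elements // block_size * type_size`. The helper below is an illustrative sketch, not part of this commit; it only uses the existing table, and the Q4_K example is an arbitrary pick.

```python
# Illustrative sketch, not part of this commit: derive a tensor's byte size
# from the (block size, type size) pairs stored in GGML_QUANT_SIZES.
from gguf import GGMLQuantizationType, GGML_QUANT_SIZES

def tensor_byte_size(n_elements: int, qtype: GGMLQuantizationType) -> int:
    block_size, type_size = GGML_QUANT_SIZES[qtype]
    assert n_elements % block_size == 0, "row length must be a multiple of the block size"
    return n_elements // block_size * type_size

# Example: a 4096 x 4096 weight quantized to Q4_K (blocks of QK_K = 256 elements).
print(tensor_byte_size(4096 * 4096, GGMLQuantizationType.Q4_K))
```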