mtmd : support InternVL 2.5 and 3 (#13422)
* convert : internvl support * InternVL3-1B working * fix regression * rm mobilevlm from test * fix conversion * add test for internvl * add to list of pre-quant * restore boi/eoi check * add clarify comment for norm eps
This commit is contained in:
parent
d8919424f1
commit
053367d149
9 changed files with 243 additions and 25 deletions
|
@ -905,6 +905,7 @@ class TensorNameMap:
|
|||
|
||||
MODEL_TENSOR.V_MMPROJ_MLP: (
|
||||
"model.mm_projector.mlp.mlp.{bid}",
|
||||
"mlp1.{bid}", # InternVL
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MMPROJ_PEG: (
|
||||
|
@ -955,6 +956,7 @@ class TensorNameMap:
|
|||
|
||||
MODEL_TENSOR.V_ENC_INPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.norm1", # InternVL
|
||||
"vpm.encoder.layers.{bid}.layer_norm1",
|
||||
"model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
|
||||
|
@ -963,6 +965,7 @@ class TensorNameMap:
|
|||
|
||||
MODEL_TENSOR.V_ENC_OUTPUT: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
|
||||
"vpm.encoder.layers.{bid}.self_attn.out_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
|
||||
|
@ -971,6 +974,7 @@ class TensorNameMap:
|
|||
|
||||
MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
|
||||
"vpm.encoder.layers.{bid}.layer_norm2",
|
||||
"model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
|
||||
|
@ -1000,6 +1004,14 @@ class TensorNameMap:
|
|||
"visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_LAYER_SCALE_1: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.ls1", # InternVL
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_LAYER_SCALE_2: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.ls2", # InternVL
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_PRE_NORM: (
|
||||
"vision_tower.vision_model.pre_layrnorm",
|
||||
"vision_tower.ln_pre", # pixtral
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue