model : Nomic Embed Text V2 with Mixture-of-Experts (MoE) architecture (#12466)
* Nomic Embed Text V2 with Mixture-of-Experts (MoE) architecture - Adds MoE-based embedding model supporting multilingual embeddings. - Selects architecture variant based on hyperparameter detection (MoE layers). - Removes unnecessary subclass initialization checks for clarity. https://www.nomic.ai/blog/posts/nomic-embed-text-v2 Co-authored-by: Jared Van Bortel <jared@nomic.ai> * fix tokenizer * don't rename this tensor --------- Co-authored-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
eaea325324
commit
5f5e39e1ba
9 changed files with 247 additions and 110 deletions
|
@ -290,6 +290,7 @@ class TensorNameMap:
|
|||
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
||||
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
||||
"language_model.model.layers.{bid}.feed_forward.router", # llama4
|
||||
"encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
||||
|
@ -322,6 +323,7 @@ class TensorNameMap:
|
|||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||
"encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
|
||||
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||
|
@ -337,6 +339,7 @@ class TensorNameMap:
|
|||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
|
||||
"language_model.model.layers.{bid}.feed_forward.experts.up_proj", # llama4
|
||||
"encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_UP_SHEXP: (
|
||||
|
@ -418,6 +421,7 @@ class TensorNameMap:
|
|||
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
|
||||
"language_model.model.layers.{bid}.feed_forward.experts.down_proj", # llama4
|
||||
"encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue