convert : fix nomic-bert-moe mask token (#13757)

This commit is contained in:
Sigbjørn Skjæret 2025-06-01 18:07:21 +02:00 committed by GitHub
parent c496fe0b1d
commit 5e1c3aed40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 13 additions and 2 deletions

View file

@@ -3889,6 +3889,12 @@ class BertModel(TextModel):
SentencePieceTokenTypes.UNKNOWN,
] + toktypes[3:-1]
if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
# Add mask token missing from sentencepiece.bpe.model
tokens[250001] = b'<mask>'
scores[250001] = 0.0
toktypes[250001] = SentencePieceTokenTypes.CONTROL
self.gguf_writer.add_tokenizer_model("t5")
self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)