convert : fix nomic-bert-moe mask token (#13757)
This commit is contained in:
parent
c496fe0b1d
commit
5e1c3aed40
2 changed files with 13 additions and 2 deletions
|
@@ -3889,6 +3889,12 @@ class BertModel(TextModel):
|
|||
SentencePieceTokenTypes.UNKNOWN,
|
||||
] + toktypes[3:-1]
|
||||
|
||||
if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
|
||||
# Add mask token missing from sentencepiece.bpe.model
|
||||
tokens[250001] = b'<mask>'
|
||||
scores[250001] = 0.0
|
||||
toktypes[250001] = SentencePieceTokenTypes.CONTROL
|
||||
|
||||
self.gguf_writer.add_tokenizer_model("t5")
|
||||
self.gguf_writer.add_tokenizer_pre("default")
|
||||
self.gguf_writer.add_token_list(tokens)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue