llama : add Trillion 7B model support (#12556)

* Support Trillion 7B
* Update llama.h
* Update llama.h
* Update llama-vocab.cpp for Trillion
* Update llama-vocab.cpp
2025-03-31 03:38:33 +09:00 · 2025-03-31 03:38:33 +09:00 · b3de7cac73
commit b3de7cac73
parent 7242dd9675
5 changed files with 11 additions and 0 deletions
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -342,6 +342,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
            case LLAMA_VOCAB_PRE_TYPE_MPT:
            case LLAMA_VOCAB_PRE_TYPE_OLMO:
            case LLAMA_VOCAB_PRE_TYPE_JAIS:
+            case LLAMA_VOCAB_PRE_TYPE_TRILLION:
                regex_exprs = {
                    "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
                };
@@ -1614,6 +1615,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "superbpe") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_SUPERBPE;
                clean_spaces = false;
+            } else if (
+                tokenizer_pre == "trillion") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
+                clean_spaces = false;
            } else {
                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
            }