vocab : prevent tokenizer overflow (#14301)
* vocab : prevent stack overflow in tokenize
* vocab : return error instead of aborting on oversized token count
* vocab : INT32_MIN from llama_tokenize on overflow
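The last bullet changes the low-level contract: llama_tokenize reports an oversized result with INT32_MIN instead of aborting, so direct callers can tell it apart from the ordinary negative "buffer too small" return. A minimal sketch of that distinction, assuming the existing llama_tokenize signature; the helper name try_tokenize and its size bound are illustrative, not part of this commit:

#include <cstdint>
#include <limits>
#include <string>
#include <vector>

#include "llama.h"

// Illustrative direct caller of llama_tokenize(); not part of this commit.
static bool try_tokenize(const llama_vocab * vocab, const std::string & text,
                         std::vector<llama_token> & out) {
    out.resize(text.length() + 2);  // rough upper bound, as in common_tokenize
    int32_t n = llama_tokenize(vocab, text.data(), text.length(),
                               out.data(), out.size(),
                               /*add_special=*/true, /*parse_special=*/true);
    if (n == std::numeric_limits<int32_t>::min()) {
        return false;  // new in this commit: token count exceeds int32_t range
    }
    if (n < 0) {
        // pre-existing convention: -n is the number of slots actually needed
        out.resize(-n);
        n = llama_tokenize(vocab, text.data(), text.length(),
                           out.data(), out.size(),
                           /*add_special=*/true, /*parse_special=*/true);
    }
    out.resize(n);
    return true;
}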
parent 8308f98c7f
commit dd6e6d0b6a
3 changed files with 9 additions and 0 deletions
@@ -1290,6 +1290,9 @@ std::vector<llama_token> common_tokenize(
     int n_tokens = text.length() + 2 * add_special;
     std::vector<llama_token> result(n_tokens);
     n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens == std::numeric_limits<int32_t>::min()) {
+        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
+    }
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
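With this hunk, callers that go through common_tokenize see an oversized input as a catchable exception instead of a process abort. A minimal usage sketch, assuming common.h from the same tree; the surrounding function and its name are illustrative, not part of this change:

#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

#include "common.h"

// Illustrative caller: oversized input now surfaces as std::runtime_error
// from common_tokenize() rather than terminating the process.
static void tokenize_prompt(const llama_vocab * vocab, const std::string & prompt) {
    try {
        std::vector<llama_token> tokens =
            common_tokenize(vocab, prompt, /*add_special=*/true, /*parse_special=*/true);
        std::printf("tokenized into %zu tokens\n", tokens.size());
    } catch (const std::runtime_error & err) {
        std::fprintf(stderr, "tokenization failed: %s\n", err.what());
    }
}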