context : fix pos_min initialization upon decode error (#14008)

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-06-05 09:06:29 +03:00 committed by GitHub
parent 5a8ae3053c
commit 9e31bec4fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1058,7 +1058,10 @@ int llama_context::decode(llama_batch & inp_batch) {
if (!res) { if (!res) {
// the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache
llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES] = { std::numeric_limits<llama_pos>::max() }; llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES];
for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
pos_min[s] = std::numeric_limits<llama_pos>::max();
}
for (uint32_t i = 0; i < ubatch.n_tokens; ++i) { for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
const auto & seq_id = ubatch.seq_id[i][0]; const auto & seq_id = ubatch.seq_id[i][0];