kv-cache : fix shift and defrag logic (#14081)

* kv-cache : fix shift

ggml-ci

* cont : reset shift[i]

ggml-ci

* cont : fix defrag erasing cells that didn't move

ggml-ci
2025-06-09 23:04:35 +03:00 · 2025-06-09 23:04:35 +03:00 · 40cbf571c9
commit 40cbf571c9
parent 7f4fbe5183
2 changed files with 12 additions and 9 deletions
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -462,7 +462,7 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
            for (uint32_t i = 0; i < n_kv; ++i) {
                assert(dinfo.ids[i] <= n_kv);

-                if (dinfo.ids[i] == n_kv) {
+                if (dinfo.ids[i] == n_kv || dinfo.ids[i] == i) {
                    continue;
                }

@@ -944,11 +944,9 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift(
    const auto & n_embd_head_k = hparams.n_embd_head_k;
  //const auto & n_embd_head_v = hparams.n_embd_head_v;

-    //GGML_ASSERT(kv_self->size == n_ctx);
-
    auto inp = std::make_unique<llm_graph_input_k_shift>(this);

-    inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cparams.n_ctx);
+    inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cells.size());
    ggml_set_input(inp->k_shift);

    for (const auto & layer : layers) {