kv-cache : refactor the update/defrag mechanism (#13988)

* kv-cache : refactor update mechanism

ggml-ci

* memory : improve status handling

* defrag : reset head + add comments

ggml-ci

* cont : minor fixes

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-06-04 18:58:20 +03:00 committed by GitHub
parent 2589ad3704
commit 3e63a58ef7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 340 additions and 191 deletions

View file

@ -36,12 +36,19 @@ public:
// query whether this memory can be edited
// NOTE(review): the exact meaning of "edit" is defined by the implementations - confirm against them
virtual bool get_can_edit() const = 0;
};
// owning pointer (std::unique_ptr) to a llama_memory_i object
using llama_memory_ptr = std::unique_ptr<llama_memory_i>;
// status codes reported for a memory state (see get_status())
// the values are sequential, starting at 0 for success
enum llama_memory_status {
    LLAMA_MEMORY_STATUS_SUCCESS        = 0,
    LLAMA_MEMORY_STATUS_NO_UPDATE      = 1, // presumably: no updates would be applied - confirm against implementations
    LLAMA_MEMORY_STATUS_FAILED_PREPARE = 2,
    LLAMA_MEMORY_STATUS_FAILED_COMPUTE = 3,
};
// helper function for combining the status of two memory states
// useful for implementing hybrid memory types (e.g. iSWA)
// takes the two statuses (s0, s1) by value and returns a single llama_memory_status
// NOTE(review): the combination rule lives in the implementation, which is not visible here
llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);
// the interface for managing the memory state during batch processing
// this interface is implemented per memory type. see:
// - llama_kv_cache_unified_state
@ -69,7 +76,7 @@ public:
// get the current ubatch (returned by const reference)
// NOTE(review): the reference presumably stays valid only while this state object is alive - confirm
virtual const llama_ubatch & get_ubatch() const = 0;
// get the status of the memory state - used for error handling and checking if any updates would be applied
// (see llama_memory_status for the possible values)
virtual llama_memory_status get_status() const = 0;
};