kv-cache : simplify the interface (#13660)

* kv-cache : simplify the interface

ggml-ci

* context : revert llama_batch_allocr position change

ggml-ci
This commit is contained in:
Authored by Georgi Gerganov on 2025-05-21 15:11:13 +03:00; committed by GitHub
parent b44890df2e
commit 797f2ac062
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 89 additions and 153 deletions

View file

@ -13203,7 +13203,8 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
GGML_TYPE_F32,
GGML_TYPE_F32,
cparams.offload_kqv,
std::max((uint32_t) 1, cparams.n_seq_max));
std::max((uint32_t) 1, cparams.n_seq_max),
cparams.n_seq_max);
} break;
default:
{
@ -13222,8 +13223,8 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
params.type_v,
!cparams.flash_attn,
cparams.offload_kqv,
cparams.n_ctx,
params.swa_full,
cparams.n_ctx,
cparams.n_seq_max,
cparams.n_batch,
padding);
@ -13238,6 +13239,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
!cparams.flash_attn,
cparams.offload_kqv,
cparams.n_ctx,
cparams.n_seq_max,
padding,
hparams.n_swa,
hparams.swa_type);