kv-cache : simplify the interface (#13660)
* kv-cache : simplify the interface
  ggml-ci
* context : revert llama_batch_allocr position change
  ggml-ci
This commit is contained in:
parent
b44890df2e
commit
797f2ac062
9 changed files with 89 additions and 153 deletions
|
@@ -13203,7 +13203,8 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
GGML_TYPE_F32,
|
||||
GGML_TYPE_F32,
|
||||
cparams.offload_kqv,
|
||||
std::max((uint32_t) 1, cparams.n_seq_max));
|
||||
std::max((uint32_t) 1, cparams.n_seq_max),
|
||||
cparams.n_seq_max);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
|
@@ -13222,8 +13223,8 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
params.type_v,
|
||||
!cparams.flash_attn,
|
||||
cparams.offload_kqv,
|
||||
cparams.n_ctx,
|
||||
params.swa_full,
|
||||
cparams.n_ctx,
|
||||
cparams.n_seq_max,
|
||||
cparams.n_batch,
|
||||
padding);
|
||||
|
@@ -13238,6 +13239,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
!cparams.flash_attn,
|
||||
cparams.offload_kqv,
|
||||
cparams.n_ctx,
|
||||
cparams.n_seq_max,
|
||||
padding,
|
||||
hparams.n_swa,
|
||||
hparams.swa_type);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue