graph : simplify attn input build for unified KV cache (#12381)
ggml-ci
This commit is contained in:
parent
081bee8c64
commit
c522ce4143
3 changed files with 53 additions and 58 deletions
|
@@ -509,9 +509,7 @@ struct llm_graph_context {
|
|||
float kq_scale,
|
||||
int il) const;
|
||||
|
||||
llm_graph_input_attn_kv_unified * build_attn_inp_kv_unified(
|
||||
bool causal,
|
||||
bool swa) const;
|
||||
llm_graph_input_attn_kv_unified * build_attn_inp_kv_unified() const;
|
||||
|
||||
ggml_tensor * build_attn(
|
||||
llm_graph_input_attn_kv_unified * inp,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue