llama : fix non-causal mask for gemma 3 (#12615)
This commit is contained in:
parent
0bb2919335
commit
af6ae1efb2
2 changed files with 72 additions and 106 deletions
|
@@ -1317,8 +1317,8 @@ int llama_context::decode(llama_batch & inp_batch) {
|
|||
n_outputs = n_outputs_new;
|
||||
}
|
||||
|
||||
// non-causal masks do not use the KV cache
|
||||
if (hparams.causal_attn) {
|
||||
// find KV slot
|
||||
{
|
||||
kv_self_update();
|
||||
|
||||
// if we have enough unused cells before the current head ->
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue