graph : normalize Q, K, V shapes + sync cross attention (#12449)
* graph : normalize Q, K, V shapes and add comments

ggml-ci

* context : synchronize before getting cross attention data

* model : fix command-r attention norm check
This commit is contained in:
parent
bb115d2bf7
commit
75422e8bc4
4 changed files with 433 additions and 277 deletions
|
@@ -1378,7 +1378,7 @@ ggml_tensor * llm_graph_context::build_attn(
|
|||
// note: storing RoPE-ed version of K in the KV cache
|
||||
ggml_build_forward_expand(gf, ggml_cpy(ctx0, k_cur, k_cache_view));
|
||||
|
||||
assert(v_cur->ne[0] == n_embd_v_gqa && v_cur->ne[1] == n_tokens);
|
||||
v_cur = ggml_reshape_2d(ctx0, v_cur, n_embd_v_gqa, n_tokens);
|
||||
|
||||
ggml_tensor * v_cache_view = nullptr;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue