metal : use F32 prec in FA kernels (#12688)

* metal : use F32 prec in FA kernels

ggml-ci

* cont : fix FA vec kernel

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-04-01 14:57:19 +03:00 committed by GitHub
parent a6f32f0b34
commit 3fd072a540
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 48 additions and 48 deletions

View file

@@ -4179,7 +4179,7 @@ static void ggml_metal_encode_node(
// ne00*(nsg)
// each simdgroup has a full f16 head vector in shared mem to accumulate results
//
-#define FATTN_SMEM(nsg) (GGML_PAD((nqptg*(GGML_PAD(ne00, 128) + 2*ncpsg*(nsg)) + ne20*(nsg))*(sizeof(float)/2), 16))
+#define FATTN_SMEM(nsg) (GGML_PAD((nqptg*(GGML_PAD(ne00, 128) + 4*ncpsg*(nsg)) + ne20*(nsg))*(sizeof(float)/2), 16))
int64_t nsgmax = 2;
while (true) {