metal : use F32 prec in FA kernels (#12688)
* metal : use F32 prec in FA kernels ggml-ci
* cont : fix FA vec kernel ggml-ci
This commit is contained in:
parent
a6f32f0b34
commit
3fd072a540
2 changed files with 48 additions and 48 deletions
|
|
@@ -4179,7 +4179,7 @@ static void ggml_metal_encode_node(
|
|||
// ne00*(nsg)
|
||||
// each simdgroup has a full f16 head vector in shared mem to accumulate results
|
||||
//
|
||||
#define FATTN_SMEM(nsg) (GGML_PAD((nqptg*(GGML_PAD(ne00, 128) + 2*ncpsg*(nsg)) + ne20*(nsg))*(sizeof(float)/2), 16))
|
||||
#define FATTN_SMEM(nsg) (GGML_PAD((nqptg*(GGML_PAD(ne00, 128) + 4*ncpsg*(nsg)) + ne20*(nsg))*(sizeof(float)/2), 16))
|
||||
|
||||
int64_t nsgmax = 2;
|
||||
while (true) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue