CUDA: fix race condition in FA vector kernels (#13742)

2025-05-24 11:46:19 +02:00 · 2025-05-24 11:46:19 +02:00 · ffd0eae60b
commit ffd0eae60b
parent b775345d78
2 changed files with 2 additions and 0 deletions
--- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
                }
            }
            if (__all_sync(0xFFFFFFFF, skip)) {
+                __syncthreads();
                continue;
            }
 #endif // GGML_USE_HIP
--- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
                }
            }
            if (__all_sync(0xFFFFFFFF, skip)) {
+                __syncthreads();
                continue;
            }
 #endif // GGML_USE_HIP