CUDA: fix race conditions in FlashAttention kernels (#13438)

Johannes Gäßler 2025-05-10 22:22:48 +02:00 committed by GitHub
parent d2a4ef05c6
commit 0208355f42
2 changed files with 3 additions and 0 deletions


@@ -874,6 +874,8 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
         }
     }
 
+    __syncthreads();
+
     // Write back combined meta data:
 #pragma unroll
     for (int imeta = 0; imeta < nmeta; ++imeta) {
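
The inserted __syncthreads() acts as a block-wide barrier: every thread's preceding shared-memory writes must complete before any thread begins writing back the combined meta data, which closes the read-after-write race. The following is a minimal, self-contained sketch of that write/barrier/read pattern; the kernel combine_partials and its layout are hypothetical illustrations, not code from the llama.cpp FlashAttention kernels.

```cuda
#include <cstdio>

// Each thread writes a partial value to shared memory; thread 0 then combines
// them. Without the __syncthreads() between the two phases, thread 0 could read
// slots that other threads have not written yet -- the kind of race the commit
// fixes by adding a barrier before the combined write-back.
__global__ void combine_partials(const float * in, float * out, int n) {
    extern __shared__ float partial[];

    const int tid = threadIdx.x;

    // Phase 1: store this thread's partial value to shared memory.
    partial[tid] = (tid < n) ? in[tid] : 0.0f;

    // Barrier: all shared-memory writes above are visible to every thread in
    // the block before any thread starts reading below.
    __syncthreads();

    // Phase 2: thread 0 reads all slots and writes the combined result.
    if (tid == 0) {
        float sum = 0.0f;
        for (int i = 0; i < blockDim.x; ++i) {
            sum += partial[i];
        }
        *out = sum;
    }
}

int main() {
    const int n = 128;
    float h_in[n];
    for (int i = 0; i < n; ++i) {
        h_in[i] = 1.0f;
    }

    float * d_in;
    float * d_out;
    cudaMalloc(&d_in,  n * sizeof(float));
    cudaMalloc(&d_out, sizeof(float));
    cudaMemcpy(d_in, h_in, n * sizeof(float), cudaMemcpyHostToDevice);

    // One block of n threads, n floats of dynamic shared memory.
    combine_partials<<<1, n, n * sizeof(float)>>>(d_in, d_out, n);

    float h_out = 0.0f;
    cudaMemcpy(&h_out, d_out, sizeof(float), cudaMemcpyDeviceToHost);
    printf("sum = %f\n", h_out); // expected: 128.0

    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}
```

Removing the __syncthreads() in the sketch makes the result nondeterministic on hardware where the block spans multiple warps, which mirrors why the barrier is placed immediately before the combined meta-data write-back in the diff above.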