CUDA: fix race condition in MMQ stream-k fixup (#13299)

This commit is contained in:
Johannes Gäßler 2025-05-04 14:16:39 +02:00 committed by GitHub
parent 8afbd96818
commit 93c4e23905
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -2958,6 +2958,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
ids_dst_shared[j] = ids_dst[col_low + j];
}
__syncthreads();
const int offset_dst = it*mmq_y;
dst += offset_dst;