HIP: add GGML_CUDA_CC_IS_* for AMD families, as increasing cc architectures for AMD GPUs are not supersets of each other (#11601)

This fixes a bug where RDNA1 GPUs other than gfx1010 were not handled correctly.
This commit is contained in:
uvos 2025-02-02 22:08:05 +01:00 committed by GitHub
parent 90f9b88afb
commit 4d0598e144
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 11 additions and 4 deletions

View file

@ -1205,7 +1205,7 @@ static void ggml_cuda_op_mul_mat_cublas(
CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));
if (compute_capability == GGML_CUDA_CC_CDNA) {
if (GGML_CUDA_CC_IS_CDNA(compute_capability)) {
const float alpha = 1.0f;
const float beta = 0.0f;
CUBLAS_CHECK(
@ -1750,7 +1750,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
beta = &beta_f32;
}
if (ggml_cuda_info().devices[ctx.device].cc == GGML_CUDA_CC_CDNA) {
if (GGML_CUDA_CC_IS_CDNA(ggml_cuda_info().devices[ctx.device].cc)) {
cu_compute_type = CUBLAS_COMPUTE_32F;
alpha = &alpha_f32;
beta = &beta_f32;