musa: refine compute capability (#12493)
* musa: refine compute capability Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> * Address review comments Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
This commit is contained in:
parent
eddfb43850
commit
fac63a3d78
5 changed files with 44 additions and 32 deletions
|
@ -264,9 +264,9 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||
#elif defined(GGML_USE_MUSA)
|
||||
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
|
||||
info.devices[id].warp_size = 32;
|
||||
// TODO: refine the .cc to reflect MUSA's actual CC capabilities
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
#else
|
||||
|
@ -1188,11 +1188,11 @@ static void ggml_cuda_op_mul_mat_cublas(
|
|||
// ldc == nrows of the matrix that cuBLAS writes into
|
||||
int64_t ldc = id == ctx.device ? ne0 : row_diff;
|
||||
|
||||
const int compute_capability = ggml_cuda_info().devices[id].cc;
|
||||
const int cc = ggml_cuda_info().devices[id].cc;
|
||||
|
||||
const bool use_fp16 = (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT;
|
||||
|
||||
if (compute_capability >= GGML_CUDA_CC_VOLTA && use_fp16) {
|
||||
if (((cc >= GGML_CUDA_CC_VOLTA && GGML_CUDA_CC_IS_NVIDIA(cc)) || GGML_CUDA_CC_IS_AMD(cc)) && use_fp16) {
|
||||
// convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
|
||||
ggml_cuda_pool_alloc<half> src0_as_f16(ctx.pool(id));
|
||||
if (src0->type != GGML_TYPE_F16) {
|
||||
|
@ -1216,7 +1216,7 @@ static void ggml_cuda_op_mul_mat_cublas(
|
|||
|
||||
CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));
|
||||
|
||||
if (GGML_CUDA_CC_IS_CDNA(compute_capability)) {
|
||||
if (GGML_CUDA_CC_IS_CDNA(cc)) {
|
||||
const float alpha = 1.0f;
|
||||
const float beta = 0.0f;
|
||||
CUBLAS_CHECK(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue