ggml : faster ssm scan (#10558)

* faster ssm_scan

* delete unused commnet

* clang format

* add space

* modify unnecessary calculations

* faster ssm conv implementatioin

* modify file name with dash
This commit is contained in:
a3sh 2025-04-01 00:05:13 +08:00 committed by GitHub
parent 403fbacbbc
commit 250d7953e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 322 additions and 0 deletions

View file

@ -31,6 +31,8 @@
#include "ggml-cuda/rope.cuh"
#include "ggml-cuda/scale.cuh"
#include "ggml-cuda/softmax.cuh"
#include "ggml-cuda/ssm-conv.cuh"
#include "ggml-cuda/ssm-scan.cuh"
#include "ggml-cuda/sum.cuh"
#include "ggml-cuda/sumrows.cuh"
#include "ggml-cuda/tsembd.cuh"
@ -2296,6 +2298,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_SUM_ROWS:
ggml_cuda_op_sum_rows(ctx, dst);
break;
case GGML_OP_SSM_CONV:
ggml_cuda_op_ssm_conv(ctx, dst);
break;
case GGML_OP_SSM_SCAN:
ggml_cuda_op_ssm_scan(ctx, dst);
break;
case GGML_OP_ARGSORT:
ggml_cuda_op_argsort(ctx, dst);
break;
@ -3193,6 +3201,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_COS:
case GGML_OP_CLAMP:
case GGML_OP_LOG:
case GGML_OP_SSM_SCAN:
case GGML_OP_SSM_CONV:
return true;
case GGML_OP_CONT:
return op->src[0]->type != GGML_TYPE_BF16;