CUDA: noncont MMVQ + batched bs1 MUL_MAT_ID (#13014)
* CUDA: noncont MMVQ + batched bs1 MUL_MAT_ID
* fix logic for RoPE support, CUDA graphs
This commit is contained in:
parent
dc39a5e7a8
commit
658987cfc9
9 changed files with 548 additions and 426 deletions
|
@@ -2071,7 +2071,7 @@ struct test_mul_mat_id : public test_case {
|
|||
const ggml_type type_b;
|
||||
const int n_mats;
|
||||
const int n_used;
|
||||
const bool b; // brodcast b matrix
|
||||
const bool b; // broadcast b matrix
|
||||
const int64_t m;
|
||||
const int64_t n;
|
||||
const int64_t k;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue