vulkan: Allow up to 4096 elements for mul_mat_id row_ids (#13326)
This assert fired running Qwen_Qwen3-30B-A3B-Q2_K.gguf: GGML_ASSERT(nei0 * nei1 <= 3072); The tensor is 8 x 512. Increase this array size to accommodate.
This commit is contained in:
parent
d9c4accaff
commit
02115dcd9a
4 changed files with 5 additions and 5 deletions
|
|
@ -103,7 +103,7 @@ shared FLOAT_TYPE buf_a[BM * SHMEM_STRIDE];
|
|||
shared FLOAT_TYPE buf_b[BN * SHMEM_STRIDE];
|
||||
|
||||
#ifdef MUL_MAT_ID
|
||||
shared u16vec2 row_ids[3072];
|
||||
shared u16vec2 row_ids[4096];
|
||||
#endif // MUL_MAT_ID
|
||||
|
||||
#define NUM_WARPS (BLOCK_SIZE / WARP)
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
|
|||
#ifdef MUL_MAT_ID
|
||||
layout (binding = 3) readonly buffer IDS {int data_ids[];};
|
||||
|
||||
shared u16vec4 row_ids[3072];
|
||||
shared u16vec4 row_ids[4096];
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufB {
|
||||
B_TYPE b[];
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ shared FLOAT_TYPE_VEC2 buf_b_ds[BN];
|
|||
#define LOAD_VEC_B 4
|
||||
|
||||
#ifdef MUL_MAT_ID
|
||||
shared u16vec2 row_ids[3072];
|
||||
shared u16vec2 row_ids[4096];
|
||||
#endif // MUL_MAT_ID
|
||||
|
||||
#define NUM_WARPS (BLOCK_SIZE / WARP)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue