opencl: fix for small models (#11950)
* opencl: fix small shape gemv, remove unused extensions
* opencl: fix `transpose_16`, `dump_tensor`, enforce subgroup size
* opencl: fix for token length < 4
* opencl: use wave size of 64 for all Adreno GPUs

---------

Co-authored-by: Shawn Gu <quic_shawngu@quicinc.com>
Co-authored-by: Skyler Szot <quic_sszot@quicinc.com>
This commit is contained in:
parent
7a2c913e66
commit
34a846b584
6 changed files with 67 additions and 59 deletions
|
@@ -1797,6 +1797,9 @@ kernel void kernel_mul_mat_f16_f16(
|
|||
//------------------------------------------------------------------------------
|
||||
// mul_mat_f16_f32_1row
|
||||
//------------------------------------------------------------------------------
|
||||
#ifdef ADRENO_GPU
|
||||
REQD_SUBGROUP_SIZE_64
|
||||
#endif
|
||||
kernel void kernel_mul_mat_f16_f32_1row(
|
||||
global char * src0,
|
||||
ulong offset0,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue