vulkan: matmul dequantization improvements (#12015)
* faster dequant for old quants * dont use unpack for iq4_nl * vec2 unpack for q8
This commit is contained in:
parent
581650b7ca
commit
fbeda9002d
5 changed files with 93 additions and 53 deletions
|
@ -82,9 +82,9 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
|
|||
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1]));
|
||||
}
|
||||
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
|
||||
uint32_t v0 = data_a_packed16[a_offset + ib].qs[iqs/2];
|
||||
uint32_t v1 = data_a_packed16[a_offset + ib].qs[iqs/2 + 1];
|
||||
return vec4(int8_t(v0 & 0xFF), int8_t(v0 >> 8), int8_t(v1 & 0xFF), int8_t(v1 >> 8));
|
||||
const i8vec2 v0 = unpack8(data_a_packed16[a_offset + ib].qs[iqs/2]);
|
||||
const i8vec2 v1 = unpack8(data_a_packed16[a_offset + ib].qs[iqs/2 + 1]);
|
||||
return vec4(v0.x, v0.y, v1.x, v1.y);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue