vulkan: matmul dequantization improvements (#12015)

* faster dequant for old quants

* don't use unpack for iq4_nl

* vec2 unpack for q8
This commit is contained in:
Eve 2025-02-28 07:20:08 +00:00 committed by GitHub
parent 581650b7ca
commit fbeda9002d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 93 additions and 53 deletions

View file

@ -82,9 +82,9 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1]));
}
// Builds a vec4 from two consecutive entries of data_a_packed16[a_offset + ib].qs,
// at indices iqs/2 and iqs/2 + 1, yielding two values per entry.
// NOTE(review): this body contains two implementations back to back. It looks like the
// removed and added sides of a diff hunk whose +/- markers were lost — confirm against
// the real shader file before treating this text as compilable source.
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
// Variant 1: load each entry into a uint32_t, then take the low byte (& 0xFF) and the
// bits from position 8 upward (>> 8), narrowing each one through int8_t.
uint32_t v0 = data_a_packed16[a_offset + ib].qs[iqs/2];
uint32_t v1 = data_a_packed16[a_offset + ib].qs[iqs/2 + 1];
return vec4(int8_t(v0 & 0xFF), int8_t(v0 >> 8), int8_t(v1 & 0xFF), int8_t(v1 >> 8));
// Variant 2: split each entry into an i8vec2 with unpack8 and return its components.
// As written, these lines follow an unconditional return and redeclare v0/v1 in the
// same scope, so only one of the two variants can be meant to remain.
const i8vec2 v0 = unpack8(data_a_packed16[a_offset + ib].qs[iqs/2]);
const i8vec2 v1 = unpack8(data_a_packed16[a_offset + ib].qs[iqs/2 + 1]);
return vec4(v0.x, v0.y, v1.x, v1.y);
}
#endif