vulkan: matmul dequantization improvements (#12015)

* faster dequant for old quants * don't use unpack for iq4_nl * vec2 unpack for q8
2025-02-28 07:20:08 +00:00 · 2025-02-28 07:20:08 +00:00 · fbeda9002d
commit fbeda9002d
parent 581650b7ca
5 changed files with 93 additions and 53 deletions
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
@ -82,9 +82,9 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1]));
 }
 // Returns four values of block `a_offset + ib` as a vec4.
 // Two consecutive packed entries, qs[iqs/2] and qs[iqs/2 + 1], are read from
 // data_a_packed16; each is split into a low and a high signed 8-bit value, and
 // the four results are returned in the order (lo0, hi0, lo1, hi1).
 // No scale factor is applied in this function.
 // NOTE(review): iqs/2 is an integer division, so iqs is presumably expected to
 // be even — confirm against callers.
 vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
-    uint32_t v0 = data_a_packed16[a_offset + ib].qs[iqs/2];
-    uint32_t v1 = data_a_packed16[a_offset + ib].qs[iqs/2 + 1];
-    return vec4(int8_t(v0 & 0xFF), int8_t(v0 >> 8), int8_t(v1 & 0xFF), int8_t(v1 >> 8));
+    const i8vec2 v0 = unpack8(data_a_packed16[a_offset + ib].qs[iqs/2]);
+    const i8vec2 v1 = unpack8(data_a_packed16[a_offset + ib].qs[iqs/2 + 1]);
+    return vec4(v0.x, v0.y, v1.x, v1.y);
 }
 #endif