ggml-cpu: Faster IQ1 mul_mat_vec on AVX2 using BMI2 instructions (#12154)

* ggml-cpu: Faster IQ1 mul_mat_vec on AVX2 using BMI2 instructions

* cmake: Add GGML_BMI2 build option

* ggml: enable BMI2 on relevant CPU variants

* ggml-cpu: include BMI2 in backend score

* ggml-cpu: register BMI2 in ggml_backend_cpu_get_features

* ggml-cpu: add __BMI2__ define when using MSVC
This commit is contained in:
Rémy O 2025-03-06 02:26:10 +01:00 committed by GitHub
parent 5e43f104cc
commit 07d1572347
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 68 additions and 18 deletions

View file

@@ -219,6 +219,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_AVX_VNNI)
    # Define __AVXVNNI__ explicitly, followed by the GGML_AVX_VNNI feature flag.
    list(APPEND ARCH_DEFINITIONS __AVXVNNI__)
    list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
endif()
if (GGML_BMI2)
    # MSVC does not predefine the __BMI2__ macro, so it is added by hand here,
    # followed by the GGML_BMI2 feature flag.
    list(APPEND ARCH_DEFINITIONS __BMI2__)
    list(APPEND ARCH_DEFINITIONS GGML_BMI2)
endif()
else ()
if (GGML_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
@@ -233,6 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list(APPEND ARCH_FLAGS -mfma)
list(APPEND ARCH_DEFINITIONS GGML_FMA)
endif()
# BMI2 opt-in: record the GGML_BMI2 definition and the -mbmi2 flag.
# The two lists are independent, so the order of the appends does not matter.
if (GGML_BMI2)
    list(APPEND ARCH_DEFINITIONS GGML_BMI2)
    list(APPEND ARCH_FLAGS -mbmi2)
endif()
if (GGML_AVX)
list(APPEND ARCH_FLAGS -mavx)
list(APPEND ARCH_DEFINITIONS GGML_AVX)