Implement GGML_CPU_ALL_VARIANTS for ARM (#14080)

* ggml-cpu: Factor out feature detection build from x86

* ggml-cpu: Add ARM feature detection and scoring

This is analogous to cpu-feats-x86.cpp. However, to detect compile-time
activation of features, we rely on GGML_USE_<FEAT> which need to be set
in cmake, instead of GGML_<FEAT> that users would set for x86.

This is because on ARM, users specify features with GGML_CPU_ARM_ARCH,
rather than with individual flags.

* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for ARM

Like x86, however to pass around arch flags within cmake, we use
GGML_INTERNAL_<FEAT> as we don't have GGML_<FEAT>.

Some features are optional, so we may need to build multiple backends
per arch version (armv8.2_1, armv8.2_2, ...), and let the scoring
function sort out which one can be used.

* ggml-cpu: Limit ARM GGML_CPU_ALL_VARIANTS to Linux for now

The other platforms will need their own specific variants.

This also fixes the bug that the the variant-building branch was always
being executed as the else-branch of GGML_NATIVE=OFF. The branch is
moved to an elseif-branch which restores the previous behavior.
This commit is contained in:
Christian Kastner 2025-06-11 19:07:44 +00:00 committed by GitHub
parent d4e0d95cf5
commit 532802f938
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 183 additions and 23 deletions

View file

@ -1,3 +1,17 @@
function(ggml_add_cpu_backend_features cpu_name arch)
# The feature detection code is compiled as a separate target so that
# it can be built without the architecture flags
# Since multiple variants of the CPU backend may be included in the same
# build, using set_source_files_properties() to set the arch flags is not possible
set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME})
endfunction()
function(ggml_add_cpu_backend_variant_impl tag_name)
if (tag_name)
set(GGML_CPU_NAME ggml-cpu-${tag_name})
@ -143,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
else()
if (GGML_CPU_ARM_ARCH)
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
elseif(GGML_CPU_ALL_VARIANTS)
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
# Begin with the lowest baseline
set(ARM_MCPU "armv8-a")
set(ARCH_TAGS "")
set(ARCH_DEFINITIONS "")
# When a feature is selected, bump the MCPU to the first
# version that supported it
if (GGML_INTERNAL_DOTPROD)
set(ARM_MCPU "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
endif()
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
set(ARM_MCPU "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
endif()
if (GGML_INTERNAL_SVE)
set(ARM_MCPU "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+sve")
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
endif()
if (GGML_INTERNAL_MATMUL_INT8)
set(ARM_MCPU "armv8.6-a")
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
endif()
if (GGML_INTERNAL_SVE2)
set(ARM_MCPU "armv8.6-a")
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
endif()
if (GGML_INTERNAL_SME)
set(ARM_MCPU "armv9.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+sme")
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
endif()
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
endif()
endif()
endif()
@ -306,18 +363,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
endif()
# The feature detection code is compiled as a separate target so that
# it can be built without the architecture flags
# Since multiple variants of the CPU backend may be included in the same
# build, using set_source_files_properties() to set the arch flags is not possible
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
message(STATUS "PowerPC detected")