CUDA: app option to compile without FlashAttention (#12025)

This commit is contained in:
Johannes Gäßler 2025-02-22 20:44:34 +01:00 committed by GitHub
parent 36c258ee92
commit a28e0d5eb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 46 additions and 31 deletions

View file

@@ -83,6 +83,10 @@ if (MUSAToolkit_FOUND)
add_compile_definitions(GGML_CUDA_NO_VMM)
endif()
# FlashAttention opt-out: when GGML_CUDA_FA is not truthy (an unset variable
# also takes this branch), add the GGML_CUDA_NO_FA preprocessor definition
# for sources compiled from this directory scope.
# NOTE(review): presumably the GPU sources test GGML_CUDA_NO_FA to leave the
# FlashAttention code out of the build -- confirm against those sources.
if (NOT GGML_CUDA_FA)
    add_compile_definitions(GGML_CUDA_NO_FA)
endif()
# Either switch enables the same macro: if GGML_CUDA_F16 or
# GGML_CUDA_DMMV_F16 is truthy, add the GGML_CUDA_F16 preprocessor
# definition for sources compiled from this directory scope. No separate
# definition is emitted here for GGML_CUDA_DMMV_F16.
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
    add_compile_definitions(GGML_CUDA_F16)
endif()