CUDA: add option to compile without FlashAttention (#12025)

This commit is contained in:
Johannes Gäßler 2025-02-22 20:44:34 +01:00 committed by GitHub
parent 36c258ee92
commit a28e0d5eb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 46 additions and 31 deletions

View file

@ -3203,7 +3203,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_FLASH_ATTN_EXT: {
#ifndef FLASH_ATTN_AVAILABLE
return false;
#endif
#endif // FLASH_ATTN_AVAILABLE
if (op->src[1]->type == GGML_TYPE_BF16 || op->src[2]->type == GGML_TYPE_BF16) {
return false;
}