cuda : synchronize graph capture and cublas handle destruction (#14288)

Works around an issue that may cause CUDA graph capture to fail when a cuBLAS handle is destroyed in a different thread.
2025-06-20 04:57:36 -07:00 · 2025-06-20 04:57:36 -07:00 · e28c1b93fd
commit e28c1b93fd
parent d27b3ca175
2 changed files with 43 additions and 19 deletions
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -19,10 +19,10 @@
 #endif
 #include "ggml-common.h"

-#include <cstdio>
 #include <array>
 #include <cassert>
 #include <cfloat>
+#include <cstdio>
 #include <string>
 #include <vector>

@@ -767,21 +767,7 @@ struct ggml_backend_cuda_context {
        name(GGML_CUDA_NAME + std::to_string(device)) {
    }

-    ~ggml_backend_cuda_context() {
-        if (copy_event != nullptr) {
-            CUDA_CHECK(cudaEventDestroy(copy_event));
-        }
-        for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) {
-            for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) {
-                if (streams[i][j] != nullptr) {
-                    CUDA_CHECK(cudaStreamDestroy(streams[i][j]));
-                }
-            }
-            if (cublas_handles[i] != nullptr) {
-                CUBLAS_CHECK(cublasDestroy(cublas_handles[i]));
-            }
-        }
-    }
+    ~ggml_backend_cuda_context();

    cudaStream_t stream(int device, int stream) {
        if (streams[device][stream] == nullptr) {