cuda : synchronize graph capture and cublas handle destruction (#14288)

Works around an issue that may cause CUDA graph capture to fail when a cuBLAS handle is destroyed in a different thread.
This commit is contained in:
Diego Devesa 2025-06-20 04:57:36 -07:00 committed by GitHub
parent d27b3ca175
commit e28c1b93fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 43 additions and 19 deletions

View file

@ -19,10 +19,10 @@
#endif
#include "ggml-common.h"
#include <cstdio>
#include <array>
#include <cassert>
#include <cfloat>
#include <cstdio>
#include <string>
#include <vector>
@ -767,21 +767,7 @@ struct ggml_backend_cuda_context {
name(GGML_CUDA_NAME + std::to_string(device)) {
}
~ggml_backend_cuda_context() {
    // Tear down every CUDA/cuBLAS object this context holds.
    // Order: the copy event first, then, per device slot, all of its streams
    // followed by its cuBLAS handle. Slots still equal to nullptr are skipped.
    if (copy_event != nullptr) {
        CUDA_CHECK(cudaEventDestroy(copy_event));
    }

    for (int dev = 0; dev < GGML_CUDA_MAX_DEVICES; dev++) {
        for (int slot = 0; slot < GGML_CUDA_MAX_STREAMS; slot++) {
            const auto queue = streams[dev][slot];
            if (queue == nullptr) {
                continue; // nothing stored in this slot
            }
            CUDA_CHECK(cudaStreamDestroy(queue));
        }

        const auto blas = cublas_handles[dev];
        if (blas == nullptr) {
            continue; // no cuBLAS handle stored for this device slot
        }
        CUBLAS_CHECK(cublasDestroy(blas));
    }
}
~ggml_backend_cuda_context();
cudaStream_t stream(int device, int stream) {
if (streams[device][stream] == nullptr) {