CANN: Improve the Inferencing Performance for Ascend NPU Device (#10454)

* improve inferencing performance for Ascend NPU.

Co-authored-by: Frank Mai <thxCode@thxcode0824@gmail.com>

* some modifications after review

* some modifications after review

* restore some modifications

* restore some modifications

---------

Co-authored-by: shanshan shen <shanshanshen333@gmail.com>
Co-authored-by: Frank Mai <thxCode@thxcode0824@gmail.com>
This commit is contained in:
Shanshan Shen 2024-11-26 18:08:37 +08:00 committed by GitHub
parent 7066b4cce2
commit 9a4b79bcfa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 266 additions and 102 deletions

View file

@ -211,17 +211,20 @@ struct ggml_cann_pool_alloc {
struct ggml_backend_cann_context {
int32_t device; /**< Device ID. */
std::string name; /**< Name of the device. */
std::string description; /**< Description of the device. */
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {
{nullptr}}; /**< Array of streams for the device. */
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
/**
* @brief Constructor for initializing the context with a given device.
* @param device Device ID.
*/
explicit ggml_backend_cann_context(int device)
: device(device), name("CANN" + std::to_string(device)) {}
: device(device), name("CANN" + std::to_string(device)) {
ggml_cann_set_device(device);
description = aclrtGetSocName();
}
/**
* @brief Destructor for cleaning up resources.