llama : do not crash if there is no CPU backend (#13395)

* llama : do not crash if there is no CPU backend

* add checks to examples
This commit is contained in:
Diego Devesa 2025-05-09 13:02:07 +02:00 committed by GitHub
parent 5c86c9ed3e
commit 27ebfcacba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 48 additions and 13 deletions

View file

@ -152,7 +152,12 @@ int main(int argc, char ** argv) {
LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);
auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
if (!cpu_dev) {
LOG_ERR("%s: no CPU backend found\n", __func__);
return 1;
}
auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
auto * ggml_threadpool_new_fn = (decltype(ggml_threadpool_new) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_new");
auto * ggml_threadpool_free_fn = (decltype(ggml_threadpool_free) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_free");

View file

@ -352,9 +352,12 @@ struct clip_ctx {
clip_ctx(clip_context_params & ctx_params) {
backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
backend = ctx_params.use_gpu
? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
: nullptr;
if (!backend_cpu) {
throw std::runtime_error("failed to initialize CPU backend");
}
backend = ctx_params.use_gpu
? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
: nullptr;
if (backend) {
LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend));
@ -2185,9 +2188,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity) {
struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) {
g_logger_state.verbosity_thold = ctx_params.verbosity;
clip_ctx * ctx_clip = new clip_ctx(ctx_params);
clip_ctx * ctx_clip = nullptr;
try {
ctx_clip = new clip_ctx(ctx_params);
clip_model_loader loader(fname, *ctx_clip);
loader.load_hparams();
loader.load_tensors();

View file

@ -212,6 +212,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
ggml_build_forward_expand(gf, flatten);
ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
GGML_ASSERT(backend != nullptr && "failed to initialize CPU backend");
ggml_backend_graph_compute(backend.get(), gf);
struct ggml_tensor* result = ggml_graph_node(gf, -1);

View file

@ -237,15 +237,17 @@ static ggml_backend_t create_backend(const rpc_server_params & params) {
backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
}
fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
if (backend) {
fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
// set the number of threads
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
if (reg) {
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
if (ggml_backend_set_n_threads_fn) {
ggml_backend_set_n_threads_fn(backend, params.n_threads);
// set the number of threads
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
if (reg) {
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
if (ggml_backend_set_n_threads_fn) {
ggml_backend_set_n_threads_fn(backend, params.n_threads);
}
}
}