Add --no-op-offload to improve -ot pp (prompt-processing) performance in MoE models such as Llama 4 400B (#13386)
This commit is contained in:
parent
3eac209319
commit
7f323a589f
11 changed files with 57 additions and 9 deletions
|
@@ -853,7 +853,7 @@ int main(void) {
|
|||
backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());
|
||||
|
||||
ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
|
||||
backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
|
||||
backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
|
||||
|
||||
printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
|
||||
printf(" Device description: %s\n", ggml_backend_dev_description(devs[i]));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue