llama : add option to override model tensor buffers (#11397)
* llama : add option to override tensor buffers
* ggml : fix possible underflow in ggml_nbytes
This commit is contained in:
parent
a10b36c91a
commit
e0e912f49b
12 changed files with 108 additions and 9 deletions
|
@@ -255,7 +255,8 @@ llama_context::llama_context(
|
|||
model.n_devices() > 1 &&
|
||||
model.params.n_gpu_layers > (int) model.hparams.n_layer &&
|
||||
model.params.split_mode == LLAMA_SPLIT_MODE_LAYER &&
|
||||
- cparams.offload_kqv;
|
||||
+ cparams.offload_kqv &&
|
||||
+ !model.has_tensor_overrides();
|
||||
|
||||
// pipeline parallelism requires support for async compute and events in all devices
|
||||
if (pipeline_parallel) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue