llama : add thread safety test (#14035)
* llama : add thread safety test * llamafile : remove global state * llama : better LLAMA_SPLIT_MODE_NONE logic when main_gpu < 0 GPU devices are not used --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
0dbcabde8c
commit
6adc3c3ebc
9 changed files with 192 additions and 18 deletions
|
@ -198,14 +198,18 @@ static struct llama_model * llama_model_load_from_file_impl(
|
|||
|
||||
// if using single GPU mode, remove all except the main GPU
|
||||
if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
|
||||
if (params.main_gpu < 0 || params.main_gpu >= (int)model->devices.size()) {
|
||||
LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %d)\n", __func__, params.main_gpu, (int)model->devices.size());
|
||||
llama_model_free(model);
|
||||
return nullptr;
|
||||
if (params.main_gpu < 0) {
|
||||
model->devices.clear();
|
||||
} else {
|
||||
if (params.main_gpu >= (int)model->devices.size()) {
|
||||
LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %zu)\n", __func__, params.main_gpu, model->devices.size());
|
||||
llama_model_free(model);
|
||||
return nullptr;
|
||||
}
|
||||
ggml_backend_dev_t main_gpu = model->devices[params.main_gpu];
|
||||
model->devices.clear();
|
||||
model->devices.push_back(main_gpu);
|
||||
}
|
||||
ggml_backend_dev_t main_gpu = model->devices[params.main_gpu];
|
||||
model->devices.clear();
|
||||
model->devices.push_back(main_gpu);
|
||||
}
|
||||
|
||||
for (auto * dev : model->devices) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue