llama : fix quantize with dl backends (#13539)
This commit is contained in:
parent 6da34fa276
commit b7d2672082

1 changed file with 10 additions and 5 deletions
@@ -822,13 +822,18 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
     mappings.reserve(files.size());
     mmaps_used.reserve(files.size());
     for (const auto & file : files) {
-        auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
-        if (!reg) {
-            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        bool is_numa = false;
+
+        auto * dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        if (dev) {
+            auto * reg = ggml_backend_dev_backend_reg(dev);
+            auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
+            if (is_numa_fn) {
+                is_numa = is_numa_fn();
+            }
         }
 
-        auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
-        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
+        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa);
         mmaps_used.emplace_back(mapping->size(), 0);
         if (mlock_mmaps) {
             std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
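In words: with dynamically loaded (dl) backends, the CPU backend may not be registered when quantization runs, so the loader no longer throws "no CPU backend found"; NUMA detection becomes optional and the mapping defaults to non-NUMA. Below is a minimal self-contained sketch (not part of the commit) of the same optional proc-address pattern. detect_numa is a hypothetical helper name, and the cast uses a plain function-pointer type rather than decltype(ggml_is_numa) so the sketch does not need the CPU backend header.

// Sketch: query an optional, backend-specific function through the ggml
// backend registry and fall back to a safe default when it is unavailable.
// detect_numa() is a hypothetical helper for illustration, not llama.cpp code.
#include "ggml-backend.h"

static bool detect_numa() {
    bool is_numa = false; // safe default: no CPU backend means no NUMA info

    // may be null when only dynamically loaded non-CPU backends are present
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (dev) {
        ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
        // the symbol is looked up by name at runtime, so it is optional by construction
        auto * is_numa_fn = (bool (*)(void)) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
        if (is_numa_fn) {
            is_numa = is_numa_fn();
        }
    }
    return is_numa;
}

The design point: ggml_backend_reg_get_proc_address returns null for names a backend does not export, so backend-specific capabilities degrade gracefully instead of hard-failing, which is what quantizing with dl backends needed here.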