ggml : generalize quantize_fns
for simpler FP16 handling (#1237)
* Generalize quantize_fns for simpler FP16 handling * Remove call to ggml_cuda_mul_mat_get_wsize * ci : disable FMA for mac os actions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
8567c76b53
commit
1b107b8550
9 changed files with 172 additions and 548 deletions
10
llama.cpp
10
llama.cpp
|
@ -2257,10 +2257,10 @@ static void llama_convert_tensor_internal(const llama_load_tensor & tensor, llam
|
|||
}
|
||||
float * f32_output = (float *) output.addr;
|
||||
|
||||
quantize_fns_t qtype;
|
||||
ggml_type_traits_t qtype;
|
||||
if (ggml_is_quantized(tensor.type)) {
|
||||
qtype = ggml_internal_get_quantize_fn(tensor.type);
|
||||
if (qtype.dequantize_row_q == NULL) {
|
||||
qtype = ggml_internal_get_type_traits(tensor.type);
|
||||
if (qtype.to_float == NULL) {
|
||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor.type)));
|
||||
}
|
||||
} else if (tensor.type != GGML_TYPE_F16) {
|
||||
|
@ -2271,7 +2271,7 @@ static void llama_convert_tensor_internal(const llama_load_tensor & tensor, llam
|
|||
if (tensor.type == GGML_TYPE_F16) {
|
||||
ggml_fp16_to_fp32_row((ggml_fp16_t *)tensor.data, f32_output, nelements);
|
||||
} else if (ggml_is_quantized(tensor.type)) {
|
||||
qtype.dequantize_row_q(tensor.data, f32_output, nelements);
|
||||
qtype.to_float(tensor.data, f32_output, nelements);
|
||||
} else {
|
||||
LLAMA_ASSERT(false); // unreachable
|
||||
}
|
||||
|
@ -2296,7 +2296,7 @@ static void llama_convert_tensor_internal(const llama_load_tensor & tensor, llam
|
|||
if (typ == GGML_TYPE_F16) {
|
||||
ggml_fp16_to_fp32_row((ggml_fp16_t *)inbuf, outbuf, nels);
|
||||
} else {
|
||||
qtype.dequantize_row_q(inbuf, outbuf, nels);
|
||||
qtype.to_float(inbuf, outbuf, nels);
|
||||
}
|
||||
};
|
||||
workers.push_back(std::thread(compute, tensor.type, tensor.data + in_buff_offs, f32_output + out_buff_offs, thr_elems));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue