server : add server parameters for draft model cache type (#13782)
Co-authored-by: aa956 <27946957+aa956@users.noreply.github.com>
This commit is contained in:
parent
456af35eb7
commit
d67341dc18
4 changed files with 33 additions and 4 deletions
|
@ -3210,6 +3210,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.speculative.model.path = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
|
||||
add_opt(common_arg(
|
||||
{"-ctkd", "--cache-type-k-draft"}, "TYPE",
|
||||
string_format(
|
||||
"KV cache data type for K for the draft model\n"
|
||||
"allowed values: %s\n"
|
||||
"(default: %s)",
|
||||
get_all_kv_cache_types().c_str(),
|
||||
ggml_type_name(params.speculative.cache_type_k)
|
||||
),
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.speculative.cache_type_k = kv_cache_type_from_str(value);
|
||||
}
|
||||
).set_env("LLAMA_ARG_CACHE_TYPE_K_DRAFT"));
|
||||
add_opt(common_arg(
|
||||
{"-ctvd", "--cache-type-v-draft"}, "TYPE",
|
||||
string_format(
|
||||
"KV cache data type for V for the draft model\n"
|
||||
"allowed values: %s\n"
|
||||
"(default: %s)",
|
||||
get_all_kv_cache_types().c_str(),
|
||||
ggml_type_name(params.speculative.cache_type_v)
|
||||
),
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.speculative.cache_type_v = kv_cache_type_from_str(value);
|
||||
}
|
||||
).set_env("LLAMA_ARG_CACHE_TYPE_V_DRAFT"));
|
||||
|
||||
add_opt(common_arg(
|
||||
{"-mv", "--model-vocoder"}, "FNAME",
|
||||
|
|
|
@ -199,6 +199,9 @@ struct common_params_speculative {
|
|||
float p_split = 0.1f; // speculative decoding split probability
|
||||
float p_min = 0.75f; // minimum speculative decoding probability (greedy)
|
||||
|
||||
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
||||
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
||||
|
||||
struct cpu_params cpuparams;
|
||||
struct cpu_params cpuparams_batch;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue