server : Prefilling assistant message in openai compatible API (#13174)
* Prefilling assistant message in openai compatible API
* fixed indentation
* fixed code convention
* simplify method usage
* no more than one assistant message at end of messages
* merge checks into prefill code
* Update examples/server/utils.hpp

---------

Co-authored-by: matteo <matteo@naspc.lan>
Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
This commit is contained in:
parent
d9d398f84f
commit
e2e1ddb93a
1 changed file with 22 additions and 0 deletions
examples/server/utils.hpp

@@ -642,9 +642,31 @@ static json oaicompat_completion_params_parse(
         throw std::runtime_error("Cannot use custom grammar constraints with tools.");
     }
 
+    // if the assistant message appears at the end of list, we do not add end-of-turn token
+    // for ex. this can be useful to modify the reasoning process in reasoning models
+    bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant";
+    common_chat_msg last_message;
+    if (prefill_assistant_message) {
+        last_message = inputs.messages.back();
+        inputs.messages.pop_back();
+
+        /* sanity check, max one assistant message at the end of the list */
+        if (!inputs.messages.empty() && inputs.messages.back().role == "assistant"){
+            throw std::runtime_error("Cannot have 2 or more assistant messages at the end of the list.");
+        }
+
+        inputs.extract_reasoning = false;
+        inputs.add_generation_prompt = true;
+    }
+
     // Apply chat template to the list of messages
     auto chat_params = common_chat_templates_apply(tmpls, inputs);
 
+    /* Append assistant prefilled message */
+    if (prefill_assistant_message) {
+        chat_params.prompt += last_message.content;
+    }
+
     llama_params["chat_format"] = static_cast<int>(chat_params.format);
     llama_params["prompt"] = chat_params.prompt;
     if (!chat_params.grammar.empty()) {
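With this change, a client can steer generation by ending the messages array with a role "assistant" entry: the server pops that message before templating (so the template never emits an end-of-turn token for it), renders the remaining conversation with an open assistant turn, and appends the popped content so the model continues it rather than starting a fresh reply. Below is a minimal standalone sketch of that flow; the chat_msg struct and render_prompt function are simplified placeholders for illustration, not the actual llama.cpp common_chat_msg / common_chat_templates_apply API.

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for llama.cpp's common_chat_msg (illustration only).
struct chat_msg {
    std::string role;
    std::string content;
};

// Hypothetical stand-in for common_chat_templates_apply: renders each message
// and, when asked, appends an open assistant turn for the model to continue.
static std::string render_prompt(const std::vector<chat_msg> & messages, bool add_generation_prompt) {
    std::string prompt;
    for (const auto & m : messages) {
        prompt += "<|" + m.role + "|>" + m.content + "<|end|>\n";
    }
    if (add_generation_prompt) {
        prompt += "<|assistant|>";
    }
    return prompt;
}

int main() {
    std::vector<chat_msg> messages = {
        {"user",      "What is 2+2?"},
        {"assistant", "Let me think step by step."},  // trailing assistant message triggers prefill
    };

    // Same flow as the diff: pop a trailing assistant message, render the
    // template with an open assistant turn, then append the popped content
    // so generation continues from it (no end-of-turn token in between).
    bool prefill_assistant_message = !messages.empty() && messages.back().role == "assistant";
    chat_msg last_message;
    if (prefill_assistant_message) {
        last_message = messages.back();
        messages.pop_back();
        // sanity check, max one assistant message at the end of the list
        if (!messages.empty() && messages.back().role == "assistant") {
            throw std::runtime_error("Cannot have 2 or more assistant messages at the end of the list.");
        }
    }

    std::string prompt = render_prompt(messages, /*add_generation_prompt=*/true);
    if (prefill_assistant_message) {
        prompt += last_message.content;
    }

    std::cout << prompt << "\n";
    // Prints:
    // <|user|>What is 2+2?<|end|>
    // <|assistant|>Let me think step by step.
    return 0;
}

Popping the message before applying the template is the key design choice: if the trailing assistant message were templated normally, the template would close the turn with an end-of-turn token and the model would start a new reply instead of continuing the prefilled text.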