server : fix tool-call parsing for DeepSeek R1 Qwen; return reasoning_content (Command R7B & DeepSeek R1) unless --reasoning-format none (#11607)

* extract & return thoughts in a reasoning_content field (unless --reasoning-format none) for DeepSeek R1 & Command R7B
* tool-calls: add a DeepSeek R1 template (models/templates/llama-cpp-deepseek-r1.jinja) + a hack to accommodate the broken official template
* tool-calls: accommodate the variety of malformed tool-call opening tags that both the R1 Qwen 32B and 7B distills like to emit
* server/oai: ensure content is null when there are tool calls, and that reasoning_content appears before content for readability
* tool-calls: add the DeepSeek R1 Qwen distills to server/README.md & the server tests

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
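For illustration, a minimal sketch of the assistant-message shape this change produces, using nlohmann::ordered_json as the server does (insertion order is preserved on serialization, so reasoning_content comes out before content); the tool name, arguments, and id below are made-up values, not from this commit:

```cpp
#include <cstdio>
#include <nlohmann/json.hpp>

// llama.cpp's server aliases json to nlohmann::ordered_json, which keeps
// insertion order when dumping.
using json = nlohmann::ordered_json;

int main() {
    // Made-up tool call, shaped like the objects built in the diff below.
    json tool_call {
        {"type", "function"},
        {"function", {
            {"name",      "get_weather"},              // hypothetical tool
            {"arguments", "{\"location\": \"Paris\"}"},
        }},
        {"id", "call_1"},
    };

    // After this change: reasoning_content is emitted first, and content
    // is null (not "") whenever the model produced tool calls.
    json message {
        {"role", "assistant"},
        {"reasoning_content", "User wants the weather; call the tool."},
        {"content", nullptr},
        {"tool_calls", json::array({tool_call})},
    };

    printf("%s\n", message.dump(2).c_str());
    return 0;
}
```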
This commit is contained in:
parent 27e8a23300
commit c7f460ab88
17 changed files with 1023 additions and 316 deletions
@@ -173,6 +173,7 @@ struct slot_params {
             {"grammar_trigger_words",  grammar_trigger_words},
             {"grammar_trigger_tokens", sampling.grammar_trigger_tokens},
             {"preserved_tokens",       sampling.preserved_tokens},
+            {"chat_format",            common_chat_format_name(oaicompat_chat_format)},
             {"samplers",               samplers},
             {"speculative.n_max",      speculative.n_max},
             {"speculative.n_min",      speculative.n_min},
@@ -724,9 +725,19 @@ struct server_task_result_cmpl_final : server_task_result {
             msg.content = content;
         }

-        json tool_calls;
+        json message {
+            {"role", "assistant"},
+        };
+        if (!msg.reasoning_content.empty()) {
+            message["reasoning_content"] = msg.reasoning_content;
+        }
+        if (msg.content.empty() && !msg.tool_calls.empty()) {
+            message["content"] = json();
+        } else {
+            message["content"] = msg.content;
+        }
         if (!msg.tool_calls.empty()) {
-            tool_calls = json::array();
+            auto tool_calls = json::array();
             for (const auto & tc : msg.tool_calls) {
                 tool_calls.push_back({
                     {"type", "function"},
@@ -737,15 +748,7 @@ struct server_task_result_cmpl_final : server_task_result {
                     {"id", tc.id},
                 });
             }
+            message["tool_calls"] = tool_calls;
         }
-
-        json message {
-            {"content", msg.content},
-            {"tool_calls", tool_calls},
-            {"role", "assistant"},
-        };
-        if (!msg.tool_plan.empty()) {
-            message["tool_plan"] = msg.tool_plan;
-        }

         json choice {
@@ -4060,7 +4063,7 @@ int main(int argc, char ** argv) {
         }

         auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);

         return handle_completions_impl(
             SERVER_TASK_TYPE_COMPLETION,
@@ -4073,7 +4076,7 @@ int main(int argc, char ** argv) {
     // same with handle_chat_completions, but without inference part
     const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
         auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
         res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
     };

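The two hunks above thread the new params.reasoning_format option into request parsing. Below is a minimal sketch of the gating this enables, per the commit title: with --reasoning-format none the <think> block stays inline in content, otherwise it is split out into reasoning_content. The enum values and helper are illustrative assumptions, not the verbatim llama.cpp code:

```cpp
#include <string>

// Assumed enum mirroring the --reasoning-format flag; the real names in
// llama.cpp's common headers may differ.
enum common_reasoning_format {
    COMMON_REASONING_FORMAT_NONE,     // leave <think>...</think> inline in content
    COMMON_REASONING_FORMAT_DEEPSEEK, // move thoughts into reasoning_content
};

struct parsed_msg {
    std::string content;
    std::string reasoning_content;
};

// Hypothetical condensed extraction: split a <think>...</think> block
// out of the raw model output when reasoning extraction is enabled.
static parsed_msg extract_reasoning(const std::string & output, common_reasoning_format fmt) {
    parsed_msg msg;
    const std::string open = "<think>", close = "</think>";
    const auto b = output.find(open);
    const auto e = output.find(close);
    if (fmt == COMMON_REASONING_FORMAT_NONE ||
        b == std::string::npos || e == std::string::npos || e < b) {
        msg.content = output; // no extraction: thoughts (if any) stay in content
        return msg;
    }
    msg.reasoning_content = output.substr(b + open.size(), e - (b + open.size()));
    msg.content           = output.substr(e + close.size());
    return msg;
}
```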