Adds:
* Dots1Model to convert_hf_to_gguf.py
* Computation graph code to llama-model.cpp
* Chat template to llama-chat.cpp to detect this model's template.
---
The model is called "dots.llm1" (I decided to shorten it to dots1 or
DOTS1 in the code generally) architecture.
The only models that exist as of writing of this commit that follow this
architecture are "dots.llm1.inst" and "dots.llm1.base" from here:
* https://huggingface.co/rednote-hilab/dots.llm1.inst
* https://huggingface.co/rednote-hilab/dots.llm1.base
The model architecture is a combination of Qwen and Deepseek parts, as
seen here:
ffe12627b4/src/transformers/models/dots1/modular_dots1.py
This commit is contained in:
parent
c311ac664d
commit
9ae4143bc6
9 changed files with 326 additions and 1 deletions
|
@ -183,6 +183,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
|
|||
return LLM_CHAT_TEMPLATE_BAILING;
|
||||
} else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
|
||||
return LLM_CHAT_TEMPLATE_LLAMA4;
|
||||
} else if (tmpl_contains("<|endofuserprompt|>")) {
|
||||
return LLM_CHAT_TEMPLATE_DOTS1;
|
||||
}
|
||||
return LLM_CHAT_TEMPLATE_UNKNOWN;
|
||||
}
|
||||
|
@ -643,6 +645,21 @@ int32_t llm_chat_apply_template(
|
|||
if (add_ass) {
|
||||
ss << "Assistant:";
|
||||
}
|
||||
} else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
|
||||
// dots.llm1.inst (DOTS1)
|
||||
for (auto message : chat) {
|
||||
std::string role(message->role);
|
||||
if (role == "system") {
|
||||
ss << "<|system|>" << message->content << "<|endofsystem|>";
|
||||
} else if (role == "user") {
|
||||
ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
|
||||
} else {
|
||||
ss << "<|response|>" << message->content << "<|endofresponse|>";
|
||||
}
|
||||
}
|
||||
if (add_ass) {
|
||||
ss << "<|response|>";
|
||||
}
|
||||
} else {
|
||||
// template not supported
|
||||
return -1;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue