llama : add support for GLM-Edge and GLM-Edge-V series models (#10573)

* add glm edge chat model

* use config partial_rotary_factor as rope ratio

* support for glm edge model

* vision model support

* remove debug info

* fix format

* llava.cpp trailing whitespace

* remove unused AutoTokenizer

* Update src/llama.cpp for not contain <|end|> or </s>

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>

* add edge template

* fix chat template

* fix confict

* fix confict

* fix ci err

* fix format err

* fix template err

* 9b hf chat support

* format

* format clip.cpp

* fix format

* Apply suggestions from code review

* Apply suggestions from code review

* Update examples/llava/clip.cpp

* fix format

* minor : style

---------

Co-authored-by: liyuhang <yuhang.li@zhipuai.cn>
Co-authored-by: piDack <pcdack@hotmail.co>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: liyuhang <yuhang.li@aminer.cn>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
piDack 2025-02-02 15:48:46 +08:00 committed by GitHub
parent 53debe6f3c
commit 0cec062a63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 568 additions and 67 deletions

View file

@ -51,6 +51,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
{ "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
{ "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
{ "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
{ "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
{ "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
{ "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
@ -115,7 +116,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
} else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
return LLM_CHAT_TEMPLATE_PHI_3;
} else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
return LLM_CHAT_TEMPLATE_FALCON_3;
return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
} else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
return LLM_CHAT_TEMPLATE_ZEPHYR;
} else if (tmpl_contains("bos_token + message['role']")) {
@ -440,6 +441,14 @@ int32_t llm_chat_apply_template(
if (add_ass) {
ss << "<|assistant|>";
}
} else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
for (auto message : chat) {
std::string role(message->role);
ss << "<|" << role << "|>" << "\n" << message->content;
}
if (add_ass) {
ss << "<|assistant|>";
}
} else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
// MiniCPM-3B-OpenHermes-2.5-v2-GGUF
for (auto message : chat) {