
Adds:
* Dots1Model to convert_hf_to_gguf.py
* Computation graph code to llama-model.cpp
* Chat template to llama-chat.cpp to detect this model's template.
---
The model is called "dots.llm1" (I decided to shorten it to dots1 or
DOTS1 in the code generally) architecture.
As of the writing of this commit, the only models that follow this
architecture are "dots.llm1.inst" and "dots.llm1.base", available here:
* https://huggingface.co/rednote-hilab/dots.llm1.inst
* https://huggingface.co/rednote-hilab/dots.llm1.base
The model architecture is a combination of Qwen and Deepseek parts, as
seen here:
https://github.com/huggingface/transformers/blob/ffe12627b4/src/transformers/models/dots1/modular_dots1.py
The header below (llama-chat.h) shows the new LLM_CHAT_TEMPLATE_DOTS1 enum value:
#pragma once

#include <string>
#include <vector>
#include <cstdint>
// Known chat-template formats. One entry per distinct prompt layout that
// llm_chat_detect_template() can recognize and llm_chat_apply_template()
// can render. Enumerator order is part of the ABI-visible values (implicit
// 0..N), so append new templates before LLM_CHAT_TEMPLATE_UNKNOWN rather
// than reordering existing entries.
enum llm_chat_template {
    LLM_CHAT_TEMPLATE_CHATML,
    LLM_CHAT_TEMPLATE_LLAMA_2,
    LLM_CHAT_TEMPLATE_LLAMA_2_SYS,
    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS,
    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP,
    LLM_CHAT_TEMPLATE_MISTRAL_V1,
    LLM_CHAT_TEMPLATE_MISTRAL_V3,
    LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
    LLM_CHAT_TEMPLATE_MISTRAL_V7,
    LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN,
    LLM_CHAT_TEMPLATE_PHI_3,
    LLM_CHAT_TEMPLATE_PHI_4,
    LLM_CHAT_TEMPLATE_FALCON_3,
    LLM_CHAT_TEMPLATE_ZEPHYR,
    LLM_CHAT_TEMPLATE_MONARCH,
    LLM_CHAT_TEMPLATE_GEMMA,
    LLM_CHAT_TEMPLATE_ORION,
    LLM_CHAT_TEMPLATE_OPENCHAT,
    LLM_CHAT_TEMPLATE_VICUNA,
    LLM_CHAT_TEMPLATE_VICUNA_ORCA,
    LLM_CHAT_TEMPLATE_DEEPSEEK,
    LLM_CHAT_TEMPLATE_DEEPSEEK_2,
    LLM_CHAT_TEMPLATE_DEEPSEEK_3,
    LLM_CHAT_TEMPLATE_COMMAND_R,
    LLM_CHAT_TEMPLATE_LLAMA_3,
    LLM_CHAT_TEMPLATE_CHATGLM_3,
    LLM_CHAT_TEMPLATE_CHATGLM_4,
    LLM_CHAT_TEMPLATE_GLMEDGE,
    LLM_CHAT_TEMPLATE_MINICPM,
    LLM_CHAT_TEMPLATE_EXAONE_3,
    LLM_CHAT_TEMPLATE_RWKV_WORLD,
    LLM_CHAT_TEMPLATE_GRANITE,
    LLM_CHAT_TEMPLATE_GIGACHAT,
    LLM_CHAT_TEMPLATE_MEGREZ,
    LLM_CHAT_TEMPLATE_YANDEX,
    LLM_CHAT_TEMPLATE_BAILING,
    LLM_CHAT_TEMPLATE_LLAMA4,
    LLM_CHAT_TEMPLATE_SMOLVLM,
    // dots.llm1 ("DOTS1") — rednote-hilab/dots.llm1.{inst,base}
    LLM_CHAT_TEMPLATE_DOTS1,
    // Sentinel: template string not recognized. Keep last.
    LLM_CHAT_TEMPLATE_UNKNOWN,
};
|
|
|
|
struct llama_chat_message;
|
|
|
|
llm_chat_template llm_chat_template_from_str(const std::string & name);
|
|
|
|
llm_chat_template llm_chat_detect_template(const std::string & tmpl);
|
|
|
|
int32_t llm_chat_apply_template(
|
|
llm_chat_template tmpl,
|
|
const std::vector<const llama_chat_message *> & chat,
|
|
std::string & dest, bool add_ass);
|