llama : Add support for DeepSeek V3 (#11049)

* convert : extend DEEPSEEK2 model architecture to support DeepseekV3ForCausalLM by adding EXPERT_WEIGHTS_NORM and EXPERT_GATING_FUNC model parameters and FFN_EXP_PROBS_B tensor type
* vocab : add DeepSeek V3 pre-tokenizer regexes
* unicode : handle ACCENT_MARK and SYMBOL categories in regex
* llama : add DeepSeek V3 chat template, handle new model parameters and tensor types

---------

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
2025-01-04 21:06:11 +01:00 · 2025-01-04 21:06:11 +01:00 · 9394bbd484
commit 9394bbd484
parent f922a9c542
16 changed files with 162 additions and 5 deletions
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -63,6 +63,7 @@ enum llm_type {
    MODEL_70B,
    MODEL_236B,
    MODEL_314B,
+    MODEL_671B,
    MODEL_SMALL,
    MODEL_MEDIUM,
    MODEL_LARGE,
@@ -213,6 +214,7 @@ struct llama_layer {
    struct ggml_tensor * ffn_down_b = nullptr; // b2
    struct ggml_tensor * ffn_up_b   = nullptr; // b3
    struct ggml_tensor * ffn_act    = nullptr;
+    struct ggml_tensor * ffn_exp_probs_b = nullptr;

    // mamba proj
    struct ggml_tensor * ssm_in  = nullptr;