llama: Add support for RWKV v7 architecture (#12412)
* ggml: Add op l2_norm

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* ggml: Add op rwkv_wkv7

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: Add support for RWKV7 and ARWKV7 models

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: fix inference with RWKV6Qwen2

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: add more (a)rwkv7 variants in size

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Apply code-format changes

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* fix MUSA build

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: fix shape error with rwkv using llama-parallel

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
parent
60c902926c
commit
7dfad387e3
35 changed files with 2948 additions and 438 deletions
|
@@ -29,6 +29,7 @@ enum llm_type {
|
|||
LLM_TYPE_109M,
|
||||
LLM_TYPE_137M,
|
||||
LLM_TYPE_160M,
|
||||
LLM_TYPE_190M,
|
||||
LLM_TYPE_220M,
|
||||
LLM_TYPE_250M,
|
||||
LLM_TYPE_270M,
|
||||
|
@@ -45,6 +46,7 @@ enum llm_type {
|
|||
LLM_TYPE_1_6B,
|
||||
LLM_TYPE_2B,
|
||||
LLM_TYPE_2_8B,
|
||||
LLM_TYPE_2_9B,
|
||||
LLM_TYPE_3B,
|
||||
LLM_TYPE_4B,
|
||||
LLM_TYPE_6B,
|
||||
|
@@ -260,6 +262,20 @@ struct llama_layer {
|
|||
struct ggml_tensor * time_mix_receptance_b = nullptr;
|
||||
struct ggml_tensor * time_mix_gate = nullptr;
|
||||
|
||||
// rwkv7
|
||||
struct ggml_tensor * time_mix_w0 = nullptr;
|
||||
struct ggml_tensor * time_mix_a0 = nullptr;
|
||||
struct ggml_tensor * time_mix_a1 = nullptr;
|
||||
struct ggml_tensor * time_mix_a2 = nullptr;
|
||||
struct ggml_tensor * time_mix_v0 = nullptr;
|
||||
struct ggml_tensor * time_mix_v1 = nullptr;
|
||||
struct ggml_tensor * time_mix_v2 = nullptr;
|
||||
struct ggml_tensor * time_mix_g1 = nullptr;
|
||||
struct ggml_tensor * time_mix_g2 = nullptr;
|
||||
struct ggml_tensor * time_mix_k_k = nullptr;
|
||||
struct ggml_tensor * time_mix_k_a = nullptr;
|
||||
struct ggml_tensor * time_mix_r_k = nullptr;
|
||||
|
||||
struct ggml_tensor * time_mix_ln = nullptr;
|
||||
struct ggml_tensor * time_mix_ln_b = nullptr;
|
||||
struct ggml_tensor * time_mix_output = nullptr;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue