llama : Support llama 4 text-only (#12791)
* llama4 conversion * initial support, no chat template * clean up a bit * fix tokenizer conversion * correct hparams * try this * fix shexp * ffn_inp_normed * chat template * clean up model conversion * add_bos * add scale_before_ffn * fix order * weight_before_ffn * llm_graph_input_attn_temp * add chunk attn mask * build_inp_attn_scale() * add comment about ggml_repeat * clarify comments * fix build
This commit is contained in:
parent
82974011f3
commit
1466621e73
17 changed files with 532 additions and 22 deletions
|
@ -112,6 +112,14 @@ struct llama_hparams {
|
|||
bool use_alibi = false;
|
||||
bool attn_soft_cap = false;
|
||||
|
||||
uint32_t n_moe_layer_step = 0;
|
||||
bool use_kq_norm = true;
|
||||
uint32_t n_attn_chunk = 0;
|
||||
// values below seems to be fixed on llama4
|
||||
uint32_t n_no_rope_layer_step = 4;
|
||||
uint32_t n_attn_temp_floor_scale = 8192;
|
||||
float f_attn_temp_scale = 0.1;
|
||||
|
||||
// needed by encoder-decoder models (e.g. T5, FLAN-T5)
|
||||
// ref: https://github.com/ggerganov/llama.cpp/pull/8141
|
||||
llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue