llama-model : support Qwen2 embedding models and pooling_mode_lasttoken (#13245)
This commit is contained in:
parent
7d2123484e
commit
2f567611c0
3 changed files with 45 additions and 28 deletions
|
@@ -773,6 +773,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
// fall through
|
||||
case LLM_ARCH_QWEN2:
|
||||
{
|
||||
ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
switch (hparams.n_layer) {
|
||||
case 24: type = hparams.n_embd == 1024 ? LLM_TYPE_0_5B : LLM_TYPE_1B; break;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue