llama : rework embeddings logic (#14208)

* llama : rework embeddings logic ggml-ci
* cont : fix rerank ggml-ci
* cont : engrish [no ci]
* cont : fix rerank ggml-ci
* server : support both embeddings and completions with single model ggml-ci
* cont : avoid embeddings_org ggml-ci
2025-06-16 14:14:00 +03:00 · 2025-06-16 14:14:00 +03:00 · d3e64b9f49
commit d3e64b9f49
parent 3ba0d843c6
16 changed files with 159 additions and 114 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -355,7 +355,6 @@ struct common_params {
    int32_t embd_normalize = 2;     // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
    std::string embd_out   = "";    // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
    std::string embd_sep   = "\n";  // separator of embeddings
-    bool reranking         = false; // enable reranking support on server

    // server params
    int32_t port           = 8080;         // server listens on this network port