llama : rework embeddings logic (#14208)

* llama : rework embeddings logic

ggml-ci

* cont : fix rerank

ggml-ci

* cont : engrish [no ci]

* cont : fix rerank

ggml-ci

* server : support both embeddings and completions with single model

ggml-ci

* cont : avoid embeddings_org

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-06-16 14:14:00 +03:00 committed by GitHub
parent 3ba0d843c6
commit d3e64b9f49
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 159 additions and 114 deletions

View file

@@ -310,8 +310,8 @@ llama_pos llama_kv_cache_unified::seq_pos_max(llama_seq_id seq_id) const {
llama_memory_state_ptr llama_kv_cache_unified::init_batch(
const llama_batch & batch,
uint32_t n_ubatch,
- bool embd_pooled) {
- GGML_UNUSED(embd_pooled);
+ bool embd_all) {
+ GGML_UNUSED(embd_all);
do {
auto sbatch = llama_sbatch(batch, hparams.n_embd, true);