context : remove logits_all flag (#13284)

* context : remove logits_all flag

ggml-ci

* llama : remove logits_all flag + reorder llama_context_params

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-05-08 14:26:50 +03:00 committed by GitHub
parent 70a6991edf
commit 51fb96b1ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 13 additions and 37 deletions

View file

@@ -116,8 +116,6 @@ llama_context::llama_context(
__func__, n_ctx_per_seq, hparams.n_ctx_train);
}
-logits_all = params.logits_all;
if (!hparams.vocab_only) {
// GPU backends
for (auto * dev : model.devices) {
@@ -890,7 +888,7 @@ int llama_context::decode(llama_batch & inp_batch) {
for (uint32_t i = 0; i < n_tokens_all; ++i) {
n_outputs_all += batch.logits[i] != 0;
}
-} else if (logits_all || embd_pooled) {
+} else if (embd_pooled) {
n_outputs_all = n_tokens_all;
} else {
// keep last output only
@@ -1853,13 +1851,12 @@ llama_context_params llama_context_default_params() {
/*.cb_eval_user_data =*/ nullptr,
/*.type_k =*/ GGML_TYPE_F16,
/*.type_v =*/ GGML_TYPE_F16,
-/*.logits_all =*/ false,
-/*.abort_callback =*/ nullptr,
-/*.abort_callback_data =*/ nullptr,
/*.embeddings =*/ false,
/*.offload_kqv =*/ true,
/*.flash_attn =*/ false,
/*.no_perf =*/ true,
+/*.abort_callback =*/ nullptr,
+/*.abort_callback_data =*/ nullptr,
};
return result;