Fixed spec timings to: accepted/tested instead of accepted/drafted (#14104)
This commit is contained in:
parent
ae92c1855b
commit
3a12db23b6
1 changed file with 4 additions and 4 deletions
|
@@ -3556,9 +3556,6 @@ struct server_context {
|
||||||
const llama_tokens & cached_text_tokens = slot.cache_tokens.get_text_tokens();
|
const llama_tokens & cached_text_tokens = slot.cache_tokens.get_text_tokens();
|
||||||
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, cached_text_tokens, id);
|
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, cached_text_tokens, id);
|
||||||
|
|
||||||
// keep track of total number of tokens generated in the draft
|
|
||||||
slot.n_draft_total += draft.size();
|
|
||||||
|
|
||||||
// ignore small drafts
|
// ignore small drafts
|
||||||
if (slot.params.speculative.n_min > (int) draft.size()) {
|
if (slot.params.speculative.n_min > (int) draft.size()) {
|
||||||
SLT_DBG(slot, "ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
|
SLT_DBG(slot, "ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
|
||||||
|
@@ -3566,6 +3563,9 @@ struct server_context {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// keep track of total number of drafted tokens tested
|
||||||
|
slot.n_draft_total += draft.size();
|
||||||
|
|
||||||
// construct the speculation batch
|
// construct the speculation batch
|
||||||
common_batch_clear(slot.batch_spec);
|
common_batch_clear(slot.batch_spec);
|
||||||
common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
|
common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
|
||||||
|
@@ -3584,7 +3584,7 @@ struct server_context {
|
||||||
slot.n_past += ids.size();
|
slot.n_past += ids.size();
|
||||||
slot.n_decoded += ids.size();
|
slot.n_decoded += ids.size();
|
||||||
|
|
||||||
// update how many tokens out of draft was accepted
|
// update how many tokens out of those tested were accepted
|
||||||
slot.n_draft_accepted += ids.size() - 1;
|
slot.n_draft_accepted += ids.size() - 1;
|
||||||
|
|
||||||
slot.cache_tokens.push_back(id);
|
slot.cache_tokens.push_back(id);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue