server : do not return error out of context (with ctx shift disabled) (#13577)

This commit is contained in:
Xuan-Son Nguyen 2025-05-16 21:50:00 +02:00 committed by GitHub
parent aea9f8b4e7
commit 6aa892ec2a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 26 additions and 0 deletions

View file

@ -2251,6 +2251,14 @@ struct server_context {
slot.has_next_token = true; slot.has_next_token = true;
} }
// if context shifting is disabled, make sure that we don't run out of context
if (!params_base.ctx_shift && slot.n_past + 1 >= slot.n_ctx) {
slot.stop = STOP_TYPE_LIMIT;
slot.has_next_token = false;
SLT_DBG(slot, "stopped due to running out of context, n_past = %d, n_ctx = %d\n", slot.n_past, slot.n_ctx);
}
// check the limits // check the limits
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) { if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) {
slot.stop = STOP_TYPE_LIMIT; slot.stop = STOP_TYPE_LIMIT;

View file

@ -65,3 +65,21 @@ def test_ctx_shift_disabled_long_prompt():
assert res.status_code != 200 assert res.status_code != 200
assert "error" in res.body assert "error" in res.body
assert "exceeds the available context size" in res.body["error"]["message"] assert "exceeds the available context size" in res.body["error"]["message"]
def test_ctx_shift_disabled_stream():
    """Stream a completion with context shift disabled and check how it ends.

    Every streamed chunk must carry either no finish reason (a regular text
    chunk) or the "length" finish reason, and some text must have been
    received before the "length" chunk. The stream must actually contain a
    "length" chunk; otherwise the per-chunk checks never exercise the stop
    path and the test would pass without verifying anything.
    """
    global server
    server.disable_ctx_shift = True
    server.start()
    res = server.make_stream_request("POST", "/v1/completions", data={
        "n_predict": 256,
        "prompt": "Once",
        "stream": True,
    })
    content = ""
    saw_length_stop = False
    for data in res:
        choice = data["choices"][0]
        if choice["finish_reason"] == "length":
            # The stop must come after at least some generated text.
            assert len(content) > 0
            saw_length_stop = True
        else:
            # Any other chunk must be a plain text chunk with no finish reason.
            assert choice["finish_reason"] is None
            content += choice["text"]
    # Guard against a vacuous pass: an empty or truncated stream never enters
    # the "length" branch above, so require that it was reached.
    assert saw_length_stop, "stream ended without a 'length' finish_reason"