llama : auto-batch preparation (#13845)

* llama : auto-batch

ggml-ci

* context : simplify if branching
2025-05-31 12:55:57 +03:00 · 2025-05-31 12:55:57 +03:00 · 3f55f781f1
commit 3f55f781f1
parent 51fa76f172
5 changed files with 67 additions and 54 deletions
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -3431,7 +3431,7 @@ struct server_context {
                // retry with half the batch size to try to find a free slot in the KV cache
                n_batch /= 2;

-                SRV_WRN("failed to find free space in the KV cache, retrying with smaller batch size - try increasing it via the context size or enable defragmentation, i = %d, n_batch = %d, ret = %d\n", i, n_batch, ret);
+                SRV_WRN("failed to find free space in the KV cache, retrying with smaller batch size, i = %d, n_batch = %d, ret = %d\n", i, n_batch, ret);

                continue; // continue loop of n_batch
            }