server: --offline mode (#13804)

* server: --offline mode (env: LLAMA_OFFLINE)

---------

Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
This commit is contained in:
Olivier Chafik 2025-05-26 14:34:27 -07:00 committed by GitHub
parent a26c4cc11e
commit cdf94a1802
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 127 additions and 106 deletions

View file

@ -291,6 +291,7 @@ struct common_params {
int32_t verbosity = 0;
int32_t control_vector_layer_start = -1; // layer range for control vector
int32_t control_vector_layer_end = -1; // layer range for control vector
bool offline = false;
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line