llama-bench : add --no-warmup flag (#14224) (#14270)

Add a no_warmup field to the cmd_params struct and a matching command-line option so users can skip warmup runs before benchmarking.

- Add no_warmup boolean field to cmd_params struct

- Add --no-warmup command-line argument parsing

- Add help text documentation for the new flag

- Wrap existing warmup logic in conditional check

- Maintain full backward compatibility (warmup enabled by default)

Addresses #14224
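
For example, a benchmark without the warmup pass can be requested like this (the model path is illustrative):

    llama-bench -m model.gguf --no-warmup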
Author: bashayer hijji
Date:   2025-06-19 13:24:12 +03:00 (committed by GitHub)
Parent: 5fc7856815
Commit: fffcce535e

@@ -267,6 +267,7 @@ struct cmd_params {
     int delay;
     bool verbose;
     bool progress;
+    bool no_warmup;
     output_formats output_format;
     output_formats output_format_stderr;
 };
@@ -303,6 +304,7 @@ static const cmd_params cmd_params_defaults = {
     /* delay                */ 0,
     /* verbose              */ false,
     /* progress             */ false,
+    /* no_warmup            */ false,
     /* output_format        */ MARKDOWN,
     /* output_format_stderr */ NONE,
 };
@@ -325,6 +327,7 @@ static void print_usage(int /* argc */, char ** argv) {
            output_format_str(cmd_params_defaults.output_format_stderr));
     printf("  -v, --verbose                             verbose output\n");
     printf("  --progress                                print test progress indicators\n");
+    printf("  --no-warmup                               skip warmup runs before benchmarking\n");
     printf("\n");
     printf("test parameters:\n");
     printf("  -m, --model <filename>                    (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
@@ -425,6 +428,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     params.prio = cmd_params_defaults.prio;
     params.delay = cmd_params_defaults.delay;
     params.progress = cmd_params_defaults.progress;
+    params.no_warmup = cmd_params_defaults.no_warmup;
 
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
@@ -798,6 +802,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             params.verbose = true;
         } else if (arg == "--progress") {
             params.progress = true;
+        } else if (arg == "--no-warmup") {
+            params.no_warmup = true;
         } else {
             invalid_param = true;
             break;
@@ -1925,6 +1931,7 @@ int main(int argc, char ** argv) {
         llama_attach_threadpool(ctx, threadpool, NULL);
 
         // warmup run
+        if (!params.no_warmup) {
         if (t.n_prompt > 0) {
             if (params.progress) {
                 fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
@@ -1946,6 +1953,7 @@ int main(int argc, char ** argv) {
                 exit(1);
             }
         }
+        }
 
         for (int i = 0; i < params.reps; i++) {
             llama_memory_clear(llama_get_memory(ctx), false);
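
Taken together, the hunks add a flag that defaults to off and gate the existing warmup logic on it. As a minimal, self-contained sketch of that control flow (a hypothetical standalone program, not the llama-bench source; the struct and function names merely mirror the diff above):

    // Standalone illustration of the pattern introduced by this commit
    // (hypothetical example): a boolean that defaults to false, a
    // --no-warmup argument that sets it, and a warmup step that only
    // runs when the flag was not given.
    #include <cstdio>
    #include <cstring>

    struct cmd_params {
        bool no_warmup = false; // warmup stays enabled by default
    };

    static void warmup_run() {
        std::printf("warmup run\n");
    }

    int main(int argc, char ** argv) {
        cmd_params params;
        for (int i = 1; i < argc; i++) {
            if (std::strcmp(argv[i], "--no-warmup") == 0) {
                params.no_warmup = true;
            }
        }
        if (!params.no_warmup) {
            warmup_run(); // skipped when --no-warmup is passed
        }
        std::printf("benchmark repetitions\n");
        return 0;
    }

Because the flag only adds a branch around the existing warmup code and defaults to false, runs that do not pass --no-warmup behave exactly as before.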