diff --git a/common/arg.cpp b/common/arg.cpp
index e35417de..aface844 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2783,7 +2783,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP"));
add_opt(common_arg(
{"--cache-reuse"}, "N",
- string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse),
+ string_format(
+ "min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
+ "[(card)](https://ggml.ai/f0.png)", params.n_cache_reuse
+ ),
[](common_params & params, int value) {
params.n_cache_reuse = value;
}
diff --git a/examples/server/README.md b/examples/server/README.md
index a2a09032..61446a0b 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -154,7 +154,7 @@ The project is under active development, and we are [looking for feedback and co
| `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate
(env: LLAMA_ARG_SSL_CERT_FILE) |
| `-to, --timeout N` | server read/write timeout in seconds (default: 600)
(env: LLAMA_ARG_TIMEOUT) |
| `--threads-http N` | number of threads used to process HTTP requests (default: -1)
(env: LLAMA_ARG_THREADS_HTTP) |
-| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)
(env: LLAMA_ARG_CACHE_REUSE) |
+| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)
[(card)](https://ggml.ai/f0.png)
(env: LLAMA_ARG_CACHE_REUSE) |
| `--metrics` | enable prometheus compatible metrics endpoint (default: disabled)
(env: LLAMA_ARG_ENDPOINT_METRICS) |
| `--slots` | enable slots monitoring endpoint (default: disabled)
(env: LLAMA_ARG_ENDPOINT_SLOTS) |
| `--props` | enable changing global properties via POST /props (default: disabled)
(env: LLAMA_ARG_ENDPOINT_PROPS) |