common : reimplement logging (#9418)
https://github.com/ggerganov/llama.cpp/pull/9418
This commit is contained in:
parent
e6deac31f7
commit
6262d13e0b
54 changed files with 2092 additions and 2419 deletions
|
@ -51,21 +51,23 @@ endif()
|
|||
set(TARGET common)
|
||||
|
||||
add_library(${TARGET} STATIC
|
||||
base64.hpp
|
||||
common.h
|
||||
common.cpp
|
||||
arg.h
|
||||
arg.cpp
|
||||
sampling.h
|
||||
sampling.cpp
|
||||
console.h
|
||||
arg.h
|
||||
base64.hpp
|
||||
common.cpp
|
||||
common.h
|
||||
console.cpp
|
||||
json.hpp
|
||||
console.h
|
||||
json-schema-to-grammar.cpp
|
||||
train.h
|
||||
train.cpp
|
||||
ngram-cache.h
|
||||
json.hpp
|
||||
log.cpp
|
||||
log.h
|
||||
ngram-cache.cpp
|
||||
ngram-cache.h
|
||||
sampling.cpp
|
||||
sampling.h
|
||||
train.cpp
|
||||
train.h
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
|
|
115
common/arg.cpp
115
common/arg.cpp
|
@ -1,15 +1,17 @@
|
|||
#include "arg.h"
|
||||
|
||||
#include "log.h"
|
||||
#include "sampling.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <climits>
|
||||
#include <cstdarg>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <cstdarg>
|
||||
#include <climits>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "json-schema-to-grammar.h"
|
||||
|
||||
|
@ -383,20 +385,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
exit(0);
|
||||
}
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"-v", "--verbose"},
|
||||
"print verbose information",
|
||||
[](gpt_params & params) {
|
||||
params.verbosity = 1;
|
||||
}
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--verbosity"}, "N",
|
||||
format("set specific verbosity level (default: %d)", params.verbosity),
|
||||
[](gpt_params & params, int value) {
|
||||
params.verbosity = value;
|
||||
}
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--verbose-prompt"},
|
||||
format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
|
||||
|
@ -417,7 +405,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
[](gpt_params & params) {
|
||||
params.use_color = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
|
||||
add_opt(llama_arg(
|
||||
{"-t", "--threads"}, "N",
|
||||
format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
|
||||
|
@ -876,7 +864,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
params.input_prefix = value;
|
||||
params.enable_chat_template = false;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
|
||||
add_opt(llama_arg(
|
||||
{"--in-suffix"}, "STRING",
|
||||
"string to suffix after user inputs with (default: empty)",
|
||||
|
@ -884,7 +872,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
params.input_suffix = value;
|
||||
params.enable_chat_template = false;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
|
||||
add_opt(llama_arg(
|
||||
{"--no-warmup"},
|
||||
"skip warming up the model with an empty run",
|
||||
|
@ -1824,19 +1812,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
params.system_prompt = system_prompt;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(llama_arg(
|
||||
{"--log-format"}, "{text, json}",
|
||||
"log output format: json or text (default: json)",
|
||||
[](gpt_params & params, const std::string & value) {
|
||||
if (value == "json") {
|
||||
params.log_json = true;
|
||||
} else if (value == "text") {
|
||||
params.log_json = false;
|
||||
} else {
|
||||
throw std::invalid_argument("invalid value");
|
||||
}
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(llama_arg(
|
||||
{"--metrics"},
|
||||
format("enable prometheus compatible metrics endpoint (default: %s)", params.endpoint_metrics ? "enabled" : "disabled"),
|
||||
|
@ -1956,39 +1931,57 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
else { throw std::invalid_argument("invalid value"); } // must throw: a bare temporary exception object is discarded and the bad value would be accepted
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_BENCH}));
|
||||
#ifndef LOG_DISABLE_LOGS
|
||||
// TODO: make this look less weird
|
||||
add_opt(llama_arg(
|
||||
{"--log-test"},
|
||||
"Log test",
|
||||
[](gpt_params &) { log_param_single_parse("--log-test"); }
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--log-disable"},
|
||||
"Log disable",
|
||||
[](gpt_params &) { log_param_single_parse("--log-disable"); }
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--log-enable"},
|
||||
"Log enable",
|
||||
[](gpt_params &) { log_param_single_parse("--log-enable"); }
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--log-new"},
|
||||
"Log new",
|
||||
[](gpt_params &) { log_param_single_parse("--log-new"); }
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--log-append"},
|
||||
"Log append",
|
||||
[](gpt_params &) { log_param_single_parse("--log-append"); }
|
||||
[](gpt_params &) {
|
||||
gpt_log_pause(gpt_log_main());
|
||||
}
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--log-file"}, "FNAME",
|
||||
"Log file",
|
||||
[](gpt_params &, const std::string & value) { log_param_pair_parse(false, "--log-file", value); }
|
||||
"Log to file",
|
||||
[](gpt_params &, const std::string & value) {
|
||||
gpt_log_set_file(gpt_log_main(), value.c_str());
|
||||
}
|
||||
));
|
||||
#endif // LOG_DISABLE_LOGS
|
||||
add_opt(llama_arg(
|
||||
{"--log-colors"},
|
||||
"Enable colored logging",
|
||||
[](gpt_params &) {
|
||||
gpt_log_set_colors(gpt_log_main(), true);
|
||||
}
|
||||
).set_env("LLAMA_LOG_COLORS"));
|
||||
add_opt(llama_arg(
|
||||
{"-v", "--verbose", "--log-verbose"},
|
||||
"Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
|
||||
[](gpt_params & params) {
|
||||
params.verbosity = INT_MAX;
|
||||
gpt_log_set_verbosity_thold(INT_MAX);
|
||||
}
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"-lv", "--verbosity", "--log-verbosity"}, "N",
|
||||
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
|
||||
[](gpt_params & params, int value) {
|
||||
params.verbosity = value;
|
||||
gpt_log_set_verbosity_thold(value);
|
||||
}
|
||||
).set_env("LLAMA_LOG_VERBOSITY"));
|
||||
// --log-prefix (env: LLAMA_LOG_PREFIX): turns on the prefix for messages written to the main log
add_opt(llama_arg(
|
||||
{"--log-prefix"},
|
||||
"Enable prefix in log messages",
|
||||
[](gpt_params &) {
|
||||
gpt_log_set_prefix(gpt_log_main(), true);
|
||||
}
|
||||
).set_env("LLAMA_LOG_PREFIX"));
|
||||
add_opt(llama_arg(
|
||||
{"--log-timestamps"},
|
||||
"Enable timestamps in log messages",
|
||||
[](gpt_params &) {
|
||||
gpt_log_set_timestamps(gpt_log_main(), true);
|
||||
}
|
||||
).set_env("LLAMA_LOG_TIMESTAMPS"));
|
||||
|
||||
return ctx_arg;
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#endif
|
||||
|
||||
#include "common.h"
|
||||
#include "log.h"
|
||||
// Change JSON_ASSERT from assert() to GGML_ASSERT:
|
||||
#define JSON_ASSERT GGML_ASSERT
|
||||
#include "json.hpp"
|
||||
|
@ -25,6 +26,7 @@
|
|||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
|
||||
#if defined(__APPLE__) && defined(__MACH__)
|
||||
#include <sys/types.h>
|
||||
|
@ -48,7 +50,6 @@
|
|||
#if defined(LLAMA_USE_CURL)
|
||||
#include <curl/curl.h>
|
||||
#include <curl/easy.h>
|
||||
#include <thread>
|
||||
#include <future>
|
||||
#endif
|
||||
|
||||
|
@ -226,7 +227,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
|||
}
|
||||
|
||||
if (!SetPriorityClass(GetCurrentProcess(), p)) {
|
||||
fprintf(stderr, "warn: failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
|
||||
LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -251,7 +252,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
|||
}
|
||||
|
||||
// setpriority() returns 0 on success and -1 on error, so only a non-zero result is a failure
if (setpriority(PRIO_PROCESS, 0, p) != 0) {
|
||||
fprintf(stderr, "warn: failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
|
||||
LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -284,14 +285,14 @@ void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model)
|
|||
|
||||
if (n_set && n_set < cpuparams.n_threads) {
|
||||
// Not enough set bits, may experience performance issues.
|
||||
fprintf(stderr, "warn: Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
|
||||
LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
|
||||
}
|
||||
}
|
||||
|
||||
bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) {
|
||||
size_t dash_loc = range.find('-');
|
||||
if (dash_loc == std::string::npos) {
|
||||
fprintf(stderr, "Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
|
||||
LOG_ERR("Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -303,7 +304,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE
|
|||
} else {
|
||||
start_i = std::stoull(range.substr(0, dash_loc));
|
||||
if (start_i >= GGML_MAX_N_THREADS) {
|
||||
fprintf(stderr, "Start index out of bounds!\n");
|
||||
LOG_ERR("Start index out of bounds!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -313,7 +314,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE
|
|||
} else {
|
||||
end_i = std::stoull(range.substr(dash_loc + 1));
|
||||
if (end_i >= GGML_MAX_N_THREADS) {
|
||||
fprintf(stderr, "End index out of bounds!\n");
|
||||
LOG_ERR("End index out of bounds!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -348,7 +349,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
|
|||
} else if (c >= 'A' && c <= 'F') {
|
||||
id -= 'A' - 10;
|
||||
} else {
|
||||
fprintf(stderr, "Invalid hex character '%c' at position %d\n", c, int32_t(i));
|
||||
LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -361,6 +362,22 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
|
|||
return true;
|
||||
}
|
||||
|
||||
// Initializes logging for programs that use libcommon.
//
// Registers a llama log callback that forwards each message (with its level) to the
// main gpt log, then logs one INFO line with the build number, commit, compiler and
// build target. The line carries a " (debug)" suffix unless NDEBUG is defined.
void gpt_init() {
|
||||
llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
|
||||
// forward only when the default llama verbosity is within the current threshold
if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_thold) {
|
||||
// pass the text through "%s" so it is never interpreted as a format string
gpt_log_add(gpt_log_main(), level, "%s", text);
|
||||
}
|
||||
}, NULL);
|
||||
|
||||
#ifdef NDEBUG
|
||||
const char * build_type = "";
|
||||
#else
|
||||
const char * build_type = " (debug)";
|
||||
#endif
|
||||
|
||||
LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
|
||||
}
|
||||
|
||||
std::string gpt_params_get_system_info(const gpt_params & params) {
|
||||
std::ostringstream os;
|
||||
|
||||
|
@ -441,6 +458,94 @@ void string_replace_all(std::string & s, const std::string & search, const std::
|
|||
s = std::move(builder);
|
||||
}
|
||||
|
||||
// Converts a boolean to its textual form: "true" or "false".
std::string string_from(bool value) {
|
||||
return value ? "true" : "false";
|
||||
}
|
||||
|
||||
// Formats a list of integers as "[ a, b, c ]".
// An empty vector yields "[  ]" (opening "[ " immediately followed by closing " ]").
std::string string_from(const std::vector<int> & values) {
|
||||
std::stringstream buf;
|
||||
|
||||
buf << "[ ";
|
||||
bool first = true;
|
||||
for (auto e : values) {
|
||||
// emit the ", " separator before every element except the first
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
buf << ", ";
|
||||
}
|
||||
buf << std::to_string(e);
|
||||
}
|
||||
buf << " ]";
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
// Formats a token list as "[ 'piece':id, 'piece':id, ... ]".
//
// Each token is converted to text with llama_token_to_piece(ctx, token); characters for
// which std::isprint() is false are removed from that text before it is written.
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
|
||||
std::stringstream buf;
|
||||
|
||||
buf << "[ ";
|
||||
|
||||
bool first = true;
|
||||
for (const auto & token : tokens) {
|
||||
// emit the ", " separator before every entry except the first
if (!first) {
|
||||
buf << ", ";
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
|
||||
auto detokenized = llama_token_to_piece(ctx, token);
|
||||
|
||||
// erase-remove: drop every non-printable character from the piece
detokenized.erase(
|
||||
std::remove_if(
|
||||
detokenized.begin(),
|
||||
detokenized.end(),
|
||||
[](const unsigned char c) { return !std::isprint(c); }),
|
||||
detokenized.end());
|
||||
|
||||
buf << "'" << detokenized << "'"
|
||||
<< ":" << std::to_string(token);
|
||||
}
|
||||
|
||||
buf << " ]";
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
// Formats the first batch.n_tokens entries of a batch as a bracketed, comma-separated list.
//
// Each entry starts on a new line and has the form
//   <i>:token '<piece>':pos <pos>:n_seq_id <n>:seq_id <id>:logits <flag>
// where <piece> is llama_token_to_piece(ctx, batch.token[i]) with characters for which
// std::isprint() is false removed. Only the first sequence id (batch.seq_id[i][0]) of
// each entry is printed.
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
|
||||
std::stringstream buf;
|
||||
|
||||
buf << "[ ";
|
||||
|
||||
bool first = true;
|
||||
for (int i = 0; i < batch.n_tokens; ++i) {
|
||||
// emit the ", " separator before every entry except the first
if (!first) {
|
||||
buf << ", ";
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
|
||||
auto detokenized = llama_token_to_piece(ctx, batch.token[i]);
|
||||
|
||||
// erase-remove: drop every non-printable character from the piece
detokenized.erase(
|
||||
std::remove_if(
|
||||
detokenized.begin(),
|
||||
detokenized.end(),
|
||||
[](const unsigned char c) { return !std::isprint(c); }),
|
||||
detokenized.end());
|
||||
|
||||
buf << "\n" << std::to_string(i)
|
||||
<< ":token '" << detokenized << "'"
|
||||
<< ":pos " << std::to_string(batch.pos[i])
|
||||
<< ":n_seq_id " << std::to_string(batch.n_seq_id[i])
|
||||
<< ":seq_id " << std::to_string(batch.seq_id[i][0])
|
||||
<< ":logits " << std::to_string(batch.logits[i]);
|
||||
}
|
||||
|
||||
buf << " ]";
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
void string_process_escapes(std::string & input) {
|
||||
std::size_t input_len = input.length();
|
||||
std::size_t output_idx = 0;
|
||||
|
@ -481,7 +586,7 @@ void string_process_escapes(std::string & input) {
|
|||
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
|
||||
const char * sep = strchr(data, '=');
|
||||
if (sep == nullptr || sep - data >= 128) {
|
||||
fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
|
||||
LOG_ERR("%s: malformed KV override '%s'\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
llama_model_kv_override kvo;
|
||||
|
@ -504,20 +609,20 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
|
|||
} else if (std::strcmp(sep, "false") == 0) {
|
||||
kvo.val_bool = false;
|
||||
} else {
|
||||
fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
|
||||
LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
} else if (strncmp(sep, "str:", 4) == 0) {
|
||||
sep += 4;
|
||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
|
||||
if (strlen(sep) > 127) {
|
||||
fprintf(stderr, "%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
|
||||
LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
strncpy(kvo.val_str, sep, 127);
|
||||
kvo.val_str[127] = '\0';
|
||||
} else {
|
||||
fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
|
||||
LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
overrides.emplace_back(std::move(kvo));
|
||||
|
@ -729,7 +834,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
}
|
||||
|
||||
if (model == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||
return iparams;
|
||||
}
|
||||
|
||||
|
@ -737,7 +842,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
|
||||
llama_context * lctx = llama_new_context_with_model(model, cparams);
|
||||
if (lctx == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
|
||||
LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
|
||||
llama_free_model(model);
|
||||
return iparams;
|
||||
}
|
||||
|
@ -773,7 +878,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
loaded_la.scale = la.scale;
|
||||
loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
|
||||
if (loaded_la.adapter == nullptr) {
|
||||
fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
llama_free(lctx);
|
||||
llama_free_model(model);
|
||||
return iparams;
|
||||
|
@ -785,12 +890,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
}
|
||||
|
||||
if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
|
||||
fprintf(stderr, "%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
|
||||
LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
|
||||
params.sparams.ignore_eos = false;
|
||||
}
|
||||
|
||||
if (params.warmup) {
|
||||
LOG("warming up the model with an empty run\n");
|
||||
LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
|
||||
|
||||
std::vector<llama_token> tmp;
|
||||
llama_token bos = llama_token_bos(model);
|
||||
|
@ -955,7 +1060,7 @@ static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_
|
|||
int remaining_attempts = max_attempts;
|
||||
|
||||
while (remaining_attempts > 0) {
|
||||
fprintf(stderr, "%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
|
||||
LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
if (res == CURLE_OK) {
|
||||
|
@ -963,13 +1068,14 @@ static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_
|
|||
}
|
||||
|
||||
int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
|
||||
fprintf(stderr, "%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
|
||||
LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
|
||||
|
||||
remaining_attempts--;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
||||
LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -978,7 +1084,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
// Initialize libcurl
|
||||
std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
|
||||
if (!curl) {
|
||||
fprintf(stderr, "%s: error initializing libcurl\n", __func__);
|
||||
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1019,11 +1125,11 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
if (metadata_in.good()) {
|
||||
try {
|
||||
metadata_in >> metadata;
|
||||
fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
||||
LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
||||
if (metadata.contains("url") && metadata.at("url").is_string()) {
|
||||
auto previous_url = metadata.at("url").get<std::string>();
|
||||
if (previous_url != url) {
|
||||
fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
|
||||
LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1034,12 +1140,12 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
last_modified = metadata.at("lastModified");
|
||||
}
|
||||
} catch (const nlohmann::json::exception & e) {
|
||||
fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
||||
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "%s: no previous model file found %s\n", __func__, path.c_str());
|
||||
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
||||
}
|
||||
|
||||
// Send a HEAD request to retrieve the etag and last-modified headers
|
||||
|
@ -1087,26 +1193,26 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
// HEAD not supported, we don't know if the file has changed
|
||||
// force trigger downloading
|
||||
force_download = true;
|
||||
fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
||||
LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
||||
}
|
||||
}
|
||||
|
||||
bool should_download = !file_exists || force_download;
|
||||
if (!should_download) {
|
||||
if (!etag.empty() && etag != headers.etag) {
|
||||
fprintf(stderr, "%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
||||
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
||||
should_download = true;
|
||||
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
||||
fprintf(stderr, "%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
|
||||
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
|
||||
should_download = true;
|
||||
}
|
||||
}
|
||||
if (should_download) {
|
||||
std::string path_temporary = path + ".downloadInProgress";
|
||||
if (file_exists) {
|
||||
fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
||||
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
||||
if (remove(path.c_str()) != 0) {
|
||||
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path.c_str());
|
||||
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1121,7 +1227,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
|
||||
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
|
||||
if (!outfile) {
|
||||
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
|
||||
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1152,7 +1258,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
};
|
||||
|
||||
// start the download
|
||||
fprintf(stderr, "%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
||||
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
||||
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
|
||||
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
|
||||
if (!was_perform_successful) {
|
||||
|
@ -1162,7 +1268,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
long http_code = 0;
|
||||
curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
||||
if (http_code < 200 || http_code >= 400) {
|
||||
fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
|
||||
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1176,10 +1282,10 @@ static bool llama_download_file(const std::string & url, const std::string & pat
|
|||
{"lastModified", headers.last_modified}
|
||||
});
|
||||
std::ofstream(metadata_path) << metadata.dump(4);
|
||||
fprintf(stderr, "%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
||||
LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
||||
|
||||
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
||||
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
||||
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1194,7 +1300,7 @@ struct llama_model * llama_load_model_from_url(
|
|||
const struct llama_model_params & params) {
|
||||
// Basic validation of the model_url
|
||||
if (!model_url || strlen(model_url) == 0) {
|
||||
fprintf(stderr, "%s: invalid model_url\n", __func__);
|
||||
LOG_ERR("%s: invalid model_url\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1211,7 +1317,7 @@ struct llama_model * llama_load_model_from_url(
|
|||
};
|
||||
auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
|
||||
if (!ctx_gguf) {
|
||||
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model);
|
||||
LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, path_model);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1231,14 +1337,12 @@ struct llama_model * llama_load_model_from_url(
|
|||
// and extract split URL and PATH prefixes
|
||||
{
|
||||
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
|
||||
fprintf(stderr, "\n%s: unexpected model file name: %s"
|
||||
" n_split=%d\n", __func__, path_model, n_split);
|
||||
LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, path_model, n_split);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
|
||||
fprintf(stderr, "\n%s: unexpected model url: %s"
|
||||
" n_split=%d\n", __func__, model_url, n_split);
|
||||
LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url, n_split);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -1298,7 +1402,7 @@ struct llama_model * llama_load_model_from_url(
|
|||
const char * /*path_model*/,
|
||||
const char * /*hf_token*/,
|
||||
const struct llama_model_params & /*params*/) {
|
||||
fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
|
||||
LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1308,7 +1412,7 @@ struct llama_model * llama_load_model_from_hf(
|
|||
const char * /*path_model*/,
|
||||
const char * /*hf_token*/,
|
||||
const struct llama_model_params & /*params*/) {
|
||||
fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
|
||||
LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1636,13 +1740,13 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
|
|||
};
|
||||
struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
|
||||
if (!ctx_gguf) {
|
||||
fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
|
||||
LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||
if (n_tensors == 0) {
|
||||
fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
|
||||
LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_tensors; i++) {
|
||||
|
@ -1660,23 +1764,23 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
|
|||
}
|
||||
}
|
||||
if (layer_idx < 0) {
|
||||
fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
|
||||
LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
} else if (layer_idx == 0) {
|
||||
fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
|
||||
LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
|
||||
if (tensor->type != GGML_TYPE_F32) {
|
||||
fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
|
||||
LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
if (ggml_n_dims(tensor) != 1) {
|
||||
fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
|
||||
LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
|
@ -1684,7 +1788,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
|
|||
if (result.n_embd == -1) {
|
||||
result.n_embd = ggml_nelements(tensor);
|
||||
} else if (ggml_nelements(tensor) != result.n_embd) {
|
||||
fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
|
||||
LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
|
@ -1701,7 +1805,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
|
|||
}
|
||||
|
||||
if (result.n_embd == -1) {
|
||||
fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
|
||||
LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
|
||||
result.data.clear();
|
||||
}
|
||||
|
||||
|
@ -1722,7 +1826,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
|
|||
break;
|
||||
}
|
||||
if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
|
||||
fprintf(stderr, "%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
|
||||
LOG_ERR("%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
|
@ -1738,7 +1842,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
|
|||
}
|
||||
|
||||
if (result.n_embd == -1) {
|
||||
fprintf(stderr, "%s: no valid control vector files passed\n", __func__);
|
||||
LOG_ERR("%s: no valid control vector files passed\n", __func__);
|
||||
result.data.clear();
|
||||
}
|
||||
|
||||
|
|
|
@ -4,11 +4,9 @@
|
|||
|
||||
#include "llama.h"
|
||||
|
||||
#define LOG_NO_FILE_LINE_FUNCTION
|
||||
#include "log.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DIRECTORY_SEPARATOR '\\'
|
||||
|
@ -343,6 +341,10 @@ struct gpt_params {
|
|||
bool batched_bench_output_jsonl = false;
|
||||
};
|
||||
|
||||
// call once at the start of a program if it uses libcommon
|
||||
// initializes the logging system and prints info about the build
|
||||
void gpt_init();
|
||||
|
||||
std::string gpt_params_get_system_info(const gpt_params & params);
|
||||
|
||||
bool parse_cpu_range(const std::string& range, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
||||
|
@ -378,6 +380,11 @@ static std::vector<T> string_split(const std::string & str, char delim) {
|
|||
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
||||
void string_process_escapes(std::string & input);
|
||||
|
||||
std::string string_from(bool value);
|
||||
std::string string_from(const std::vector<int> & values);
|
||||
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
|
||||
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);
|
||||
|
||||
//
|
||||
// Filesystem utils
|
||||
//
|
||||
|
|
401
common/log.cpp
Normal file
401
common/log.cpp
Normal file
|
@ -0,0 +1,401 @@
|
|||
#include "log.h"
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstdarg>
|
||||
#include <cstdio>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
int gpt_log_verbosity_thold = LOG_DEFAULT_LLAMA;
|
||||
|
||||
// overwrites the global verbosity threshold (gpt_log_verbosity_thold) with `verbosity`
// plain unsynchronized store
void gpt_log_set_verbosity_thold(int verbosity) {
    gpt_log_verbosity_thold = verbosity;
}
|
||||
|
||||
#define LOG_COL_DEFAULT "\033[0m"
|
||||
#define LOG_COL_BOLD "\033[1m"
|
||||
#define LOG_COL_RED "\033[31m"
|
||||
#define LOG_COL_GREEN "\033[32m"
|
||||
#define LOG_COL_YELLOW "\033[33m"
|
||||
#define LOG_COL_BLUE "\033[34m"
|
||||
#define LOG_COL_MAGENTA "\033[35m"
|
||||
#define LOG_COL_CYAN "\033[36m"
|
||||
#define LOG_COL_WHITE "\033[37m"
|
||||
|
||||
// Returns a time stamp in microseconds taken from a monotonic clock.
//
// The value is only meaningful as a difference between two calls (the logger uses it as
// `t_us() - t_start`). std::chrono::steady_clock is used instead of system_clock because
// the latter follows wall-clock adjustments, which could make the relative log timestamps
// jump or even become negative.
static int64_t t_us() {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();

    return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
}
|
||||
|
||||
// color identifiers - each value is the index of the matching escape sequence in g_col
enum gpt_log_col : int {
    GPT_LOG_COL_DEFAULT = 0,
    GPT_LOG_COL_BOLD    = 1,
    GPT_LOG_COL_RED     = 2,
    GPT_LOG_COL_GREEN   = 3,
    GPT_LOG_COL_YELLOW  = 4,
    GPT_LOG_COL_BLUE    = 5,
    GPT_LOG_COL_MAGENTA = 6,
    GPT_LOG_COL_CYAN    = 7,
    GPT_LOG_COL_WHITE   = 8,
};
|
||||
|
||||
// escape sequence per color, indexed by gpt_log_col (9 slots)
// every slot starts out as an empty string, i.e. colors are disabled by default
static std::vector<const char *> g_col(9, "");
|
||||
|
||||
struct gpt_log_entry {
|
||||
enum ggml_log_level level;
|
||||
|
||||
bool prefix;
|
||||
|
||||
int64_t timestamp;
|
||||
|
||||
std::vector<char> msg;
|
||||
|
||||
// signals the worker thread to stop
|
||||
bool is_end;
|
||||
|
||||
void print(FILE * file = nullptr) const {
|
||||
FILE * fcur = file;
|
||||
if (!fcur) {
|
||||
// stderr displays DBG messages only when their verbosity level is not higher than the threshold
|
||||
// these messages will still be logged to a file
|
||||
if (level == GGML_LOG_LEVEL_DEBUG && gpt_log_verbosity_thold < LOG_DEFAULT_DEBUG) {
|
||||
return;
|
||||
}
|
||||
|
||||
fcur = stdout;
|
||||
|
||||
if (level != GGML_LOG_LEVEL_NONE) {
|
||||
fcur = stderr;
|
||||
}
|
||||
}
|
||||
|
||||
if (level != GGML_LOG_LEVEL_NONE && prefix) {
|
||||
if (timestamp) {
|
||||
// [M.s.ms.us]
|
||||
fprintf(fcur, "%s%d.%02d.%03d.%03d%s ",
|
||||
g_col[GPT_LOG_COL_BLUE],
|
||||
(int) (timestamp / 1000000 / 60),
|
||||
(int) (timestamp / 1000000 % 60),
|
||||
(int) (timestamp / 1000 % 1000),
|
||||
(int) (timestamp % 1000),
|
||||
g_col[GPT_LOG_COL_DEFAULT]);
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
case GGML_LOG_LEVEL_INFO: fprintf(fcur, "%sI %s", g_col[GPT_LOG_COL_GREEN], g_col[GPT_LOG_COL_DEFAULT]); break;
|
||||
case GGML_LOG_LEVEL_WARN: fprintf(fcur, "%sW %s", g_col[GPT_LOG_COL_MAGENTA], "" ); break;
|
||||
case GGML_LOG_LEVEL_ERROR: fprintf(fcur, "%sE %s", g_col[GPT_LOG_COL_RED], "" ); break;
|
||||
case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, "%sD %s", g_col[GPT_LOG_COL_YELLOW], "" ); break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(fcur, "%s", msg.data());
|
||||
|
||||
if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR || level == GGML_LOG_LEVEL_DEBUG) {
|
||||
fprintf(fcur, "%s", g_col[GPT_LOG_COL_DEFAULT]);
|
||||
}
|
||||
|
||||
fflush(fcur);
|
||||
}
|
||||
};
|
||||
|
||||
struct gpt_log {
|
||||
// default capacity - will be expanded if needed
|
||||
gpt_log() : gpt_log(256) {}
|
||||
|
||||
gpt_log(size_t capacity) {
|
||||
file = nullptr;
|
||||
prefix = false;
|
||||
timestamps = false;
|
||||
running = false;
|
||||
t_start = t_us();
|
||||
|
||||
// initial message size - will be expanded if longer messages arrive
|
||||
entries.resize(capacity);
|
||||
for (auto & entry : entries) {
|
||||
entry.msg.resize(256);
|
||||
}
|
||||
|
||||
head = 0;
|
||||
tail = 0;
|
||||
|
||||
resume();
|
||||
}
|
||||
|
||||
~gpt_log() {
|
||||
pause();
|
||||
if (file) {
|
||||
fclose(file);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mtx;
|
||||
std::thread thrd;
|
||||
std::condition_variable cv;
|
||||
|
||||
FILE * file;
|
||||
|
||||
bool prefix;
|
||||
bool timestamps;
|
||||
bool running;
|
||||
|
||||
int64_t t_start;
|
||||
|
||||
// ring buffer of entries
|
||||
std::vector<gpt_log_entry> entries;
|
||||
size_t head;
|
||||
size_t tail;
|
||||
|
||||
// worker thread copies into this
|
||||
gpt_log_entry cur;
|
||||
|
||||
public:
|
||||
void add(enum ggml_log_level level, const char * fmt, va_list args) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
if (!running) {
|
||||
// discard messages while the worker thread is paused
|
||||
return;
|
||||
}
|
||||
|
||||
auto & entry = entries[tail];
|
||||
|
||||
{
|
||||
// cannot use args twice, so make a copy in case we need to expand the buffer
|
||||
va_list args_copy;
|
||||
va_copy(args_copy, args);
|
||||
|
||||
#if 1
|
||||
const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
|
||||
if (n >= entry.msg.size()) {
|
||||
entry.msg.resize(n + 1);
|
||||
vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args_copy);
|
||||
}
|
||||
#else
|
||||
// hack for bolding arguments
|
||||
|
||||
std::stringstream ss;
|
||||
for (int i = 0; fmt[i] != 0; i++) {
|
||||
if (fmt[i] == '%') {
|
||||
ss << LOG_COL_BOLD;
|
||||
while (fmt[i] != ' ' && fmt[i] != ')' && fmt[i] != ']' && fmt[i] != 0) ss << fmt[i++];
|
||||
ss << LOG_COL_DEFAULT;
|
||||
if (fmt[i] == 0) break;
|
||||
}
|
||||
ss << fmt[i];
|
||||
}
|
||||
const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
|
||||
if (n >= entry.msg.size()) {
|
||||
entry.msg.resize(n + 1);
|
||||
vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args_copy);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
entry.level = level;
|
||||
entry.prefix = prefix;
|
||||
entry.timestamp = 0;
|
||||
if (timestamps) {
|
||||
entry.timestamp = t_us() - t_start;
|
||||
}
|
||||
entry.is_end = false;
|
||||
|
||||
tail = (tail + 1) % entries.size();
|
||||
if (tail == head) {
|
||||
// expand the buffer
|
||||
std::vector<gpt_log_entry> new_entries(2*entries.size());
|
||||
|
||||
size_t new_tail = 0;
|
||||
|
||||
do {
|
||||
new_entries[new_tail] = std::move(entries[head]);
|
||||
|
||||
head = (head + 1) % entries.size();
|
||||
new_tail = (new_tail + 1);
|
||||
} while (head != tail);
|
||||
|
||||
head = 0;
|
||||
tail = new_tail;
|
||||
|
||||
for (size_t i = tail; i < new_entries.size(); i++) {
|
||||
new_entries[i].msg.resize(256);
|
||||
}
|
||||
|
||||
entries = std::move(new_entries);
|
||||
}
|
||||
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
void resume() {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
if (running) {
|
||||
return;
|
||||
}
|
||||
|
||||
running = true;
|
||||
|
||||
thrd = std::thread([this]() {
|
||||
while (true) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
cv.wait(lock, [this]() { return head != tail; });
|
||||
|
||||
cur = entries[head];
|
||||
|
||||
head = (head + 1) % entries.size();
|
||||
}
|
||||
|
||||
if (cur.is_end) {
|
||||
break;
|
||||
}
|
||||
|
||||
cur.print(); // stdout and stderr
|
||||
|
||||
if (file) {
|
||||
cur.print(file);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void pause() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
if (!running) {
|
||||
return;
|
||||
}
|
||||
|
||||
running = false;
|
||||
|
||||
// push an entry to signal the worker thread to stop
|
||||
{
|
||||
auto & entry = entries[tail];
|
||||
entry.is_end = true;
|
||||
|
||||
tail = (tail + 1) % entries.size();
|
||||
}
|
||||
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
thrd.join();
|
||||
}
|
||||
|
||||
void set_file(const char * path) {
|
||||
pause();
|
||||
|
||||
if (file) {
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
if (path) {
|
||||
file = fopen(path, "w");
|
||||
} else {
|
||||
file = nullptr;
|
||||
}
|
||||
|
||||
resume();
|
||||
}
|
||||
|
||||
void set_colors(bool colors) {
|
||||
pause();
|
||||
|
||||
if (colors) {
|
||||
g_col[GPT_LOG_COL_DEFAULT] = LOG_COL_DEFAULT;
|
||||
g_col[GPT_LOG_COL_BOLD] = LOG_COL_BOLD;
|
||||
g_col[GPT_LOG_COL_RED] = LOG_COL_RED;
|
||||
g_col[GPT_LOG_COL_GREEN] = LOG_COL_GREEN;
|
||||
g_col[GPT_LOG_COL_YELLOW] = LOG_COL_YELLOW;
|
||||
g_col[GPT_LOG_COL_BLUE] = LOG_COL_BLUE;
|
||||
g_col[GPT_LOG_COL_MAGENTA] = LOG_COL_MAGENTA;
|
||||
g_col[GPT_LOG_COL_CYAN] = LOG_COL_CYAN;
|
||||
g_col[GPT_LOG_COL_WHITE] = LOG_COL_WHITE;
|
||||
} else {
|
||||
for (size_t i = 0; i < g_col.size(); i++) {
|
||||
g_col[i] = "";
|
||||
}
|
||||
}
|
||||
|
||||
resume();
|
||||
}
|
||||
|
||||
void set_prefix(bool prefix) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
this->prefix = prefix;
|
||||
}
|
||||
|
||||
void set_timestamps(bool timestamps) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
this->timestamps = timestamps;
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// public API
|
||||
//
|
||||
|
||||
struct gpt_log * gpt_log_init() {
|
||||
return new gpt_log;
|
||||
}
|
||||
|
||||
struct gpt_log * gpt_log_main() {
|
||||
static struct gpt_log log;
|
||||
|
||||
return &log;
|
||||
}
|
||||
|
||||
// forwards to gpt_log::pause(), which stops the worker thread of `log`
void gpt_log_pause(struct gpt_log * log) {
    log->pause();
}
|
||||
|
||||
// forwards to gpt_log::resume(), which starts the worker thread of `log`
void gpt_log_resume(struct gpt_log * log) {
    log->resume();
}
|
||||
|
||||
// destroys `log` with delete
void gpt_log_free(struct gpt_log * log) {
    delete log;
}
|
||||
|
||||
// printf-style entry point: packs the variadic arguments into a va_list and hands them to gpt_log::add()
void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...) {
    va_list ap;

    va_start(ap, fmt);
    log->add(level, fmt, ap);
    va_end(ap);
}
|
||||
|
||||
// forwards to gpt_log::set_file() - `file` is the path of the log file, null disables file logging
void gpt_log_set_file(struct gpt_log * log, const char * file) {
    log->set_file(file);
}
|
||||
|
||||
// forwards to gpt_log::set_colors() to turn the colored output on or off
void gpt_log_set_colors(struct gpt_log * log, bool colors) {
    log->set_colors(colors);
}
|
||||
|
||||
// forwards to gpt_log::set_prefix() to turn the per-message prefix on or off
void gpt_log_set_prefix(struct gpt_log * log, bool prefix) {
    log->set_prefix(prefix);
}
|
||||
|
||||
// forwards to gpt_log::set_timestamps() to turn the timestamps in the prefix on or off
void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps) {
    log->set_timestamps(timestamps);
}
|
786
common/log.h
786
common/log.h
|
@ -1,724 +1,90 @@
|
|||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include "ggml.h" // for ggml_log_level
|
||||
|
||||
// --------------------------------
|
||||
//
|
||||
// Basic usage:
|
||||
//
|
||||
// --------
|
||||
//
|
||||
// The LOG() and LOG_TEE() macros are ready to go by default
|
||||
// they do not require any initialization.
|
||||
//
|
||||
// LOGLN() and LOG_TEELN() are variants which automatically
|
||||
// include \n character at the end of the log string.
|
||||
//
|
||||
// LOG() behaves exactly like printf, by default writing to a logfile.
|
||||
// LOG_TEE() additionally, prints to the screen too ( mimics Unix tee command ).
|
||||
//
|
||||
// Default logfile is named
|
||||
// "llama.<threadID>.log"
|
||||
// Default LOG_TEE() secondary output target is
|
||||
// stderr
|
||||
//
|
||||
// Logs can be dynamically disabled or enabled using functions:
|
||||
// log_disable()
|
||||
// and
|
||||
// log_enable()
|
||||
//
|
||||
// A log target can be changed with:
|
||||
// log_set_target( string )
|
||||
// creating and opening, or re-opening a file by string filename
|
||||
// or
|
||||
// log_set_target( FILE* )
|
||||
// allowing to point at stderr, stdout, or any valid FILE* file handler.
|
||||
//
|
||||
// --------
|
||||
//
|
||||
// End of Basic usage.
|
||||
//
|
||||
// --------------------------------
|
||||
|
||||
// Specifies a log target.
|
||||
// default uses log_handler() with "llama.log" log file
|
||||
// this can be changed, by defining LOG_TARGET
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_TARGET (a valid FILE*)
|
||||
// #include "log.h"
|
||||
//
|
||||
// or it can be simply redirected to stdout or stderr
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_TARGET stderr
|
||||
// #include "log.h"
|
||||
//
|
||||
// The log target can also be redirected to a different function
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_TARGET log_handler_different()
|
||||
// #include "log.h"
|
||||
//
|
||||
// FILE* log_handler_different()
|
||||
// {
|
||||
// return stderr;
|
||||
// }
|
||||
//
|
||||
// or:
|
||||
//
|
||||
// #define LOG_TARGET log_handler_another_one("somelog.log")
|
||||
// #include "log.h"
|
||||
//
|
||||
// FILE* log_handler_another_one(char*filename)
|
||||
// {
|
||||
// static FILE* logfile = nullptr;
|
||||
// (...)
|
||||
// if( !logfile )
|
||||
// {
|
||||
// fopen(...)
|
||||
// }
|
||||
// (...)
|
||||
// return logfile
|
||||
// }
|
||||
//
|
||||
#ifndef LOG_TARGET
|
||||
#define LOG_TARGET log_handler()
|
||||
#endif
|
||||
|
||||
#ifndef LOG_TEE_TARGET
|
||||
#define LOG_TEE_TARGET stderr
|
||||
#endif
|
||||
|
||||
// Utility for synchronizing log configuration state
|
||||
// since std::optional was introduced only in c++17
|
||||
enum LogTriState
|
||||
{
|
||||
LogTriStateSame,
|
||||
LogTriStateFalse,
|
||||
LogTriStateTrue
|
||||
};
|
||||
|
||||
// Utility to obtain "pid" like unique process id and use it when creating log files.
|
||||
inline std::string log_get_pid()
|
||||
{
|
||||
static std::string pid;
|
||||
if (pid.empty())
|
||||
{
|
||||
// std::this_thread::get_id() is the most portable way of obtaining a "process id"
|
||||
// it's not the same as "pid" but is unique enough to solve multiple instances
|
||||
// trying to write to the same log.
|
||||
std::stringstream ss;
|
||||
ss << std::this_thread::get_id();
|
||||
pid = ss.str();
|
||||
}
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
// Utility function for generating log file names with unique id based on thread id.
|
||||
// invocation with log_filename_generator( "llama", "log" ) creates a string "llama.<number>.log"
|
||||
// where the number is a runtime id of the current thread.
|
||||
|
||||
#define log_filename_generator(log_file_basename, log_file_extension) log_filename_generator_impl(LogTriStateSame, log_file_basename, log_file_extension)
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline std::string log_filename_generator_impl(LogTriState multilog, const std::string & log_file_basename, const std::string & log_file_extension)
|
||||
{
|
||||
static bool _multilog = false;
|
||||
|
||||
if (multilog != LogTriStateSame)
|
||||
{
|
||||
_multilog = multilog == LogTriStateTrue;
|
||||
}
|
||||
|
||||
std::stringstream buf;
|
||||
|
||||
buf << log_file_basename;
|
||||
if (_multilog)
|
||||
{
|
||||
buf << ".";
|
||||
buf << log_get_pid();
|
||||
}
|
||||
buf << ".";
|
||||
buf << log_file_extension;
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
#ifndef LOG_DEFAULT_FILE_NAME
|
||||
#define LOG_DEFAULT_FILE_NAME log_filename_generator("llama", "log")
|
||||
#endif
|
||||
|
||||
// Utility for turning #define values into string literals
|
||||
// so we can have a define for stderr and
|
||||
// we can print "stderr" instead of literal stderr, etc.
|
||||
#define LOG_STRINGIZE1(s) #s
|
||||
#define LOG_STRINGIZE(s) LOG_STRINGIZE1(s)
|
||||
|
||||
#define LOG_TEE_TARGET_STRING LOG_STRINGIZE(LOG_TEE_TARGET)
|
||||
|
||||
// Allows disabling timestamps.
|
||||
// in order to disable, define LOG_NO_TIMESTAMPS
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_NO_TIMESTAMPS
|
||||
// #include "log.h"
|
||||
//
|
||||
#ifndef LOG_NO_TIMESTAMPS
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#else
|
||||
#define LOG_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#endif
|
||||
#ifndef __GNUC__
|
||||
# define LOG_ATTRIBUTE_FORMAT(...)
|
||||
#elif defined(__MINGW32__)
|
||||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
||||
#else
|
||||
#define LOG_TIMESTAMP_FMT "%s"
|
||||
#define LOG_TIMESTAMP_VAL ,""
|
||||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||
#endif
|
||||
|
||||
#ifdef LOG_TEE_TIMESTAMPS
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TEE_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TEE_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#else
|
||||
#define LOG_TEE_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TEE_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#endif
|
||||
#else
|
||||
#define LOG_TEE_TIMESTAMP_FMT "%s"
|
||||
#define LOG_TEE_TIMESTAMP_VAL ,""
|
||||
#endif
|
||||
#define LOG_DEFAULT_DEBUG 1
|
||||
#define LOG_DEFAULT_LLAMA 0
|
||||
|
||||
// Allows disabling file/line/function prefix
|
||||
// in order to disable, define LOG_NO_FILE_LINE_FUNCTION
|
||||
// like so:
|
||||
// needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity threshold is lower than the verbosity of the message
|
||||
// set via gpt_log_set_verbosity_thold()
|
||||
extern int gpt_log_verbosity_thold;
|
||||
|
||||
void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe
|
||||
|
||||
// the gpt_log uses an internal worker thread to print/write log messages
|
||||
// when the worker thread is paused, incoming log messages are discarded
|
||||
struct gpt_log;
|
||||
|
||||
struct gpt_log * gpt_log_init();
|
||||
struct gpt_log * gpt_log_main(); // singleton, automatically destroys itself on exit
|
||||
void gpt_log_pause (struct gpt_log * log); // pause the worker thread, not thread-safe
|
||||
void gpt_log_resume(struct gpt_log * log); // resume the worker thread, not thread-safe
|
||||
void gpt_log_free (struct gpt_log * log);
|
||||
|
||||
LOG_ATTRIBUTE_FORMAT(3, 4)
|
||||
void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...);
|
||||
|
||||
// defaults: file = NULL, colors = false, prefix = false, timestamps = false
|
||||
//
|
||||
// #define LOG_NO_FILE_LINE_FUNCTION
|
||||
// #include "log.h"
|
||||
// regular log output:
|
||||
//
|
||||
#ifndef LOG_NO_FILE_LINE_FUNCTION
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_FLF_FMT "[%24s:%5d][%24s] "
|
||||
#define LOG_FLF_VAL , __FILE__, __LINE__, __FUNCTION__
|
||||
#else
|
||||
#define LOG_FLF_FMT "[%24s:%5ld][%24s] "
|
||||
#define LOG_FLF_VAL , __FILE__, (long)__LINE__, __FUNCTION__
|
||||
#endif
|
||||
#else
|
||||
#define LOG_FLF_FMT "%s"
|
||||
#define LOG_FLF_VAL ,""
|
||||
#endif
|
||||
|
||||
#ifdef LOG_TEE_FILE_LINE_FUNCTION
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TEE_FLF_FMT "[%24s:%5d][%24s] "
|
||||
#define LOG_TEE_FLF_VAL , __FILE__, __LINE__, __FUNCTION__
|
||||
#else
|
||||
#define LOG_TEE_FLF_FMT "[%24s:%5ld][%24s] "
|
||||
#define LOG_TEE_FLF_VAL , __FILE__, (long)__LINE__, __FUNCTION__
|
||||
#endif
|
||||
#else
|
||||
#define LOG_TEE_FLF_FMT "%s"
|
||||
#define LOG_TEE_FLF_VAL ,""
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
// USE LOG() INSTEAD
|
||||
// ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
|
||||
// llm_load_tensors: ggml ctx size = 0.27 MiB
|
||||
// llm_load_tensors: offloading 32 repeating layers to GPU
|
||||
// llm_load_tensors: offloading non-repeating layers to GPU
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
|
||||
#define LOG_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
// with prefix = true, timestamps = true, the log output will look like this:
|
||||
//
|
||||
// 0.00.035.060 D ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
|
||||
// 0.00.035.064 I llm_load_tensors: ggml ctx size = 0.27 MiB
|
||||
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
|
||||
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
|
||||
//
|
||||
// I - info (stderr, V = 0)
|
||||
// W - warning (stderr, V = 0)
|
||||
// E - error (stderr, V = 0)
|
||||
// D - debug (stderr, V = LOG_DEFAULT_DEBUG)
|
||||
//
|
||||
|
||||
void gpt_log_set_file (struct gpt_log * log, const char * file); // not thread-safe
|
||||
void gpt_log_set_colors (struct gpt_log * log, bool colors); // not thread-safe
|
||||
void gpt_log_set_prefix (struct gpt_log * log, bool prefix); // whether to output prefix to each log
|
||||
void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps); // whether to output timestamps in the prefix
|
||||
|
||||
// helper macros for logging
|
||||
// use these to avoid computing log arguments if the verbosity of the log is higher than the threshold
|
||||
//
|
||||
// for example:
|
||||
//
|
||||
// LOG_DBG("this is a debug message: %d\n", expensive_function());
|
||||
//
|
||||
// this will avoid calling expensive_function() if LOG_DEFAULT_DEBUG > gpt_log_verbosity_thold
|
||||
//
|
||||
|
||||
#define LOG_TMPL(level, verbosity, ...) \
|
||||
do { \
|
||||
if ((verbosity) <= gpt_log_verbosity_thold) { \
|
||||
gpt_log_add(gpt_log_main(), (level), __VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
// USE LOG_TEE() INSTEAD
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
if (LOG_TARGET != nullptr && LOG_TARGET != stdout && LOG_TARGET != stderr && LOG_TEE_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TEE_TARGET); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
if (LOG_TARGET != nullptr && LOG_TARGET != stdout && LOG_TARGET != stderr && LOG_TEE_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TEE_TARGET); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
|
||||
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
|
||||
|
||||
// The '\0' as a last argument, is a trick to bypass the silly
|
||||
// "warning: ISO C++11 requires at least one argument for the "..." in a variadic macro"
|
||||
// so we can have a single macro which can be called just like printf.
|
||||
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
|
||||
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
|
||||
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
|
||||
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
|
||||
|
||||
// Main LOG macro.
|
||||
// behaves like printf, and supports arguments the exact same way.
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
#define LOG(...) LOG_IMPL(__VA_ARGS__, "")
|
||||
#else
|
||||
#define LOG(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||
#endif
|
||||
|
||||
// Main TEE macro.
|
||||
// does the same as LOG
|
||||
// and
|
||||
// simultaneously writes stderr.
|
||||
//
|
||||
// Secondary target can be changed just like LOG_TARGET
|
||||
// by defining LOG_TEE_TARGET
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
#define LOG_TEE(...) LOG_TEE_IMPL(__VA_ARGS__, "")
|
||||
#else
|
||||
#define LOG_TEE(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||
#endif
|
||||
|
||||
// LOG macro variants with auto endline.
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
#define LOGLN(...) LOG_IMPL(__VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(...) LOG_TEE_IMPL(__VA_ARGS__, "\n")
|
||||
#else
|
||||
#define LOGLN(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_handler1_impl(bool change = false, LogTriState append = LogTriStateSame, LogTriState disable = LogTriStateSame, const std::string & filename = LOG_DEFAULT_FILE_NAME, FILE *target = nullptr)
|
||||
{
|
||||
static bool _initialized = false;
|
||||
static bool _append = false;
|
||||
static bool _disabled = filename.empty() && target == nullptr;
|
||||
static std::string log_current_filename{filename};
|
||||
static FILE *log_current_target{target};
|
||||
static FILE *logfile = nullptr;
|
||||
|
||||
if (change)
|
||||
{
|
||||
if (append != LogTriStateSame)
|
||||
{
|
||||
_append = append == LogTriStateTrue;
|
||||
return logfile;
|
||||
}
|
||||
|
||||
if (disable == LogTriStateTrue)
|
||||
{
|
||||
// Disable primary target
|
||||
_disabled = true;
|
||||
}
|
||||
// If previously disabled, only enable, and keep previous target
|
||||
else if (disable == LogTriStateFalse)
|
||||
{
|
||||
_disabled = false;
|
||||
}
|
||||
// Otherwise, process the arguments
|
||||
else if (log_current_filename != filename || log_current_target != target)
|
||||
{
|
||||
_initialized = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (_disabled)
|
||||
{
|
||||
// Log is disabled
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (_initialized)
|
||||
{
|
||||
// with fallback in case something went wrong
|
||||
return logfile ? logfile : stderr;
|
||||
}
|
||||
|
||||
// do the (re)initialization
|
||||
if (target != nullptr)
|
||||
{
|
||||
if (logfile != nullptr && logfile != stdout && logfile != stderr)
|
||||
{
|
||||
fclose(logfile);
|
||||
}
|
||||
|
||||
log_current_filename = LOG_DEFAULT_FILE_NAME;
|
||||
log_current_target = target;
|
||||
|
||||
logfile = target;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (log_current_filename != filename)
|
||||
{
|
||||
if (logfile != nullptr && logfile != stdout && logfile != stderr)
|
||||
{
|
||||
fclose(logfile);
|
||||
}
|
||||
}
|
||||
|
||||
logfile = fopen(filename.c_str(), _append ? "a" : "w");
|
||||
}
|
||||
|
||||
if (!logfile)
|
||||
{
|
||||
// Verify whether the file was opened, otherwise fallback to stderr
|
||||
logfile = stderr;
|
||||
|
||||
fprintf(stderr, "Failed to open logfile '%s' with error '%s'\n", filename.c_str(), std::strerror(errno));
|
||||
fflush(stderr);
|
||||
|
||||
// At this point we let the init flag be to true below, and let the target fallback to stderr
|
||||
// otherwise we would repeatedly fopen() which was already unsuccessful
|
||||
}
|
||||
|
||||
_initialized = true;
|
||||
|
||||
return logfile ? logfile : stderr;
|
||||
}
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_handler2_impl(bool change = false, LogTriState append = LogTriStateSame, LogTriState disable = LogTriStateSame, FILE *target = nullptr, const std::string & filename = LOG_DEFAULT_FILE_NAME)
|
||||
{
|
||||
return log_handler1_impl(change, append, disable, filename, target);
|
||||
}
|
||||
|
||||
// Disables logs entirely at runtime.
|
||||
// Makes LOG() and LOG_TEE() produce no output,
|
||||
// until enabled back.
|
||||
#define log_disable() log_disable_impl()
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_disable_impl()
|
||||
{
|
||||
return log_handler1_impl(true, LogTriStateSame, LogTriStateTrue);
|
||||
}
|
||||
|
||||
// Enables logs at runtime.
|
||||
#define log_enable() log_enable_impl()
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_enable_impl()
|
||||
{
|
||||
return log_handler1_impl(true, LogTriStateSame, LogTriStateFalse);
|
||||
}
|
||||
|
||||
// Sets target for logs, either by a file name or FILE* pointer (stdout, stderr, or any valid FILE*)
|
||||
#define log_set_target(target) log_set_target_impl(target)
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_set_target_impl(const std::string & filename) { return log_handler1_impl(true, LogTriStateSame, LogTriStateSame, filename); }
|
||||
inline FILE *log_set_target_impl(FILE *target) { return log_handler2_impl(true, LogTriStateSame, LogTriStateSame, target); }
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_handler() { return log_handler1_impl(); }
|
||||
|
||||
// Enable or disable creating separate log files for each run.
|
||||
// can ONLY be invoked BEFORE first log use.
|
||||
#define log_multilog(enable) log_filename_generator_impl((enable) ? LogTriStateTrue : LogTriStateFalse, "", "")
|
||||
// Enable or disable append mode for log file.
|
||||
// can ONLY be invoked BEFORE first log use.
|
||||
#define log_append(enable) log_append_impl(enable)
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_append_impl(bool enable)
|
||||
{
|
||||
return log_handler1_impl(true, enable ? LogTriStateTrue : LogTriStateFalse, LogTriStateSame);
|
||||
}
|
||||
|
||||
inline void log_test()
|
||||
{
|
||||
log_disable();
|
||||
LOG("01 Hello World to nobody, because logs are disabled!\n");
|
||||
log_enable();
|
||||
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
|
||||
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
|
||||
log_set_target(stderr);
|
||||
LOG("04 Hello World to stderr!\n");
|
||||
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
|
||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||
LOG("06 Hello World to default log file!\n");
|
||||
log_set_target(stdout);
|
||||
LOG("07 Hello World to stdout!\n");
|
||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||
LOG("08 Hello World to default log file again!\n");
|
||||
log_disable();
|
||||
LOG("09 Hello World _1_ into the void!\n");
|
||||
log_enable();
|
||||
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
|
||||
log_disable();
|
||||
log_set_target("llama.anotherlog.log");
|
||||
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
|
||||
log_enable();
|
||||
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
|
||||
log_set_target("llama.yetanotherlog.log");
|
||||
LOG("13 Hello World this time in yet new file?\n");
|
||||
log_set_target(log_filename_generator("llama_autonamed", "log"));
|
||||
LOG("14 Hello World in log with generated filename!\n");
|
||||
#ifdef _MSC_VER
|
||||
LOG_TEE("15 Hello msvc TEE without arguments\n");
|
||||
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
|
||||
LOG_TEELN("17 Hello msvc TEELN without arguments\n");
|
||||
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
|
||||
LOG("19 Hello msvc LOG without arguments\n");
|
||||
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
|
||||
LOGLN("21 Hello msvc LOGLN without arguments\n");
|
||||
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool log_param_single_parse(const std::string & param)
|
||||
{
|
||||
if ( param == "--log-test")
|
||||
{
|
||||
log_test();
|
||||
return true;
|
||||
}
|
||||
|
||||
if ( param == "--log-disable")
|
||||
{
|
||||
log_disable();
|
||||
return true;
|
||||
}
|
||||
|
||||
if ( param == "--log-enable")
|
||||
{
|
||||
log_enable();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (param == "--log-new")
|
||||
{
|
||||
log_multilog(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (param == "--log-append")
|
||||
{
|
||||
log_append(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool log_param_pair_parse(bool check_but_dont_parse, const std::string & param, const std::string & next = std::string())
|
||||
{
|
||||
if ( param == "--log-file")
|
||||
{
|
||||
if (!check_but_dont_parse)
|
||||
{
|
||||
log_set_target(log_filename_generator(next.empty() ? "unnamed" : next, "log"));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Prints the help text for the log-related command line options to stdout.
inline void log_print_usage()
{
    // One entry per printed chunk, in output order. Each option line follows
    // the "<flag> <description>" layout used by the rest of the usage text.
    const char * const usage_lines[] = {
        "log options:\n",
        " --log-test Run simple logging test\n",
        " --log-disable Disable trace logs\n",
        " --log-enable Enable trace logs\n",
        " --log-file Specify a log filename (without extension)\n",
        " --log-new Create a separate new log file on start. "
        "Each log file will have unique name: \"<name>.<ID>.log\"\n",
        " --log-append Don't truncate the old log file.\n",
        "\n",
    };

    for (const char * line : usage_lines)
    {
        printf("%s", line);
    }
}
|
||||
|
||||
#define log_dump_cmdline(argc, argv) log_dump_cmdline_impl(argc, argv)
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline void log_dump_cmdline_impl(int argc, char **argv)
|
||||
{
|
||||
std::stringstream buf;
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
if (std::string(argv[i]).find(' ') != std::string::npos)
|
||||
{
|
||||
buf << " \"" << argv[i] <<"\"";
|
||||
}
|
||||
else
|
||||
{
|
||||
buf << " " << argv[i];
|
||||
}
|
||||
}
|
||||
LOGLN("Cmd:%s", buf.str().c_str());
|
||||
}
|
||||
|
||||
#define log_tostr(var) log_var_to_string_impl(var).c_str()

// Spells a boolean out as the word "true" or "false".
inline std::string log_var_to_string_impl(bool var)
{
    if (var)
    {
        return "true";
    }
    return "false";
}
|
||||
|
||||
// String overload: the value already is its own textual form, so it is
// returned unchanged. It exists so log_tostr() accepts strings as well.
inline std::string log_var_to_string_impl(std::string var)
{
    return var;
}
|
||||
|
||||
// Renders a vector of ints as "[ a, b, c ]".
// An empty vector yields "[  ]" (opening and closing parts with nothing between).
inline std::string log_var_to_string_impl(const std::vector<int> & var)
{
    std::string rendered = "[ ";

    // Empty before the first element, ", " before every later one.
    const char * separator = "";
    for (const int value : var)
    {
        rendered += separator;
        rendered += std::to_string(value);
        separator = ", ";
    }

    rendered += " ]";
    return rendered;
}
|
||||
|
||||
// Formats a token sequence as "[ 'piece':id, 'piece':id, ... ]".
// Each token is converted to text with llama_token_to_piece(ctx, token), and
// characters for which std::isprint() is false are removed from that text
// before it is printed.
template <typename C, typename T>
inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
{
    // Taking the character as unsigned char keeps the std::isprint() argument
    // within the range that function accepts.
    const auto is_unprintable = [](const unsigned char c) { return !std::isprint(c); };

    std::stringstream out;
    out << "[ ";

    // Empty before the first entry, ", " before every later one.
    const char * separator = "";
    for (const auto & token : tokens)
    {
        out << separator;
        separator = ", ";

        auto piece = llama_token_to_piece(ctx, token);
        piece.erase(std::remove_if(piece.begin(), piece.end(), is_unprintable), piece.end());

        out << "'" << piece << "'" << ":" << std::to_string(token);
    }

    out << " ]";
    return out.str();
}
|
||||
|
||||
// Formats a batch as a bracketed list with one entry per token, each entry
// starting on a new line:
//   <index>:token '<piece>':pos <pos>:n_seq_id <n>:seq_id <first seq id>:logits <flag>
// The token text comes from llama_token_to_piece(ctx, batch.token[i]), with
// characters for which std::isprint() is false removed. Only the first
// sequence id of each token (seq_id[i][0]) is printed.
template <typename C, typename B>
inline std::string LOG_BATCH_TOSTR_PRETTY(const C & ctx, const B & batch)
{
    // Taking the character as unsigned char keeps the std::isprint() argument
    // within the range that function accepts.
    const auto is_unprintable = [](const unsigned char c) { return !std::isprint(c); };

    std::stringstream out;
    out << "[ ";

    // Empty before the first entry, ", " before every later one.
    const char * separator = "";
    for (int i = 0; i < batch.n_tokens; ++i)
    {
        out << separator;
        separator = ", ";

        auto piece = llama_token_to_piece(ctx, batch.token[i]);
        piece.erase(std::remove_if(piece.begin(), piece.end(), is_unprintable), piece.end());

        out << "\n" << std::to_string(i);
        out << ":token '" << piece << "'";
        out << ":pos " << std::to_string(batch.pos[i]);
        out << ":n_seq_id " << std::to_string(batch.n_seq_id[i]);
        out << ":seq_id " << std::to_string(batch.seq_id[i][0]);
        out << ":logits " << std::to_string(batch.logits[i]);
    }

    out << " ]";
    return out.str();
}
|
||||
|
||||
#ifdef LOG_DISABLE_LOGS

// Compile-time switch: when LOG_DISABLE_LOGS is defined, the logging macros
// below are replaced. LOG/LOGLN expand to nothing, while the TEE variants
// degrade to a plain fprintf on stderr so their messages remain visible.

#undef LOG
#define LOG(...) // dummy stub
#undef LOGLN
#define LOGLN(...) // dummy stub

#undef LOG_TEE
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf

#undef LOG_TEELN
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf

// NOTE(review): the runtime helpers visible in this header are spelled in
// lower case (log_disable(), log_enable(), log_set_target(),
// log_dump_cmdline()), so the upper-case stubs below may not replace them -
// confirm which spelling the callers actually use.

#undef LOG_DISABLE
#define LOG_DISABLE() // dummy stub

// LOG_ENABLE is stubbed exactly once; a second, identical #undef/#define
// pair that used to follow was redundant and has been dropped.
#undef LOG_ENABLE
#define LOG_ENABLE() // dummy stub

#undef LOG_SET_TARGET
#define LOG_SET_TARGET(...) // dummy stub

#undef LOG_DUMP_CMDLINE
#define LOG_DUMP_CMDLINE(...) // dummy stub

#endif // LOG_DISABLE_LOGS
|
||||
// Per-level logging macros that take an explicit verbosity argument.
// Each one forwards to LOG_TMPL with the matching GGML_LOG_LEVEL_* constant,
// the given verbosity, and the printf-style arguments.
#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  verbosity, __VA_ARGS__)
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  verbosity, __VA_ARGS__)
#define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__)
#define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__)
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
#include "common.h"
|
||||
#include "log.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
|
||||
void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
|
||||
std::vector<llama_token> & inp, int nnew, bool print_progress) {
|
||||
|
|
|
@ -325,7 +325,7 @@ llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {
|
|||
}
|
||||
|
||||
std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
|
||||
std::string result = "\tlogits ";
|
||||
std::string result = "logits ";
|
||||
|
||||
for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
|
||||
const auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
#include "train.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
#include <functional>
|
||||
#include <cstring>
|
||||
|
||||
struct random_normal_distribution {
|
||||
std::mt19937 gen;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue