arg : allow using -hf offline (#13202)
* arg : allow using -hf offline * add more comments in code [no ci]
This commit is contained in:
parent
da84c04d8f
commit
5933e6fdc9
1 changed files with 83 additions and 47 deletions
128
common/arg.cpp
128
common/arg.cpp
|
@ -43,6 +43,25 @@ std::initializer_list<enum llama_example> mmproj_examples = {
|
||||||
// TODO: add LLAMA_EXAMPLE_SERVER when it's ready
|
// TODO: add LLAMA_EXAMPLE_SERVER when it's ready
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static std::string read_file(const std::string & fname) {
|
||||||
|
std::ifstream file(fname);
|
||||||
|
if (!file) {
|
||||||
|
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
||||||
|
}
|
||||||
|
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
||||||
|
file.close();
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void write_file(const std::string & fname, const std::string & content) {
|
||||||
|
std::ofstream file(fname);
|
||||||
|
if (!file) {
|
||||||
|
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
||||||
|
}
|
||||||
|
file << content;
|
||||||
|
file.close();
|
||||||
|
}
|
||||||
|
|
||||||
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
||||||
this->examples = std::move(examples);
|
this->examples = std::move(examples);
|
||||||
return *this;
|
return *this;
|
||||||
|
@ -200,9 +219,11 @@ struct curl_slist_ptr {
|
||||||
|
|
||||||
static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
|
static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
|
||||||
int remaining_attempts = max_attempts;
|
int remaining_attempts = max_attempts;
|
||||||
|
char * method = nullptr;
|
||||||
|
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_METHOD, &method);
|
||||||
|
|
||||||
while (remaining_attempts > 0) {
|
while (remaining_attempts > 0) {
|
||||||
LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
|
LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
|
||||||
|
|
||||||
CURLcode res = curl_easy_perform(curl);
|
CURLcode res = curl_easy_perform(curl);
|
||||||
if (res == CURLE_OK) {
|
if (res == CURLE_OK) {
|
||||||
|
@ -213,6 +234,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
|
||||||
LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
|
LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
|
||||||
|
|
||||||
remaining_attempts--;
|
remaining_attempts--;
|
||||||
|
if (remaining_attempts == 0) break;
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -231,8 +253,6 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool force_download = false;
|
|
||||||
|
|
||||||
// Set the URL, allow to follow http redirection
|
// Set the URL, allow to follow http redirection
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
||||||
|
@ -256,7 +276,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
|
|
||||||
// If the file exists, check its JSON metadata companion file.
|
// If the file exists, check its JSON metadata companion file.
|
||||||
std::string metadata_path = path + ".json";
|
std::string metadata_path = path + ".json";
|
||||||
nlohmann::json metadata;
|
nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
|
||||||
std::string etag;
|
std::string etag;
|
||||||
std::string last_modified;
|
std::string last_modified;
|
||||||
|
|
||||||
|
@ -266,7 +286,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
if (metadata_in.good()) {
|
if (metadata_in.good()) {
|
||||||
try {
|
try {
|
||||||
metadata_in >> metadata;
|
metadata_in >> metadata;
|
||||||
LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
||||||
if (metadata.contains("url") && metadata.at("url").is_string()) {
|
if (metadata.contains("url") && metadata.at("url").is_string()) {
|
||||||
auto previous_url = metadata.at("url").get<std::string>();
|
auto previous_url = metadata.at("url").get<std::string>();
|
||||||
if (previous_url != url) {
|
if (previous_url != url) {
|
||||||
|
@ -296,7 +316,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
};
|
};
|
||||||
|
|
||||||
common_load_model_from_url_headers headers;
|
common_load_model_from_url_headers headers;
|
||||||
|
bool head_request_ok = false;
|
||||||
|
bool should_download = !file_exists; // by default, we should download if the file does not exist
|
||||||
|
|
||||||
|
// get ETag to see if the remote file has changed
|
||||||
{
|
{
|
||||||
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
|
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
|
||||||
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
|
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
|
||||||
|
@ -325,23 +348,28 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
|
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
||||||
|
|
||||||
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
|
// we only allow retrying once for HEAD requests
|
||||||
|
// this is for the use case of running offline (no internet), where retrying can be annoying
|
||||||
|
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0);
|
||||||
if (!was_perform_successful) {
|
if (!was_perform_successful) {
|
||||||
return false;
|
head_request_ok = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
long http_code = 0;
|
long http_code = 0;
|
||||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
||||||
if (http_code != 200) {
|
if (http_code == 200) {
|
||||||
// HEAD not supported, we don't know if the file has changed
|
head_request_ok = true;
|
||||||
// force trigger downloading
|
} else {
|
||||||
force_download = true;
|
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
||||||
LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
head_request_ok = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool should_download = !file_exists || force_download;
|
// if head_request_ok is false, we don't have the etag or last-modified headers
|
||||||
if (!should_download) {
|
// we leave should_download as-is, which is true if the file does not exist
|
||||||
|
if (head_request_ok) {
|
||||||
|
// check if ETag or Last-Modified headers are different
|
||||||
|
// if it is, we need to download the file again
|
||||||
if (!etag.empty() && etag != headers.etag) {
|
if (!etag.empty() && etag != headers.etag) {
|
||||||
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
||||||
should_download = true;
|
should_download = true;
|
||||||
|
@ -350,6 +378,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
should_download = true;
|
should_download = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (should_download) {
|
if (should_download) {
|
||||||
std::string path_temporary = path + ".downloadInProgress";
|
std::string path_temporary = path + ".downloadInProgress";
|
||||||
if (file_exists) {
|
if (file_exists) {
|
||||||
|
@ -424,13 +453,15 @@ static bool common_download_file_single(const std::string & url, const std::stri
|
||||||
{"etag", headers.etag},
|
{"etag", headers.etag},
|
||||||
{"lastModified", headers.last_modified}
|
{"lastModified", headers.last_modified}
|
||||||
});
|
});
|
||||||
std::ofstream(metadata_path) << metadata.dump(4);
|
write_file(metadata_path, metadata.dump(4));
|
||||||
LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
||||||
|
|
||||||
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
||||||
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -605,16 +636,37 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
||||||
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
|
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
|
||||||
// User-Agent header is already set in common_remote_get_content, no need to set it here
|
// User-Agent header is already set in common_remote_get_content, no need to set it here
|
||||||
|
|
||||||
|
// we use "=" to avoid clashing with other components, while still being allowed on Windows
|
||||||
|
std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
|
||||||
|
string_replace_all(cached_response_fname, "/", "_");
|
||||||
|
std::string cached_response_path = fs_get_cache_file(cached_response_fname);
|
||||||
|
|
||||||
// make the request
|
// make the request
|
||||||
common_remote_params params;
|
common_remote_params params;
|
||||||
params.headers = headers;
|
params.headers = headers;
|
||||||
|
long res_code = 0;
|
||||||
|
std::string res_str;
|
||||||
|
bool use_cache = false;
|
||||||
|
try {
|
||||||
auto res = common_remote_get_content(url, params);
|
auto res = common_remote_get_content(url, params);
|
||||||
long res_code = res.first;
|
res_code = res.first;
|
||||||
std::string res_str(res.second.data(), res.second.size());
|
res_str = std::string(res.second.data(), res.second.size());
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
LOG_WRN("error: failed to get manifest: %s\n", e.what());
|
||||||
|
LOG_WRN("try reading from cache\n");
|
||||||
|
// try to read from cache
|
||||||
|
try {
|
||||||
|
res_str = read_file(cached_response_path);
|
||||||
|
res_code = 200;
|
||||||
|
use_cache = true;
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
throw std::runtime_error("error: failed to get manifest (check your internet connection)");
|
||||||
|
}
|
||||||
|
}
|
||||||
std::string ggufFile;
|
std::string ggufFile;
|
||||||
std::string mmprojFile;
|
std::string mmprojFile;
|
||||||
|
|
||||||
if (res_code == 200) {
|
if (res_code == 200 || res_code == 304) {
|
||||||
// extract ggufFile.rfilename in json, using regex
|
// extract ggufFile.rfilename in json, using regex
|
||||||
{
|
{
|
||||||
std::regex pattern("\"ggufFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
|
std::regex pattern("\"ggufFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
|
||||||
|
@ -631,6 +683,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
||||||
mmprojFile = match[1].str();
|
mmprojFile = match[1].str();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!use_cache) {
|
||||||
|
// if not using cached response, update the cache file
|
||||||
|
write_file(cached_response_path, res_str);
|
||||||
|
}
|
||||||
} else if (res_code == 401) {
|
} else if (res_code == 401) {
|
||||||
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
|
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
|
||||||
} else {
|
} else {
|
||||||
|
@ -1142,6 +1198,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
|
||||||
fprintf(stderr, "%s\n", ex.what());
|
fprintf(stderr, "%s\n", ex.what());
|
||||||
ctx_arg.params = params_org;
|
ctx_arg.params = params_org;
|
||||||
return false;
|
return false;
|
||||||
|
} catch (std::exception & ex) {
|
||||||
|
fprintf(stderr, "%s\n", ex.what());
|
||||||
|
exit(1); // for other exceptions, we exit with status code 1
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1442,13 +1501,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
{"-f", "--file"}, "FNAME",
|
{"-f", "--file"}, "FNAME",
|
||||||
"a file containing the prompt (default: none)",
|
"a file containing the prompt (default: none)",
|
||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
std::ifstream file(value);
|
params.prompt = read_file(value);
|
||||||
if (!file) {
|
|
||||||
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
|
|
||||||
}
|
|
||||||
// store the external file name in params
|
// store the external file name in params
|
||||||
params.prompt_file = value;
|
params.prompt_file = value;
|
||||||
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
|
|
||||||
if (!params.prompt.empty() && params.prompt.back() == '\n') {
|
if (!params.prompt.empty() && params.prompt.back() == '\n') {
|
||||||
params.prompt.pop_back();
|
params.prompt.pop_back();
|
||||||
}
|
}
|
||||||
|
@ -1458,11 +1513,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
{"-sysf", "--system-prompt-file"}, "FNAME",
|
{"-sysf", "--system-prompt-file"}, "FNAME",
|
||||||
"a file containing the system prompt (default: none)",
|
"a file containing the system prompt (default: none)",
|
||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
std::ifstream file(value);
|
params.system_prompt = read_file(value);
|
||||||
if (!file) {
|
|
||||||
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
|
|
||||||
}
|
|
||||||
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.system_prompt));
|
|
||||||
if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
|
if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
|
||||||
params.system_prompt.pop_back();
|
params.system_prompt.pop_back();
|
||||||
}
|
}
|
||||||
|
@ -1887,15 +1938,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
{"--grammar-file"}, "FNAME",
|
{"--grammar-file"}, "FNAME",
|
||||||
"file to read grammar from",
|
"file to read grammar from",
|
||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
std::ifstream file(value);
|
params.sampling.grammar = read_file(value);
|
||||||
if (!file) {
|
|
||||||
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
|
|
||||||
}
|
|
||||||
std::copy(
|
|
||||||
std::istreambuf_iterator<char>(file),
|
|
||||||
std::istreambuf_iterator<char>(),
|
|
||||||
std::back_inserter(params.sampling.grammar)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
).set_sparam());
|
).set_sparam());
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
|
@ -2815,14 +2858,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
"list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
|
"list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
|
||||||
),
|
),
|
||||||
[](common_params & params, const std::string & value) {
|
[](common_params & params, const std::string & value) {
|
||||||
std::ifstream file(value);
|
params.chat_template = read_file(value);
|
||||||
if (!file) {
|
|
||||||
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
|
|
||||||
}
|
|
||||||
std::copy(
|
|
||||||
std::istreambuf_iterator<char>(file),
|
|
||||||
std::istreambuf_iterator<char>(),
|
|
||||||
std::back_inserter(params.chat_template));
|
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
|
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue