common : refactor downloading system, handle mmproj with -hf option (#12694)
* (wip) refactor downloading system [no ci] * fix all examples * fix mmproj with -hf * gemma3: update readme * only handle mmproj in llava example * fix multi-shard download * windows: fix problem with std::min and std::max * fix 2
This commit is contained in:
parent
f423981ac8
commit
267c1399f1
19 changed files with 673 additions and 635 deletions
|
@ -4,6 +4,26 @@
|
|||
>
|
||||
> This is very experimental and intended for demo purposes only.
|
||||
|
||||
## Quick start
|
||||
|
||||
You can use pre-quantized models from the [ggml-org](https://huggingface.co/ggml-org) Hugging Face account:
|
||||
|
||||
```bash
|
||||
# build
|
||||
cmake -B build
|
||||
cmake --build build --target llama-gemma3-cli
|
||||
|
||||
# alternatively, install from brew (macOS)
|
||||
brew install llama.cpp
|
||||
|
||||
# run it
|
||||
llama-gemma3-cli -hf ggml-org/gemma-3-4b-it-GGUF
|
||||
llama-gemma3-cli -hf ggml-org/gemma-3-12b-it-GGUF
|
||||
llama-gemma3-cli -hf ggml-org/gemma-3-27b-it-GGUF
|
||||
|
||||
# note: 1B model does not support vision
|
||||
```
|
||||
|
||||
## How to get mmproj.gguf?
|
||||
|
||||
```bash
|
||||
|
|
|
@ -78,7 +78,7 @@ struct gemma3_context {
|
|||
}
|
||||
|
||||
void init_clip_model(common_params & params) {
|
||||
const char * clip_path = params.mmproj.c_str();
|
||||
const char * clip_path = params.mmproj.path.c_str();
|
||||
ctx_clip = clip_model_load(clip_path, params.verbosity > 1);
|
||||
}
|
||||
|
||||
|
@ -232,13 +232,13 @@ int main(int argc, char ** argv) {
|
|||
|
||||
common_init();
|
||||
|
||||
if (params.mmproj.empty()) {
|
||||
if (params.mmproj.path.empty()) {
|
||||
show_additional_info(argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
gemma3_context ctx(params);
|
||||
printf("%s: %s\n", __func__, params.model.c_str());
|
||||
printf("%s: %s\n", __func__, params.model.path.c_str());
|
||||
|
||||
bool is_single_turn = !params.prompt.empty() && !params.image.empty();
|
||||
|
||||
|
|
|
@ -225,7 +225,7 @@ static struct llama_model * llava_init(common_params * params) {
|
|||
|
||||
llama_model_params model_params = common_model_params_to_llama(*params);
|
||||
|
||||
llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
|
||||
llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params);
|
||||
if (model == NULL) {
|
||||
LOG_ERR("%s: unable to load model\n" , __func__);
|
||||
return NULL;
|
||||
|
@ -234,7 +234,7 @@ static struct llama_model * llava_init(common_params * params) {
|
|||
}
|
||||
|
||||
static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
|
||||
const char * clip_path = params->mmproj.c_str();
|
||||
const char * clip_path = params->mmproj.path.c_str();
|
||||
|
||||
auto prompt = params->prompt;
|
||||
if (prompt.empty()) {
|
||||
|
@ -283,7 +283,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
common_init();
|
||||
|
||||
if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
|
||||
if (params.mmproj.path.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
|
||||
print_usage(argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ static struct llama_model * llava_init(common_params * params) {
|
|||
|
||||
llama_model_params model_params = common_model_params_to_llama(*params);
|
||||
|
||||
llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
|
||||
llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params);
|
||||
if (model == NULL) {
|
||||
LOG_ERR("%s: unable to load model\n" , __func__);
|
||||
return NULL;
|
||||
|
@ -80,7 +80,7 @@ static void llava_free(struct llava_context * ctx_llava) {
|
|||
}
|
||||
|
||||
static struct clip_ctx * clip_init_context(common_params * params) {
|
||||
const char * clip_path = params->mmproj.c_str();
|
||||
const char * clip_path = params->mmproj.path.c_str();
|
||||
|
||||
auto prompt = params->prompt;
|
||||
if (prompt.empty()) {
|
||||
|
@ -290,7 +290,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
common_init();
|
||||
|
||||
if (params.mmproj.empty() || (params.image.empty())) {
|
||||
if (params.mmproj.path.empty() || (params.image.empty())) {
|
||||
show_additional_info(argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -314,7 +314,7 @@ static struct llama_model * llava_init(common_params * params) {
|
|||
|
||||
llama_model_params model_params = common_model_params_to_llama(*params);
|
||||
|
||||
llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
|
||||
llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params);
|
||||
if (model == NULL) {
|
||||
LOG_ERR("%s: unable to load model\n" , __func__);
|
||||
return NULL;
|
||||
|
@ -323,7 +323,7 @@ static struct llama_model * llava_init(common_params * params) {
|
|||
}
|
||||
|
||||
static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
|
||||
const char * clip_path = params->mmproj.c_str();
|
||||
const char * clip_path = params->mmproj.path.c_str();
|
||||
|
||||
auto prompt = params->prompt;
|
||||
if (prompt.empty()) {
|
||||
|
@ -524,7 +524,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
common_init();
|
||||
|
||||
if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
|
||||
if (params.mmproj.path.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
|
||||
print_usage(argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue