llava : introduce libmtmd (#12849)
* wip llava2 * migrated gemma3 to llava2 * add timings * correct pre/postfix * fix missing include * fix compilation unused var warn * update llava2_tokenize * change name llava2 --> mtmd * improve api * refine helpers * Update examples/llava/mtmd.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
64eda5deb9
commit
8b9cc7cdd8
7 changed files with 687 additions and 128 deletions
|
@ -1,12 +1,15 @@
|
|||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include "clip.h"
|
||||
|
||||
#include <climits>
|
||||
#include <cstdarg>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
// Internal header for clip.cpp
|
||||
|
||||
|
@ -120,6 +123,23 @@ static projector_type clip_projector_type_from_string(const std::string & str) {
|
|||
return PROJECTOR_TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
// RGB uint8 image
|
||||
struct clip_image_u8 {
|
||||
int nx;
|
||||
int ny;
|
||||
|
||||
std::vector<uint8_t> buf;
|
||||
};
|
||||
|
||||
// RGB float32 image (NHWC)
|
||||
// Memory layout: RGBRGBRGB...
|
||||
struct clip_image_f32 {
|
||||
int nx;
|
||||
int ny;
|
||||
|
||||
std::vector<float> buf;
|
||||
};
|
||||
|
||||
//
|
||||
// logging
|
||||
//
|
||||
|
@ -178,6 +198,28 @@ static void clip_log_internal(enum ggml_log_level level, const char * format, ..
|
|||
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
|
||||
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, __VA_ARGS__)
|
||||
|
||||
//
|
||||
// cpp wrappers
|
||||
//
|
||||
|
||||
struct clip_image_u8_deleter {
|
||||
void operator()(clip_image_u8 * val) { clip_image_u8_free(val); }
|
||||
};
|
||||
|
||||
struct clip_image_f32_deleter {
|
||||
void operator()(clip_image_f32 * val) { clip_image_f32_free(val); }
|
||||
};
|
||||
|
||||
struct clip_image_f32_batch_deleter {
|
||||
void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); }
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<clip_image_u8, clip_image_u8_deleter> clip_image_u8_ptr;
|
||||
typedef std::unique_ptr<clip_image_f32, clip_image_f32_deleter> clip_image_f32_ptr;
|
||||
typedef std::unique_ptr<clip_image_f32_batch, clip_image_f32_batch_deleter> clip_image_f32_batch_ptr;
|
||||
|
||||
// TODO @ngxson : we're currently having a naming clash between struct clip_image_size and function clip_image_size()
|
||||
|
||||
//
|
||||
// common utils
|
||||
//
|
||||
|
@ -214,6 +256,20 @@ static void string_replace_all(std::string & s, const std::string & search, cons
|
|||
s = std::move(builder);
|
||||
}
|
||||
|
||||
// split string by a `std::string delim` instead of `char delim`
|
||||
static std::vector<std::string> string_split_str(std::string s, const std::string & delimiter) {
|
||||
std::vector<std::string> tokens;
|
||||
size_t pos = 0;
|
||||
std::string token;
|
||||
while ((pos = s.find(delimiter)) != std::string::npos) {
|
||||
token = s.substr(0, pos);
|
||||
tokens.push_back(token);
|
||||
s.erase(0, pos + delimiter.length());
|
||||
}
|
||||
tokens.push_back(s);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
//
|
||||
// gguf utils
|
||||
//
|
||||
|
@ -271,3 +327,9 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
|
|||
return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// API used internally with mtmd
|
||||
//
|
||||
|
||||
projector_type clip_get_projector_type(const struct clip_ctx * ctx);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue