server : support audio input (#13714)
* server : support audio input * add audio support on webui
This commit is contained in:
parent
faaaff5f94
commit
9ecf3e66a3
12 changed files with 276 additions and 173 deletions
|
@ -12,17 +12,7 @@ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
|
|||
size_t n_tokens = 0;
|
||||
for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
|
||||
auto chunk = mtmd_input_chunks_get(chunks, i);
|
||||
auto chunk_type = mtmd_input_chunk_get_type(chunk);
|
||||
if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||
size_t n_tokens_text;
|
||||
mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
|
||||
n_tokens += n_tokens_text;
|
||||
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||
auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
|
||||
n_tokens += mtmd_image_tokens_get_n_tokens(tokens_image);
|
||||
} else {
|
||||
GGML_ASSERT(false && "chunk type not supported");
|
||||
}
|
||||
n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
|
||||
}
|
||||
return n_tokens;
|
||||
}
|
||||
|
@ -31,17 +21,7 @@ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
|
|||
llama_pos n_pos = 0;
|
||||
for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
|
||||
auto chunk = mtmd_input_chunks_get(chunks, i);
|
||||
auto chunk_type = mtmd_input_chunk_get_type(chunk);
|
||||
if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||
size_t n_tokens_text;
|
||||
mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
|
||||
n_pos += n_tokens_text;
|
||||
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||
auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
|
||||
n_pos += mtmd_image_tokens_get_n_pos(tokens_image);
|
||||
} else {
|
||||
GGML_ASSERT(false && "chunk type not supported");
|
||||
}
|
||||
n_pos += mtmd_input_chunk_get_n_pos(chunk);
|
||||
}
|
||||
return n_pos;
|
||||
}
|
||||
|
|
|
@ -751,6 +751,10 @@ const unsigned char * mtmd_bitmap_get_data(const mtmd_bitmap * bitmap) {
|
|||
return bitmap->data.data();
|
||||
}
|
||||
|
||||
// Returns bitmap->data.size(), i.e. the length in bytes of the buffer that
// mtmd_bitmap_get_data() exposes as `const unsigned char *`.
size_t mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap) {
|
||||
return bitmap->data.size();
|
||||
}
|
||||
|
||||
// Returns the bitmap's is_audio flag unchanged.
bool mtmd_bitmap_is_audio(const mtmd_bitmap * bitmap) {
|
||||
return bitmap->is_audio;
|
||||
}
|
||||
|
|
|
@ -119,11 +119,12 @@ MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
|
|||
// the data is in float format (PCM F32)
|
||||
MTMD_API mtmd_bitmap * mtmd_bitmap_init (uint32_t nx, uint32_t ny, const unsigned char * data);
|
||||
MTMD_API mtmd_bitmap * mtmd_bitmap_init_from_audio(size_t n_samples, const float * data);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_nx (const mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_ny (const mtmd_bitmap * bitmap);
|
||||
MTMD_API const unsigned char * mtmd_bitmap_get_data(const mtmd_bitmap * bitmap);
|
||||
MTMD_API bool mtmd_bitmap_is_audio(const mtmd_bitmap * bitmap);
|
||||
MTMD_API void mtmd_bitmap_free (mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_nx (const mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_ny (const mtmd_bitmap * bitmap);
|
||||
MTMD_API const unsigned char * mtmd_bitmap_get_data (const mtmd_bitmap * bitmap);
|
||||
MTMD_API size_t mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap);
|
||||
MTMD_API bool mtmd_bitmap_is_audio (const mtmd_bitmap * bitmap);
|
||||
MTMD_API void mtmd_bitmap_free (mtmd_bitmap * bitmap);
|
||||
// bitmap ID is optional, but useful for KV cache tracking
|
||||
// these getters/setters are dedicated functions, so you can for example calculate the hash of the image based on mtmd_bitmap_get_data()
|
||||
MTMD_API const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap);
|
||||
|
@ -322,6 +323,7 @@ struct bitmap {
|
|||
// Forwards to mtmd_bitmap_get_nx() on the wrapped bitmap pointer.
uint32_t nx() { return mtmd_bitmap_get_nx(ptr.get()); }
|
||||
// Forwards to mtmd_bitmap_get_ny() on the wrapped bitmap pointer.
uint32_t ny() { return mtmd_bitmap_get_ny(ptr.get()); }
|
||||
// Forwards to mtmd_bitmap_get_data(); the returned pointer is not copied here.
// NOTE(review): presumably only valid while the wrapped bitmap is alive — confirm.
const unsigned char * data() { return mtmd_bitmap_get_data(ptr.get()); }
|
||||
// Forwards to mtmd_bitmap_get_n_bytes() on the wrapped bitmap pointer.
size_t n_bytes() { return mtmd_bitmap_get_n_bytes(ptr.get()); }
|
||||
// Returns a std::string copy built from the C string that mtmd_bitmap_get_id() returns.
// NOTE(review): constructing std::string from a null `const char *` is undefined
// behavior — confirm mtmd_bitmap_get_id() never returns null.
std::string id() { return mtmd_bitmap_get_id(ptr.get()); }
|
||||
// Forwards `id` to mtmd_bitmap_set_id() on the wrapped bitmap pointer.
void set_id(const char * id) { mtmd_bitmap_set_id(ptr.get(), id); }
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue