mtmd : add support for Qwen2-Audio and SeaLLM-Audio (#13760)

* mtmd : add Qwen2-Audio support

* small clean up

* update discussion link

* clarify mtmd_get_output_embd

* clarification in multimodal.md

* fix ultravox bug

* ggml_cont
This commit is contained in:
Xuan-Son Nguyen 2025-05-25 14:06:32 +02:00 committed by GitHub
parent a08c1d2845
commit 40aaa8a403
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 144 additions and 52 deletions

View file

@@ -4,6 +4,8 @@
#include <stddef.h>
#include <stdint.h>
// !!! Internal header, to be used by mtmd only !!!
struct clip_ctx;
struct clip_image_size {
@@ -99,3 +101,4 @@ void clip_image_f32_batch_add_mel(struct clip_image_f32_batch * batch, int n_mel
bool clip_has_vision_encoder(const struct clip_ctx * ctx);
bool clip_has_audio_encoder(const struct clip_ctx * ctx);
bool clip_has_whisper_encoder(const struct clip_ctx * ctx);