mtmd : support Qwen 2.5 Omni (input audio+vision, no audio output) (#13784)
* mtmd : allow multiple modalities at the same time
* refactor mtmd tokenizer
* fix compile
* ok, missing SinusoidsPositionEmbedding
* first working version
* fix style
* more strict validate of n_embd
* refactor if..else to switch
* fix regression
* add test for 3B
* update docs
* fix tokenizing with add_special
* add more tests
* fix test case "huge"
* rm redundant code
* set_position_mrope_1d rm n_tokens
This commit is contained in:
parent
72b090da2c
commit
bc583e3c63
12 changed files with 1148 additions and 744 deletions
|
@@ -284,7 +284,9 @@ int main(int argc, char ** argv) {
|
|||
if (is_single_turn) {
|
||||
g_is_generating = true;
|
||||
if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
|
||||
- params.prompt += mtmd_default_marker();
|
||||
+ for (size_t i = 0; i < params.image.size(); i++) {
|
||||
+ params.prompt += mtmd_default_marker();
|
||||
+ }
|
||||
}
|
||||
common_chat_msg msg;
|
||||
msg.role = "user";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue