llama/ggml: add LLM training support (#10544)

* llama/ggml: add LLM training support

more compact progress bar

llama_save_model_to_file

llama_opt_param_filter

ggml_graph_dup force_grads

refactor ggml_opt, fix test-opt

* remove logits_all

* refactor CUDA implementation for ACC

* reset graph at beginning of opt period
commit 10d2af0eaa (parent 064cc596ac)
Author: Johannes Gäßler
Date:   2025-05-12 14:44:49 +02:00 (committed by GitHub)
31 changed files with 1415 additions and 359 deletions
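For orientation before the per-file diff: the new entry points let an application fine-tune a loaded model and save the result. The sketch below is modeled on the training flow this PR introduces; the llama_opt_params fields, the llama_opt_init/llama_opt_epoch signatures, and the ggml_opt_* helpers are reconstructed from the commit message and may not match the merged API verbatim (dataset construction is omitted).

// Hedged sketch of the training loop enabled by this commit; names and
// signatures are assumptions, not verbatim llama.h.
#include "llama.h"
#include "ggml-opt.h"

static void train_and_save(struct llama_model * model, struct llama_context * ctx,
                           ggml_opt_dataset_t dataset /* pre-tokenized data */) {
    struct llama_opt_params lopt_params = {
        /*n_ctx_train     =*/ 0,                          // 0: use the context size of ctx
        /*param_filter    =*/ llama_opt_param_filter_all, // mark every tensor as trainable
        /*param_filter_ud =*/ nullptr,
        /*get_opt_pars    =*/ ggml_opt_get_default_optimizer_params, // default AdamW
        /*get_opt_pars_ud =*/ nullptr,
    };
    llama_opt_init(ctx, model, lopt_params);

    // hold out the last 10% of the data for evaluation
    const int64_t idata_split = ggml_opt_dataset_ndata(dataset) * 0.9f;

    ggml_opt_result_t result_train = ggml_opt_result_init();
    ggml_opt_result_t result_eval  = ggml_opt_result_init();

    for (int epoch = 0; epoch < 2; ++epoch) {
        // one pass over the dataset; the progress-bar callback is the
        // "more compact progress bar" from the commit message
        llama_opt_epoch(ctx, dataset, result_train, result_eval, idata_split,
                        ggml_opt_epoch_callback_progress_bar,
                        ggml_opt_epoch_callback_progress_bar);
        ggml_opt_result_reset(result_train);
        ggml_opt_result_reset(result_eval);
    }

    ggml_opt_result_free(result_train);
    ggml_opt_result_free(result_eval);

    // llama_save_model_to_file as named in the commit message above
    llama_save_model_to_file(model, "model-trained.gguf");
}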

src/llama-vocab.cpp

@@ -1,5 +1,7 @@
 #include "llama-vocab.h"

+#include "ggml.h"
+#include "gguf.h"

 #include "llama-impl.h"
 #include "llama-model-loader.h"
@@ -1234,6 +1236,9 @@ struct fragment_buffer_variant {
 struct llama_vocab::impl {
     uint32_t n_token_types = 0; // for BERT-style token types

+    std::string tokenizer_model;
+    std::string tokenizer_pre;
+
     enum llama_vocab_type     type     = LLAMA_VOCAB_TYPE_SPM;
     enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
@@ -1369,9 +1374,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
     // determine vocab type
     {
-        std::string tokenizer_model;
-        std::string tokenizer_pre;
-
         ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
         ml.get_key(LLM_KV_TOKENIZER_PRE,   tokenizer_pre, false);
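Note: the two hunks above move tokenizer_model and tokenizer_pre from locals in load() to members of llama_vocab::impl, so the strings read from GGUF survive loading and can be handed back out through the new get_tokenizer_model()/get_tokenizer_pre() accessors further down.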
@@ -1466,7 +1468,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str());
         if (precompiled_charsmap_keyidx != -1) {
-            size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
+            const gguf_type pc_type = gguf_get_arr_type(ctx, precompiled_charsmap_keyidx);
+            GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8);
+
+            const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
             const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
             precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
 #ifdef IS_BIG_ENDIAN
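The hunk above also hardens loading: the element type of the GGUF array is now asserted to be int8/uint8 before its bytes are copied, instead of being trusted blindly. The same pattern against the public gguf API, as a self-contained sketch (read_charsmap is an illustrative name; the key string is the one LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP resolves to):

#include "ggml.h"
#include "gguf.h"

#include <vector>

// Sketch: load the precompiled charsmap bytes from a GGUF file, checking the
// declared array element type before reinterpreting the data as raw bytes.
static std::vector<char> read_charsmap(const char * path) {
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx = gguf_init_from_file(path, params);
    if (ctx == nullptr) {
        return {};
    }

    std::vector<char> data;
    const int keyidx = gguf_find_key(ctx, "tokenizer.ggml.precompiled_charsmap");
    if (keyidx != -1) {
        const gguf_type type = gguf_get_arr_type(ctx, keyidx);
        GGML_ASSERT(type == GGUF_TYPE_INT8 || type == GGUF_TYPE_UINT8);

        const size_t n = gguf_get_arr_n(ctx, keyidx);
        const char * p = (const char *) gguf_get_arr_data(ctx, keyidx);
        data.assign(p, p + n);
    }

    gguf_free(ctx);
    return data;
}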
@@ -2789,6 +2794,14 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
     pimpl->load(ml, kv);
 }

+std::string llama_vocab::get_tokenizer_model() const {
+    return pimpl->tokenizer_model;
+}
+
+std::string llama_vocab::get_tokenizer_pre() const {
+    return pimpl->tokenizer_pre;
+}
+
 enum llama_vocab_type llama_vocab::get_type() const {
     return pimpl->type;
 }
@@ -3011,6 +3024,20 @@ int llama_vocab::find_bpe_rank(const std::string & token_left, const std::string & token_right) const {
     return it->second;
 }

+std::vector<std::string> llama_vocab::get_bpe_merges() const {
+    std::vector<std::string> result(pimpl->bpe_ranks.size());
+
+    for (const auto & pair : pimpl->bpe_ranks) {
+        result[pair.second] = pair.first.first + " " + pair.first.second;
+    }
+
+    return result;
+}
+
+std::vector<char> llama_vocab::get_precompiled_charsmap() const {
+    return pimpl->precompiled_charsmap;
+}
+
 int32_t llama_vocab::tokenize(
         const char * text,
         int32_t text_len,
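Taken together, the new accessors expose everything a writer needs to re-emit the tokenizer section of a GGUF file, which is presumably what the llama_save_model_to_file path from the commit message consumes. A hypothetical caller (dump_vocab_info is illustration, not code from this commit):

#include "llama-vocab.h"

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical: print the tokenizer metadata now reachable through llama_vocab.
static void dump_vocab_info(const llama_vocab & vocab) {
    std::printf("tokenizer model: %s\n", vocab.get_tokenizer_model().c_str());
    std::printf("tokenizer pre:   %s\n", vocab.get_tokenizer_pre().c_str());

    // one "left right" string per merge, ordered by BPE rank
    const std::vector<std::string> merges = vocab.get_bpe_merges();
    std::printf("n_merges:        %zu\n", merges.size());

    // raw bytes; empty unless the tokenizer ships a precompiled charsmap
    const std::vector<char> charsmap = vocab.get_precompiled_charsmap();
    std::printf("charsmap bytes:  %zu\n", charsmap.size());
}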