llama/ggml: add LLM training support (#10544)

* llama/ggml: add LLM training support

more compact progress bar

llama_save_model_to_file

llama_opt_param_filter

ggml_graph_dup force_grads

refactor ggml_opt, fix test-opt

* remove logits_all

* refactor CUDA implementation for ACC

* reset graph at beginning of opt period
Author: Johannes Gäßler, 2025-05-12 14:44:49 +02:00 (committed by GitHub)
parent 064cc596ac
commit 10d2af0eaa
31 changed files with 1415 additions and 359 deletions
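Among the commit-message items above, llama_opt_param_filter is a callback that decides per tensor whether it should be trained. As a minimal, hedged sketch of such a filter (the exact typedef and signature come from this commit's llama.h additions and should be treated as an assumption here):

    #include <string.h>
    #include "ggml.h"

    // Train only attention tensors, freeze everything else; tensor->name is
    // the ggml tensor name, e.g. "blk.0.attn_q.weight". Illustrative only.
    static bool train_attn_only(const struct ggml_tensor * tensor, void * userdata) {
        (void) userdata; // unused in this sketch
        return strstr(tensor->name, "attn") != NULL;
    }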

ggml/src/ggml.c

@@ -5499,7 +5499,7 @@ static void ggml_compute_backward(
             // tensor = src0 * 1 + src1 * 0
             if (src0_needs_grads) {
                 // dsrc0 = dtensor * 1
-                ggml_add_or_set(ctx, cgraph, isrc0, grad);
+                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
             }
             if (src1_needs_grads) {
                 // dsrc1 = dtensor * 0 -> noop
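The ggml_reshape here matters because ggml_cpy allows the source and destination to have different shapes as long as the element counts match, and the incoming gradient carries the destination's shape. A hedged repro sketch (names and sizes are illustrative):

    // a (4x2) is copied into the layout of b (8 elements, 1-D); the gradient
    // flowing back has b's 1-D shape, so without the reshape it could not be
    // accumulated into the 2-D gradient slot belonging to a.
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_param(a);
    struct ggml_tensor * c = ggml_cpy(ctx, a, b);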
@@ -5780,10 +5780,9 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
 }

 void ggml_build_backward_expand(
-        struct ggml_context * ctx_static,
-        struct ggml_context * ctx_compute,
-        struct ggml_cgraph * cgraph,
-        bool accumulate) {
+        struct ggml_context * ctx,
+        struct ggml_cgraph * cgraph,
+        struct ggml_tensor ** grad_accs) {
     GGML_ASSERT(cgraph->n_nodes > 0);
     GGML_ASSERT(cgraph->grads);
     GGML_ASSERT(cgraph->grad_accs);
@@ -5856,21 +5855,24 @@ void ggml_build_backward_expand(
         GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
             node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);

-        const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
-        GGML_ASSERT(igrad != GGML_HASHSET_FULL);
-        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, igrad));
-        if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) {
-            cgraph->grad_accs[igrad] = ggml_dup_tensor(ctx_static, node);
-            cgraph->grads[igrad] = cgraph->grad_accs[igrad];
-            ggml_format_name(cgraph->grad_accs[igrad], "grad acc for %s", node->name);
+        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
+        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
+        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
+        if (grad_accs && grad_accs[i]) {
+            cgraph->grad_accs[ihash] = grad_accs[i];
+            cgraph->grads[ihash] = cgraph->grad_accs[ihash];
+        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
+            // loss tensors always need a gradient accumulator
+            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
+            cgraph->grads[ihash] = cgraph->grad_accs[ihash];
         }
-        grads_needed[igrad] = true;
+        grads_needed[ihash] = true;
     }

     for (int i = n_nodes_f - 1; i >= 0; --i) {
         // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
         // use allocator to automatically make inplace operations
-        ggml_compute_backward(ctx_compute, cgraph, i, grads_needed);
+        ggml_compute_backward(ctx, cgraph, i, grads_needed);
     }

     free(grads_needed);
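The refactored ggml_build_backward_expand takes a single context plus an optional array of caller-provided gradient accumulators, instead of the old two-context/accumulate-flag pair; with grad_accs == NULL, only tensors flagged as loss get an accumulator. A minimal usage sketch under those assumptions (ctx and loss are illustrative):

    // Build a forward graph with gradient bookkeeping, flag the loss tensor,
    // then expand the same graph in place with its backward pass.
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gb, loss);
    ggml_set_loss(loss);
    ggml_build_backward_expand(ctx, gb, /*grad_accs =*/ NULL);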
@@ -6016,8 +6018,8 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
     }
 }

-struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
-    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
+struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
+    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
     ggml_graph_cpy(cgraph, result);
     return result;
 }
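The new force_grads flag (listed as "ggml_graph_dup force_grads" in the commit message) lets a forward-only graph be duplicated into a copy that allocates gradient metadata even though the source has none, e.g. so an optimizer can attach gradients to a user-supplied graph. Sketch:

    // gf is a plain forward graph; the duplicate still gets grads/grad_accs storage.
    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf, /*force_grads =*/ true);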
@@ -6036,6 +6038,9 @@ struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
 }

 void ggml_graph_reset(struct ggml_cgraph * cgraph) {
+    if (!cgraph) {
+        return;
+    }
     GGML_ASSERT(cgraph->grads != NULL);

     for (int i = 0; i < cgraph->n_nodes; i++) {
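The added NULL guard makes ggml_graph_reset safe to call unconditionally, which matches the commit message's "reset graph at beginning of opt period": each optimization period can start with a reset whether or not a backward graph exists. Sketch:

    ggml_graph_reset(gb); // resets gradient accumulators; now a no-op when gb == NULL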
@@ -6345,8 +6350,8 @@ void ggml_set_output(struct ggml_tensor * tensor) {
     tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
 }

-void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
-    GGML_UNUSED(ctx); // TODO: remove this parameter
+void ggml_set_param(struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor->op == GGML_OP_NONE);
     tensor->flags |= GGML_TENSOR_FLAG_PARAM;
 }
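ggml_set_param loses its unused context argument, and the new assert enforces that parameters are flagged while still leaf tensors, i.e. before any op consumes them. Sketch of the intended call order (variable names illustrative):

    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_out);
    ggml_set_param(w);                                  // OK: w->op == GGML_OP_NONE
    struct ggml_tensor * y = ggml_mul_mat(ctx, w, x);
    // ggml_set_param(y);                               // would assert: y is produced by an op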