add geglu activation function (#14074)
Co-authored-by: dinhhuy <huy.dinh@brains-tech.co.jp>
This commit is contained in:
parent
056eb74534
commit
91a8ee6a6f
2 changed files with 23 additions and 0 deletions
|
@ -659,6 +659,28 @@ ggml_tensor * llm_graph_context::build_ffn(
|
||||||
cur = ggml_mul(ctx0, x0, x1);
|
cur = ggml_mul(ctx0, x0, x1);
|
||||||
cb(cur, "ffn_mul", il);
|
cb(cur, "ffn_mul", il);
|
||||||
} break;
|
} break;
|
||||||
|
case LLM_FFN_GEGLU:
|
||||||
|
{
|
||||||
|
// Split into two equal parts
|
||||||
|
int64_t split_point = cur->ne[0] / 2;
|
||||||
|
ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
|
||||||
|
ctx0, cur, split_point,
|
||||||
|
cur->ne[1], cur->nb[1], 0
|
||||||
|
));
|
||||||
|
ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
|
||||||
|
ctx0, cur, split_point,
|
||||||
|
cur->ne[1], cur->nb[1],
|
||||||
|
split_point * ggml_element_size(cur)
|
||||||
|
));
|
||||||
|
|
||||||
|
// Apply GELU activation function to the first part
|
||||||
|
output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
|
||||||
|
cb(output_ffn_up, "ffn_gelu", il);
|
||||||
|
|
||||||
|
// Element-wise multiplication between the activated part and the gate part
|
||||||
|
cur = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
|
||||||
|
cb(cur, "ffn_geglu", il);
|
||||||
|
} break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gate && type_gate == LLM_FFN_PAR) {
|
if (gate && type_gate == LLM_FFN_PAR) {
|
||||||
|
|
|
@ -36,6 +36,7 @@ enum llm_ffn_op_type {
|
||||||
LLM_FFN_RELU,
|
LLM_FFN_RELU,
|
||||||
LLM_FFN_RELU_SQR,
|
LLM_FFN_RELU_SQR,
|
||||||
LLM_FFN_SWIGLU,
|
LLM_FFN_SWIGLU,
|
||||||
|
LLM_FFN_GEGLU,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llm_ffn_gate_type {
|
enum llm_ffn_gate_type {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue