ggml : add ALiBi support for ggml_soft_max_ext (#5488)
* ggml : avoid recomputing alibi slopes (CPU)
* llama : reuse hparams.f_max_alibi_bias in all cases (ggml-ci)
* ggml : support alibi bias in ggml_soft_max_ext (CPU + Metal) (ggml-ci)
* ggml : handle all SRCs (do not break on first null) (ggml-ci)
* tests : do not use slope for large soft_max, it accumulates too much error (ggml-ci)
* ggml : alternative ALiBi without extra tensor, computing the slopes in the kernel (ggml-ci)
* cuda : add ALiBi support in ggml_soft_max_ext (ggml-ci)
* ggml : deprecate ggml_alibi
* ggml : support multi-sequence ALiBi (Metal) (ggml-ci)
* cuda : add multi-seq ALiBi + remove F16 soft_max (ggml-ci)
* ggml : update deprecation message
* ggml : fix pos ptr when no ALiBi (ggml-ci)
* cuda : fix performance (pow -> powf)
* cuda : precompute ALiBi constants
* metal : pre-compute ALiBi slopes (ggml-ci)
* llama : init kq_pos only if needed (ggml-ci)
* test-backend-ops : add null pos test to soft_max; replace soft_max tests (ggml-ci)

---------

Co-authored-by: slaren <slarengh@gmail.com>
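Several of the bullets above ("alternative ALiBi without extra tensor", "precompute ALiBi constants", "pre-compute ALiBi slopes") refer to deriving the per-head ALiBi slope from max_bias and the head index inside the kernels instead of reading it from a separate tensor. A minimal sketch of that derivation, assuming the standard ALiBi slope recipe for non-power-of-two head counts; the function name and layout are illustrative, not copied from this commit:

#include <math.h>

// Sketch: per-head ALiBi slope derived on the fly from max_bias and the head index h.
// With max_bias = 8.0f and a power-of-two n_head this reduces to 2^(-8*(h+1)/n_head).
// The kernel then adds slope*pos[i] to each logit before the softmax, which is the
// pos[i]*(ALiBi slope) term in the updated ggml_soft_max_ext comment.
static float alibi_slope(int h, int n_head, float max_bias) {
    const int   n_head_log2 = 1 << (int) floorf(log2f((float) n_head));

    const float m0 = powf(2.0f, -(max_bias       ) / n_head_log2);
    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);

    // heads below n_head_log2 follow the geometric sequence m0^(h+1);
    // the remaining heads interleave using ratio m1 with odd exponents
    return h < n_head_log2 ? powf(m0, h + 1)
                           : powf(m1, 2*(h - n_head_log2) + 1);
}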
parent 6e4e973b26
commit 8f1be0d42f
9 changed files with 348 additions and 357 deletions
ggml.h (13 changed lines)
@@ -1383,13 +1383,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
-    // fused soft_max(a*scale + mask)
+    // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
     // mask is optional
+    // pos is required when max_bias > 0.0f
+    // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * mask,
-            float                 scale);
+            struct ggml_tensor  * pos,
+            float                 scale,
+            float                 max_bias);
 
     GGML_API struct ggml_tensor * ggml_soft_max_back(
             struct ggml_context * ctx,

@@ -1491,12 +1495,13 @@
 
     // alibi position embedding
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_alibi(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   n_past,
             int                   n_head,
-            float                 bias_max);
+            float                 bias_max),
+        "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
 
     // clamp
     // in-place, returns view(a)
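For callers, the diff above means the old three-argument fused softmax maps onto the new signature by passing NULL for pos and 0.0f for max_bias, while enabling ALiBi means supplying a position tensor and a non-zero max_bias in place of the now-deprecated ggml_alibi op. A hedged sketch of a call site; the wrapper, tensor names (kq, kq_mask, kq_pos) and parameter names are illustrative, not taken from llama.cpp:

#include "ggml.h"

// Illustrative helper, not part of ggml: builds the fused softmax node with the new API.
static struct ggml_tensor * build_kq_soft_max(
        struct ggml_context * ctx,
        struct ggml_tensor  * kq,        // attention scores
        struct ggml_tensor  * kq_mask,   // optional mask, may be NULL
        struct ggml_tensor  * kq_pos,    // token positions, only needed for ALiBi
        float                 kq_scale,
        float                 max_alibi_bias) {
    // max_alibi_bias == 0.0f: equivalent to the previous
    //     ggml_soft_max_ext(ctx, kq, kq_mask, kq_scale)
    // max_alibi_bias  > 0.0f: kq_pos must be non-NULL; pos[i]*(per-head slope)
    //     is fused into the softmax, replacing a separate ggml_alibi() node
    return ggml_soft_max_ext(ctx, kq, kq_mask, kq_pos, kq_scale, max_alibi_bias);
}

Fusing the bias into the softmax avoids materializing an intermediate tensor for the ALiBi term and lets each backend precompute or derive the slopes itself, which is what the CUDA and Metal bullets in the commit message refer to.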