quantize : handle user-defined pruning of whole layers (blocks) (#13037)

This commit is contained in:
Ed Addario 2025-06-22 22:16:26 +01:00 committed by GitHub
parent 238005c2dc
commit fa4a9f2a1c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 119 additions and 9 deletions

View file

@@ -390,6 +390,7 @@ extern "C" {
void * imatrix; // pointer to importance matrix data
void * kv_overrides; // pointer to vector containing overrides
void * tensor_types; // pointer to vector containing tensor types
void * prune_layers; // pointer to vector containing layer indices to prune
} llama_model_quantize_params;
typedef struct llama_logit_bias {