quantize : handle user-defined pruning of whole layers (blocks) (#13037)
This commit is contained in:
parent
238005c2dc
commit
fa4a9f2a1c
3 changed files with 119 additions and 9 deletions
|
@@ -390,6 +390,7 @@ extern "C" {
|
|||
void * imatrix; // pointer to importance matrix data
|
||||
void * kv_overrides; // pointer to vector containing overrides
|
||||
void * tensor_types; // pointer to vector containing tensor types
|
||||
void * prune_layers; // pointer to vector containing layer indices to prune
|
||||
} llama_model_quantize_params;
|
||||
|
||||
typedef struct llama_logit_bias {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue