sync : ggml (Metal F32 support + reduce ggml-alloc size) (#3192)

* sync : ggml (Metal F32 support + reduce ggml-alloc size)

ggml-ci

* llama-bench : fix ggml_cpu_has_metal() duplicate function

ggml-ci
Georgi Gerganov 2023-09-15 19:06:03 +03:00 committed by GitHub
parent 7e50d34be6
commit 8c00b7a6ff
6 changed files with 193 additions and 90 deletions

ggml.c

@@ -17294,10 +17294,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        } else {
            // wait for other threads to finish
            const int last = node_n;
-           do {
-               //sched_yield();
+           while (true) {
+               // TODO: this sched_yield can have significant impact on the performance - either positive or negative
+               //       depending on the workload and the operating system.
+               //       since it is not clear what is the best approach, it should potentially become user-configurable
+               //       ref: https://github.com/ggerganov/ggml/issues/291
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+               sched_yield();
+#endif
                node_n = atomic_load(&state->shared->node_n);
-           } while (node_n == last);
+               if (node_n != last) break;
+           };
        }
        // check if we should stop
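As a side note on the hunk above, here is a minimal standalone sketch of the same spin-wait pattern (this is not the llama.cpp source; the SPIN_WITH_YIELD toggle is a hypothetical stand-in for the GGML_USE_ACCELERATE/GGML_USE_OPENBLAS guard):

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

// Spin until another thread publishes a node index different from `last`.
static void wait_for_next_node(atomic_int * shared_node_n, int last) {
    while (true) {
#if defined(SPIN_WITH_YIELD) // hypothetical toggle, stands in for the BLAS-specific guard above
        sched_yield();       // give up the CPU slice instead of hard-spinning
#endif
        if (atomic_load(shared_node_n) != last) {
            break;           // the coordinating thread has moved on to a new node
        }
    }
}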
@@ -18348,7 +18356,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
    for (int i = 0; i < cgraph->n_leafs; i++) {
        struct ggml_tensor * node = cgraph->leafs[i];
-       GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
+       GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
            i,
            node->ne[0], node->ne[1],
            ggml_op_name(node->op),
@@ -20111,27 +20119,27 @@ const char * gguf_type_name(enum gguf_type type) {
    return GGUF_TYPE_NAME[type];
}

-int gguf_get_version(struct gguf_context * ctx) {
+int gguf_get_version(const struct gguf_context * ctx) {
    return ctx->header.version;
}

-size_t gguf_get_alignment(struct gguf_context * ctx) {
+size_t gguf_get_alignment(const struct gguf_context * ctx) {
    return ctx->alignment;
}

-size_t gguf_get_data_offset(struct gguf_context * ctx) {
+size_t gguf_get_data_offset(const struct gguf_context * ctx) {
    return ctx->offset;
}

-void * gguf_get_data(struct gguf_context * ctx) {
+void * gguf_get_data(const struct gguf_context * ctx) {
    return ctx->data;
}

-int gguf_get_n_kv(struct gguf_context * ctx) {
+int gguf_get_n_kv(const struct gguf_context * ctx) {
    return ctx->header.n_kv;
}

-int gguf_find_key(struct gguf_context * ctx, const char * key) {
+int gguf_find_key(const struct gguf_context * ctx, const char * key) {
    // return -1 if key not found
    int keyfound = -1;
@@ -20147,85 +20155,85 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
    return keyfound;
}

-const char * gguf_get_key(struct gguf_context * ctx, int i) {
+const char * gguf_get_key(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].key.data;
}

-enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
+enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].type;
}

-enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
+enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.arr.type;
}

-const void * gguf_get_arr_data(struct gguf_context * ctx, int i) {
+const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.arr.data;
}

-const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
+const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
    struct gguf_kv * kv = &ctx->kv[key_id];
    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
    return str->data;
}

-int gguf_get_arr_n(struct gguf_context * ctx, int i) {
+int gguf_get_arr_n(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.arr.n;
}

-uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
+uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.uint8;
}

-int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
+int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.int8;
}

-uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
+uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.uint16;
}

-int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
+int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.int16;
}

-uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
+uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.uint32;
}

-int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
+int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.int32;
}

-float gguf_get_val_f32(struct gguf_context * ctx, int i) {
+float gguf_get_val_f32(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.float32;
}

-uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
+uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.uint64;
}

-int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
+int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.int64;
}

-double gguf_get_val_f64(struct gguf_context * ctx, int i) {
+double gguf_get_val_f64(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.float64;
}

-bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
+bool gguf_get_val_bool(const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.bool_;
}

-const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
+const char * gguf_get_val_str (const struct gguf_context * ctx, int i) {
    return ctx->kv[i].value.str.data;
}

-int gguf_get_n_tensors(struct gguf_context * ctx) {
+int gguf_get_n_tensors(const struct gguf_context * ctx) {
    return ctx->header.n_tensors;
}

-int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
+int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
    // return -1 if tensor not found
    int tensorfound = -1;
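A rough usage sketch for the getters above, not part of the diff: looking up a single metadata key and reading it with the matching typed getter. The key name "llama.context_length" is used purely as an example.

#include <stdio.h>
#include "ggml.h"

static void print_context_length(const struct gguf_context * ctx) {
    const int kid = gguf_find_key(ctx, "llama.context_length");
    if (kid < 0) {
        printf("key not found\n");
        return;
    }
    // read the value only if it has the expected type
    if (gguf_get_kv_type(ctx, kid) == GGUF_TYPE_UINT32) {
        printf("context_length = %u\n", gguf_get_val_u32(ctx, kid));
    }
}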
@@ -20241,11 +20249,11 @@ int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
    return tensorfound;
}

-size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
+size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
    return ctx->infos[i].offset;
}

-char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
+char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
    return ctx->infos[i].name.data;
}
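A slightly larger sketch, again only illustrative, of why the const qualifier is convenient: a read-only dump of all key/value pairs and tensor infos from a file. The file name and the minimal error handling are placeholders.

#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
    struct gguf_context * ctx = gguf_init_from_file("model.gguf", params);
    if (!ctx) {
        return 1;
    }

    // metadata key/value pairs
    for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
        printf("kv %3d: %-40s (%s)\n", i, gguf_get_key(ctx, i),
               gguf_type_name(gguf_get_kv_type(ctx, i)));
    }

    // tensor names and offsets into the data section
    for (int i = 0; i < gguf_get_n_tensors(ctx); i++) {
        printf("tensor %3d: %-40s offset=%zu\n", i,
               gguf_get_tensor_name(ctx, i), gguf_get_tensor_offset(ctx, i));
    }

    gguf_free(ctx);
    return 0;
}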
@@ -20528,7 +20536,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
    buf->offset += el_size;
}

-static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
+static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
    // write header
    gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
    gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
@@ -20643,7 +20651,7 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
    }
}

-void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
+void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
    FILE * file = fopen(fname, "wb");
    if (!file) {
        GGML_ASSERT(false && "failed to open file for writing");
@@ -20660,7 +20668,7 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only
    fclose(file);
}

-size_t gguf_get_meta_size(struct gguf_context * ctx) {
+size_t gguf_get_meta_size(const struct gguf_context * ctx) {
    // no allocs - only compute size
    struct gguf_buf buf = gguf_buf_init(0);
@@ -20669,7 +20677,7 @@ size_t gguf_get_meta_size(struct gguf_context * ctx) {
    return buf.offset;
}

-void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
+void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
    struct gguf_buf buf = gguf_buf_init(16*1024);
    gguf_write_to_buf(ctx, &buf, true);
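The two meta helpers changed just above can be paired to serialize only the GGUF header and metadata into memory. A hedged sketch follows; the helper name dump_gguf_meta is made up for illustration.

#include <stdlib.h>
#include "ggml.h"

static void * dump_gguf_meta(const struct gguf_context * ctx, size_t * size_out) {
    const size_t size = gguf_get_meta_size(ctx); // size of header + KV pairs + tensor infos
    void * data = malloc(size);
    if (data) {
        gguf_get_meta_data(ctx, data); // fills the buffer, no file I/O involved
    }
    if (size_out) {
        *size_out = size;
    }
    return data; // caller frees
}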
@@ -20745,6 +20753,14 @@ int ggml_cpu_has_arm_fma(void) {
#endif
}

+int ggml_cpu_has_metal(void) {
+#if defined(GGML_USE_METAL)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
int ggml_cpu_has_f16c(void) {
#if defined(__F16C__)
    return 1;
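Tying back to the llama-bench fix mentioned in the commit message: with ggml.c now providing ggml_cpu_has_metal(), a tool can query it alongside the other feature flags instead of defining its own copy. A minimal sketch, where the printed format is illustrative and not llama-bench's actual output:

#include <stdio.h>
#include "ggml.h"

int main(void) {
    // report build-time backend/instruction-set availability
    printf("metal = %d | arm_fma = %d | f16c = %d\n",
           ggml_cpu_has_metal(), ggml_cpu_has_arm_fma(), ggml_cpu_has_f16c());
    return 0;
}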