Arrcttacsrks committed
Commit 6aa6763 · verified · 1 parent: 98f8d25

Upload llama.cpp/ggml/src/ggml-impl.h with huggingface_hub

Files changed (1)
  1. llama.cpp/ggml/src/ggml-impl.h +296 -0
llama.cpp/ggml/src/ggml-impl.h ADDED
@@ -0,0 +1,296 @@
+ #pragma once
+
+ // GGML internal header
+
+ #include "ggml.h"
+
+ #include <assert.h>
+ #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
+ #include <stdbool.h>
+ #include <stdint.h>
+ #include <string.h>
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ #undef MIN
+ #undef MAX
+
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
+ #define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+ // required for mmap as gguf only guarantees 32-byte alignment
+ #define TENSOR_ALIGNMENT 32
+
+ // static_assert should be a #define, but if it's not,
+ // fall back to the _Static_assert C11 keyword.
+ // if C99 - static_assert is noop
+ // ref: https://stackoverflow.com/a/53923785/4039976
+ #ifndef __cplusplus
+ #ifndef static_assert
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
+ #define static_assert(cond, msg) _Static_assert(cond, msg)
+ #else
+ #define static_assert(cond, msg) struct global_scope_noop_trick
+ #endif
+ #endif
+ #endif
+
+ static inline int ggml_up32(int n) {
+     return (n + 31) & ~31;
+ }
+
+ //static inline int ggml_up64(int n) {
+ //    return (n + 63) & ~63;
+ //}
+
+ static inline int ggml_up(int n, int m) {
+     // assert m is a power of 2
+     GGML_ASSERT((m & (m - 1)) == 0);
+     return (n + m - 1) & ~(m - 1);
+ }
+
+ //
+ // logging
+ //
+
+ GGML_ATTRIBUTE_FORMAT(2, 3)
+ void ggml_log_internal        (enum ggml_log_level level, const char * format, ...);
+ void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
+
+ #define GGML_LOG(...)       ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
+ #define GGML_LOG_INFO(...)  ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+ #define GGML_LOG_WARN(...)  ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+ #define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+ #define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
+ #define GGML_LOG_CONT(...)  ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
+
+ #define GGML_DEBUG 0
+
+ #if (GGML_DEBUG >= 1)
+ #define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__)
+ #else
+ #define GGML_PRINT_DEBUG(...)
+ #endif
+
+ #if (GGML_DEBUG >= 5)
+ #define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__)
+ #else
+ #define GGML_PRINT_DEBUG_5(...)
+ #endif
+
+ #if (GGML_DEBUG >= 10)
+ #define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__)
+ #else
+ #define GGML_PRINT_DEBUG_10(...)
+ #endif
+
+ // tensor params
+
+ static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+     GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
+     assert(params_size <= GGML_MAX_OP_PARAMS);
+     memcpy(tensor->op_params, params, params_size);
+ }
+
+ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+     assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+     return ((const int32_t *)(tensor->op_params))[i];
+ }
+
+ static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
+     assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+     return ((const float *)(tensor->op_params))[i];
+ }
+
+ static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+     assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+     ((int32_t *)(tensor->op_params))[i] = value;
+ }
+
+ static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
+     assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+     ((float *)(tensor->op_params))[i] = value;
+ }
+
+ struct ggml_map_custom1_op_params {
+     ggml_custom1_op_t fun;
+     int n_tasks;
+     void * userdata;
+ };
+
+
+ struct ggml_map_custom2_op_params {
+     ggml_custom2_op_t fun;
+     int n_tasks;
+     void * userdata;
+ };
+
+
+ struct ggml_map_custom3_op_params {
+     ggml_custom3_op_t fun;
+     int n_tasks;
+     void * userdata;
+ };
+
+ // bitset
+
+ typedef uint32_t ggml_bitset_t;
+
+ static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
+ #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
+ #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
+
+ static size_t ggml_bitset_size(size_t n) {
+     return (n + BITSET_MASK) >> BITSET_SHR;
+ }
+
+ static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
+     return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
+ }
+
+ static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
+     bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
+ }
+
+ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
+     bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
+ }
+
+ // hash set
+
+ #define GGML_HASHSET_FULL           ((size_t)-1)
+ #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
+
+ struct ggml_hash_set {
+     size_t size;
+     ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
+     struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
+ };
+
+ struct ggml_hash_set ggml_hash_set_new(size_t size);
+ void                 ggml_hash_set_free(struct ggml_hash_set * hash_set);
+
+ // returns the minimum size for a hash set that can hold min_sz elements
+ size_t ggml_hash_size(size_t min_sz);
+
+ // remove all elements from the hash set
+ void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
+
+ // returns true if key is in the hash set
+ static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+ // returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
+ static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+ // returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
+ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+ // return index, asserts if table is full
+ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+ // hash function for ggml_tensor
+ static inline size_t ggml_hash(const struct ggml_tensor * p) {
+     // the last 4 bits are always zero due to alignment
+     return (size_t)(uintptr_t)p >> 4;
+ }
+
+ static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+     size_t h = ggml_hash(key) % hash_set->size;
+
+     // linear probing
+     size_t i = h;
+     while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
+         i = (i + 1) % hash_set->size;
+         if (i == h) {
+             // visited all hash table entries -> not found
+             return GGML_HASHSET_FULL;
+         }
+     }
+     return i;
+ }
+
+ static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+     size_t i = ggml_hash_find(hash_set, key);
+     return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
+ }
+
+ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+     size_t h = ggml_hash(key) % hash_set->size;
+
+     // linear probing
+     size_t i = h;
+     do {
+         if (!ggml_bitset_get(hash_set->used, i)) {
+             ggml_bitset_set(hash_set->used, i);
+             hash_set->keys[i] = key;
+             return i;
+         }
+         if (hash_set->keys[i] == key) {
+             return GGML_HASHSET_ALREADY_EXISTS;
+         }
+         i = (i + 1) % hash_set->size;
+     } while (i != h);
+
+     // visited all hash table entries -> not found
+     GGML_ABORT("fatal error");
+ }
+
+ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+     size_t h = ggml_hash(key) % hash_set->size;
+
+     // linear probing
+     size_t i = h;
+     do {
+         if (!ggml_bitset_get(hash_set->used, i)) {
+             ggml_bitset_set(hash_set->used, i);
+             hash_set->keys[i] = key;
+             return i;
+         }
+         if (hash_set->keys[i] == key) {
+             return i;
+         }
+         i = (i + 1) % hash_set->size;
+     } while (i != h);
+
+     // visited all hash table entries -> not found
+     GGML_ABORT("fatal error");
+ }
+
+ // computation graph
+
+ enum ggml_cgraph_eval_order {
+     GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
+     GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
+     GGML_CGRAPH_EVAL_ORDER_COUNT
+ };
+
+ struct ggml_cgraph {
+     int size;
+     int n_nodes;
+     int n_leafs;
+
+     struct ggml_tensor ** nodes;
+     struct ggml_tensor ** grads;
+     struct ggml_tensor ** leafs;
+
+     struct ggml_hash_set visited_hash_set;
+
+     enum ggml_cgraph_eval_order order;
+ };
+
+ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
+
+ // Memory allocation
+
+ void * ggml_aligned_malloc(size_t size);
+ void ggml_aligned_free(void * ptr, size_t size);
+
+ // TODO: move to threading file
+ void ggml_critical_section_start(void);
+ void ggml_critical_section_end(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
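
A note on the alignment helpers in the uploaded header: ggml_up(n, m) rounds n up to the next multiple of m with the bit trick (n + m - 1) & ~(m - 1), which is only valid when m is a power of two, hence the GGML_ASSERT on (m & (m - 1)). A minimal standalone sketch of the same trick (not part of the header; round_up is an illustrative name):

    #include <assert.h>
    #include <stdio.h>

    // round n up to the next multiple of m; valid only for power-of-two m
    static int round_up(int n, int m) {
        assert((m & (m - 1)) == 0); // power-of-two check, as in ggml_up
        return (n + m - 1) & ~(m - 1);
    }

    int main(void) {
        printf("%d\n", round_up(70, 32)); // 96: smallest multiple of 32 >= 70
        printf("%d\n", round_up(64, 32)); // 64: already aligned, unchanged
        return 0;
    }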
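The bitset helpers pack one flag per bit into uint32_t words: index i lives in word i >> BITSET_SHR (i.e. i / 32) at bit i & BITSET_MASK (i.e. i % 32). A small self-contained sketch of the same scheme; the helper names here are illustrative, not the header's API:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t bitset_t;
    #define SHR  5                         // log2(32 bits per word)
    #define MASK (sizeof(bitset_t)*8 - 1)  // 31

    static bool bit_get(const bitset_t * b, size_t i) { return !!(b[i >> SHR] & (1u << (i & MASK))); }
    static void bit_set(bitset_t * b, size_t i)       { b[i >> SHR] |=  (1u << (i & MASK)); }
    static void bit_clear(bitset_t * b, size_t i)     { b[i >> SHR] &= ~(1u << (i & MASK)); }

    int main(void) {
        bitset_t bits[4] = {0};             // 4 words hold 128 flags
        bit_set(bits, 70);                  // word 70>>5 == 2, bit 70&31 == 6
        printf("%d\n", bit_get(bits, 70));  // 1
        bit_clear(bits, 70);
        printf("%d\n", bit_get(bits, 70));  // 0
        return 0;
    }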
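The hash set resolves collisions by linear probing: start at ggml_hash(key) % size and scan forward until a free slot or the key itself turns up; wrapping all the way back to the start means the table is full. A toy standalone version with int keys and a plain bool used[] array, mirroring the probing loop of ggml_hash_find_or_insert (the real code packs the used flags into the bitset above, hashes tensor pointers, and aborts on a full table instead of returning a sentinel):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define TABLE_SIZE 8
    #define FULL ((size_t)-1)

    static int  keys[TABLE_SIZE];
    static bool used[TABLE_SIZE];

    // find-or-insert with linear probing
    static size_t find_or_insert(int key) {
        size_t h = (size_t)key % TABLE_SIZE;
        size_t i = h;
        do {
            if (!used[i]) { used[i] = true; keys[i] = key; return i; } // free slot: insert here
            if (keys[i] == key) { return i; }                          // key already present
            i = (i + 1) % TABLE_SIZE;                                  // probe the next slot
        } while (i != h);
        return FULL; // wrapped back to h: every slot is occupied
    }

    int main(void) {
        printf("%zu\n", find_or_insert(3));  // 3: lands in slot 3 (3 % 8)
        printf("%zu\n", find_or_insert(11)); // 4: 11 % 8 == 3 collides, probes to 4
        printf("%zu\n", find_or_insert(3));  // 3: finds the existing key
        return 0;
    }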