Arrcttacsrks committed on
Commit dfb12c6 · verified · 1 Parent(s): 4839697

Upload llama.cpp/ggml/src/ggml-backend-impl.h with huggingface_hub

llama.cpp/ggml/src/ggml-backend-impl.h ADDED
@@ -0,0 +1,216 @@
+ #pragma once
+
+ // ggml-backend internal header
+
+ #include "ggml-backend.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ //
+ // Backend buffer type
+ //
+
+ struct ggml_backend_buffer_type_i {
+     const char * (*get_name) (ggml_backend_buffer_type_t buft);
+     // allocate a buffer of this type
+     ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
+     // tensor alignment
+     size_t (*get_alignment) (ggml_backend_buffer_type_t buft);
+     // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
+     size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
+     // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
+     size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
+     // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
+     bool (*is_host) (ggml_backend_buffer_type_t buft);
+ };
+
+ struct ggml_backend_buffer_type {
+     struct ggml_backend_buffer_type_i iface;
+     ggml_backend_dev_t device;
+     void * context;
+ };
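
The struct above is the vtable a backend fills in for each buffer type it offers. Below is a minimal sketch (not part of the uploaded header) of what such a table could look like for a plain host-memory buffer type; the example_* names are hypothetical, only the signatures come from ggml_backend_buffer_type_i, and optional callbacks left NULL fall back to the documented defaults.

static const char * example_buft_get_name(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return "EXAMPLE";
}

static size_t example_buft_get_alignment(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return 32; // tensors placed in these buffers are 32-byte aligned
}

static bool example_buft_is_host(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return true; // data lives in host memory with the standard ggml tensor layout
}

static const struct ggml_backend_buffer_type_i example_buft_iface = {
    /* .get_name       = */ example_buft_get_name,
    /* .alloc_buffer   = */ NULL, // a real backend must provide this (see the buffer sketch further down)
    /* .get_alignment  = */ example_buft_get_alignment,
    /* .get_max_size   = */ NULL, // optional: defaults to SIZE_MAX
    /* .get_alloc_size = */ NULL, // optional: defaults to ggml_nbytes
    /* .is_host        = */ example_buft_is_host,
};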
+
+ //
+ // Backend buffer
+ //
+
+ struct ggml_backend_buffer_i {
+     // (optional) free the buffer
+     void (*free_buffer) (ggml_backend_buffer_t buffer);
+     // base address of the buffer
+     void * (*get_base) (ggml_backend_buffer_t buffer);
+     // (optional) initialize a tensor in the buffer (eg. add tensor extras)
+     void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+     // tensor data access
+     void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
+     void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+     // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
+     bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
+     // clear the entire buffer
+     void (*clear) (ggml_backend_buffer_t buffer, uint8_t value);
+     // (optional) reset any internal state due to tensor initialization, such as tensor extras
+     void (*reset) (ggml_backend_buffer_t buffer);
+ };
+
+ struct ggml_backend_buffer {
+     struct ggml_backend_buffer_i iface;
+     ggml_backend_buffer_type_t buft;
+     void * context;
+     size_t size;
+     enum ggml_backend_buffer_usage usage;
+ };
+
+ ggml_backend_buffer_t ggml_backend_buffer_init(
+         ggml_backend_buffer_type_t buft,
+         struct ggml_backend_buffer_i iface,
+         void * context,
+         size_t size);
+
+ // do not use directly, use ggml_backend_tensor_copy instead
+ bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
+
+ // multi-buffer
+ // buffer that contains a collection of buffers
+ ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
+ bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
+ void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
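
ggml_backend_buffer_init above is the helper a backend calls from its alloc_buffer callback to wrap a fresh allocation in a ggml_backend_buffer. A minimal sketch (not part of the uploaded header) of an alloc_buffer implementation backed by plain malloc follows; the example_* names are hypothetical, and a complete backend would also fill in the tensor-access callbacks that are left NULL here.

#include <stdlib.h>

static void example_buffer_free(ggml_backend_buffer_t buffer) {
    free(buffer->context); // context holds the raw host allocation
}

static void * example_buffer_get_base(ggml_backend_buffer_t buffer) {
    return buffer->context; // base address that tensor data pointers are derived from
}

static ggml_backend_buffer_t example_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = malloc(size);
    if (data == NULL) {
        return NULL;
    }
    struct ggml_backend_buffer_i iface = {
        /* .free_buffer   = */ example_buffer_free,
        /* .get_base      = */ example_buffer_get_base,
        /* .init_tensor   = */ NULL, // no per-tensor extras in this sketch
        /* .memset_tensor = */ NULL, // a complete backend implements the data-access callbacks
        /* .set_tensor    = */ NULL,
        /* .get_tensor    = */ NULL,
        /* .cpy_tensor    = */ NULL,
        /* .clear         = */ NULL,
        /* .reset         = */ NULL,
    };
    return ggml_backend_buffer_init(buft, iface, data, size);
}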
+
+ //
+ // Backend (stream)
+ //
+
+ struct ggml_backend_i {
+     const char * (*get_name)(ggml_backend_t backend);
+
+     void (*free)(ggml_backend_t backend);
+
+     // (optional) asynchronous tensor data access
+     void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+     bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
+
+     // (optional) complete all pending operations (required if the backend supports async operations)
+     void (*synchronize)(ggml_backend_t backend);
+
+     // (optional) graph plans (not used currently)
+     // compute graph with a plan
+     ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
+     void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+     // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
+     void (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
+     // compute the graph with the plan
+     enum ggml_status (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+
+     // compute graph (always async if supported by the backend)
+     enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+     // (optional) event synchronization
+     // record an event on this stream
+     void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
+     // wait for an event on a different stream
+     void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
+ };
+
+ struct ggml_backend {
+     ggml_guid_t guid;
+     struct ggml_backend_i iface;
+     ggml_backend_dev_t device;
+     void * context;
+ };
+
+ struct ggml_backend_event {
+     struct ggml_backend_device * device;
+     void * context;
+ };
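
Because graph_compute is asynchronous whenever the backend supports it, a caller that needs the results pairs it with synchronize. Below is a minimal sketch (not part of the uploaded header) of that pattern against the internal interface, with a hypothetical function name.

static enum ggml_status example_compute_and_wait(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    // submit the graph; on async backends this returns as soon as the work is queued
    enum ggml_status status = backend->iface.graph_compute(backend, cgraph);
    if (status != GGML_STATUS_SUCCESS) {
        return status;
    }
    // synchronize is optional in the interface, but required for backends with async operations
    if (backend->iface.synchronize != NULL) {
        backend->iface.synchronize(backend);
    }
    return GGML_STATUS_SUCCESS;
}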
+
+ //
+ // Backend device
+ //
+
+ // Note: if additional properties are needed, we should add a struct with all of them
+ // the current functions to obtain the properties can remain, since they are more convenient for often used properties
+ struct ggml_backend_device_i {
+     // device name: short identifier for this device, such as "CPU" or "CUDA0"
+     const char * (*get_name)(ggml_backend_dev_t dev);
+
+     // device description: short informative description of the device, could be the model name
+     const char * (*get_description)(ggml_backend_dev_t dev);
+
+     // device memory in bytes
+     void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);
+
+     // device type
+     enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);
+
+     // device properties
+     void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);
+
+     // backend (stream) initialization
+     ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);
+
+     // preferred buffer type
+     ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);
+
+     // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
+     ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);
+
+     // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
+     ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);
+
+     // check if the backend can compute an operation
+     bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
+
+     // check if the backend can use tensors allocated in a buffer type
+     bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
+
+     // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
+     // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
+     bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
+
+     // (optional) event synchronization
+     ggml_backend_event_t (*event_new) (ggml_backend_dev_t dev);
+     void (*event_free) (ggml_backend_dev_t dev, ggml_backend_event_t event);
+     void (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event);
+ };
+
+ struct ggml_backend_device {
+     struct ggml_backend_device_i iface;
+     ggml_backend_reg_t reg;
+     void * context;
+ };
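
A hedged sketch (not part of the uploaded header) of how a device could implement a few of these getters; the example_* names and the reported numbers are made up, while the signatures and the GGML_BACKEND_DEVICE_TYPE_CPU constant come from ggml-backend.

static const char * example_dev_get_name(ggml_backend_dev_t dev) {
    (void) dev;
    return "EXAMPLE0"; // short identifier, in the spirit of "CPU" or "CUDA0"
}

static const char * example_dev_get_description(ggml_backend_dev_t dev) {
    (void) dev;
    return "Example reference device";
}

static void example_dev_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    (void) dev;
    *total = (size_t) 8 * 1024 * 1024 * 1024; // pretend the device has 8 GiB (assumes 64-bit size_t)
    *free  = *total;                          // ... and that all of it is currently available
}

static enum ggml_backend_dev_type example_dev_get_type(ggml_backend_dev_t dev) {
    (void) dev;
    return GGML_BACKEND_DEVICE_TYPE_CPU; // a host device
}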
+
+ //
+ // Backend (reg)
+ //
+
+ struct ggml_backend_reg_i {
+     const char * (*get_name)(ggml_backend_reg_t reg);
+
+     // enumerate available devices
+     size_t (*get_device_count)(ggml_backend_reg_t reg);
+     ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index);
+
+     // (optional) get a pointer to a function in the backend
+     // backends can add custom functions that are not part of the standard ggml-backend interface
+     void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
+ };
+
+ struct ggml_backend_reg {
+     // int api_version; // TODO: for dynamic loading
+     struct ggml_backend_reg_i iface;
+     void * context;
+ };
+
+
+ // Internal backend registry API
+ void ggml_backend_register(ggml_backend_reg_t reg);
+ void ggml_backend_device_register(ggml_backend_dev_t device);
+ // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
+ // typedef ggml_backend_register_t * (*ggml_backend_init)(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
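
To close the loop, here is a hedged sketch (not part of the uploaded header) of a backend exposing itself through the registry interface and the internal registration API declared above. Every example_* identifier is hypothetical, and the device object is assumed to be defined elsewhere in the backend.

extern struct ggml_backend_device example_device; // assumed to be defined by the backend

static const char * example_reg_get_name(ggml_backend_reg_t reg) {
    (void) reg;
    return "EXAMPLE";
}

static size_t example_reg_get_device_count(ggml_backend_reg_t reg) {
    (void) reg;
    return 1; // this registry exposes a single device
}

static ggml_backend_dev_t example_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    (void) reg;
    return index == 0 ? &example_device : NULL;
}

static struct ggml_backend_reg example_reg = {
    /* .iface = */ {
        /* .get_name         = */ example_reg_get_name,
        /* .get_device_count = */ example_reg_get_device_count,
        /* .get_device       = */ example_reg_get_device,
        /* .get_proc_address = */ NULL, // no backend-specific entry points
    },
    /* .context = */ NULL,
};

// typically called once at startup so the registry becomes visible to ggml-backend
static void example_backend_init(void) {
    ggml_backend_register(&example_reg);
}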