Arrcttacsrks commited on
Commit
ed35717
·
verified ·
1 Parent(s): af6ddf2

Upload llama.cpp/ggml/src/ggml-aarch64.h with huggingface_hub

Browse files
Files changed (1) hide show
  1. llama.cpp/ggml/src/ggml-aarch64.h +39 -0
llama.cpp/ggml/src/ggml-aarch64.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // SPDX-FileCopyrightText: Copyright 2024 Arm Ltd.
2
+ #pragma once
3
+
4
+ #define GGML_COMMON_DECL_C
5
+ #include "ggml-common.h"
6
+
7
+ #include "ggml.h"
8
+
9
+ // GGML internal header
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ // Quantization
16
+ void quantize_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
17
+ void quantize_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
18
+
19
+ void quantize_mat_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nrows, int64_t n_per_row, int64_t blck_size_interleave);
20
+
21
+ // Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
22
+ size_t quantize_q4_0_4x4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
23
+ size_t quantize_q4_0_4x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
24
+ size_t quantize_q4_0_8x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
25
+
26
+ // GEMV
27
+ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
28
+ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
29
+ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
30
+
31
+ // GEMM
32
+ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
33
+ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
34
+ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
35
+
36
+ #ifdef __cplusplus
37
+ }
38
+ #endif
39
+