Update config.aiml
Browse files- config.aiml +39 -49
config.aiml
CHANGED
@@ -1,55 +1,45 @@
|
|
1 |
-
|
2 |
-
#
|
3 |
|
4 |
{
|
5 |
-
"aiml": {
|
6 |
-
"version": "0.1.1",
|
7 |
-
"last_modified": "September 9th 2023, 01:30",
|
8 |
-
"properties": {
|
9 |
-
"purpose": "text-generation-llm",
|
10 |
-
"custom_key": "custom_value"
|
11 |
-
}
|
12 |
-
},
|
13 |
"modelProfile": {
|
14 |
-
"
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
"
|
32 |
-
"
|
33 |
-
"Prorietary german conversation dataset - [View Dataset](https://huggingface.co/datasets)"",
|
34 |
-
"German Squad - [View Dataset](https://huggingface.co/datasets)",
|
35 |
-
"German Legal Squad - [View Dataset](https://huggingface.co/datasets)"",
|
36 |
-
],
|
37 |
-
"description": "According to the model creator, the dataset was finetuned with several instruct-based datasets in German language and augmented with false RAG data to improve model factuality.""
|
38 |
-
},
|
39 |
-
"quantization": {
|
40 |
-
"types": ["8 Bit", "5 Bit (k_m)"],
|
41 |
-
"formats": ["GGUF"],
|
42 |
-
"tools_used": "llama.cpp (commit 9e20231) - for quantization to 8, 5 and 4 bit"
|
43 |
},
|
44 |
-
"
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
}
|
55 |
}
|
|
|
1 |
+
# WORK IN PROGRESS
|
2 |
+
# DON'T USE THIS IN PRODUCTION
|
3 |
|
4 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"modelProfile": {
|
6 |
+
"fileName": "config.aiml",
|
7 |
+
"description": "Contains all relevant configuration parameters, properties, and rules for securely deploying the AI model without hassle. Imagine a Dockerfile, but for AI service deployment and inference."
|
8 |
+
},
|
9 |
+
"generalInformation": {
|
10 |
+
"id": 1,
|
11 |
+
"name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co/jphme/Llama-2-13b-chat-german)",
|
12 |
+
"creator": "[jphme](https://huggingface.co/jphme)",
|
13 |
+
"sourceUrl": "https://huggingface.co/"
|
14 |
+
},
|
15 |
+
"modelSpecifications": {
|
16 |
+
"type": "Large Language Model",
|
17 |
+
"pipeline": "Text Generation",
|
18 |
+
"architecture": "Transformers",
|
19 |
+
"variables": {
|
20 |
+
"llmLanguages": "en,de,nl,it,fr",
|
21 |
+
"llmFlavor": "llama",
|
22 |
+
"llmPromptTemplate": "llama2",
|
23 |
+
"devices": "gpu[0,1,2,3],cpu[0]",
|
24 |
+
"key": "value"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
},
|
26 |
+
"filetype": "GGUF",
|
27 |
+
"inferenceTools": ["Llama.cpp", "Text Generation Inference (TGI)", "h2oGPT Server", "KoboldCpp", "Custom"],
|
28 |
+
"compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"],
|
29 |
+
"compressionMethod": "llama.cpp - convert.py Script",
|
30 |
+
"notes": "First, an FP16 GGUF file was generated, then quantized to 8, 4 (K_M) and 5 (K_M) Bit with llama.cpp/quantize"
|
31 |
+
},
|
32 |
+
"customization": {
|
33 |
+
"type": "finetune_full",
|
34 |
+
"class": ["Instruct", "Chat"],
|
35 |
+
"datasets": [
|
36 |
+
"[Proprietary German Conversation Dataset](https://placeholder.local/dataset)",
|
37 |
+
"[German & German legal SQuAD](https://placeholder.local/dataset)"
|
38 |
+
],
|
39 |
+
"notes": "The datasets were augmented with rows containing 'wrong' contexts, in order to improve factual RAG performance."
|
40 |
+
},
|
41 |
+
"runInstructions": {
|
42 |
+
"startModel": "#!/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example. Functioning run.sh Script to be published soon",
|
43 |
+
"stopModel": "# Coming soon, todo"
|
44 |
}
|
45 |
}
|