freefallr committed
Commit da6dd20 · Parent: c71e477

Update config.aiml

Files changed (1)
  1. config.aiml +39 -49
config.aiml CHANGED
@@ -1,55 +1,45 @@
- ## This is an experimental example of an .aiml file.
- # Everything you see here is a work in progress; don't use this in production. Please.

  {
-   "aiml": {
-     "version": "0.1.1",
-     "last_modified": "September 9th 2023, 01:30",
-     "properties": {
-       "purpose": "text-generation-llm",
-       "custom_key": "custom_value"
-     }
-   },
    "modelProfile": {
-     "model": {
-       "name": "jphme/llama-2-13b-chat-german",
-       "purpose": "text-generation",
-       "created_by": "hf.co/jphme",
-       "category": "llm",
-       "class": "llama2",
-       "architecture": "transformers",
-       "finetuned": "yes",
-       "finetuned_type": "chat, instruct",
-       "[category]_metadata": { # e.g. "llm_metadata"; metadata specific to the model type
-         "parameters": "70b",
-         "architecture": "transformers",
-         "fileFormats": "gguf, ggmlv3, gptq",
-         "custom_key": "custom_value"
-       }
-     },
-     "finetuning": {
-       "method": "full",
-       "datasets": [
-         "Proprietary German conversation dataset - [View Dataset](https://huggingface.co/datasets)",
-         "German SQuAD - [View Dataset](https://huggingface.co/datasets)",
-         "German Legal SQuAD - [View Dataset](https://huggingface.co/datasets)"
-       ],
-       "description": "According to the model creator, the model was finetuned with several instruct-based datasets in German and augmented with false RAG data to improve model factuality."
-     },
-     "quantization": {
-       "types": ["8 Bit", "5 Bit (k_m)"],
-       "formats": ["GGUF"],
-       "tools_used": "llama.cpp (commit 9e20231) - for quantization to 8, 5 and 4 bit"
      },
-     "deployment": {
-       "id": "1",
-       "title": "Bare Metal - Only CPU (64 Bit Intel/AMD/ARM)",
-       "description": "Use run.sh to deploy the model. Note: The run.sh script is not yet published in this repo. It will be made available within the next few days.",
-       "cpu_architecture": "x86-64, arm64",
-       "os": "macOS, Linux (Ubuntu), Linux (Debian), Windows",
-       "install": "chmod +x install.sh && ./install.sh",
-       "start": "./run.sh",
-       "stop": "systemctl stop run_aiml.service"
-     }
    }
  }
 
+ # WORK IN PROGRESS
+ # DON'T USE THIS IN PRODUCTION

  {
    "modelProfile": {
+     "fileName": "config.aiml",
+     "description": "Contains all relevant configuration parameters, properties, and rules for securely deploying the AI model without hassle. Imagine a Dockerfile, but for AI service deployment and inference."
+   },
+   "generalInformation": {
+     "id": 1,
+     "name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co/jphme/Llama-2-13b-chat-german)",
+     "creator": "[jphme](https://huggingface.co/jphme)",
+     "sourceUrl": "https://huggingface.co/"
+   },
+   "modelSpecifications": {
+     "type": "Large Language Model",
+     "pipeline": "Text Generation",
+     "architecture": "Transformers",
+     "variables": {
+       "llmLanguages": "en,de,nl,it,fr",
+       "llmFlavor": "llama",
+       "llmPromptTemplate": "llama2",
+       "devices": "gpu[0,1,2,3],cpu[0]",
+       "key": "value"
      },
+     "filetype": "GGUF",
+     "inferenceTools": ["Llama.cpp", "Text Generation Inference (TGI)", "h2oGPT Server", "KoboldCpp", "Custom"],
+     "compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"],
+     "compressionMethod": "llama.cpp - convert.py script",
+     "notes": "First, an FP16 GGUF file was generated, then quantized to 8, 4 (K_M) and 5 (K_M) Bit with llama.cpp/quantize."
+   },
+   "customization": {
+     "type": "finetune_full",
+     "class": ["Instruct", "Chat"],
+     "datasets": [
+       "[Proprietary German Conversation Dataset](https://placeholder.local/dataset)",
+       "[German & German Legal SQuAD](https://placeholder.local/dataset)"
+     ],
+     "notes": "The datasets were augmented with rows containing 'wrong' contexts, in order to improve factual RAG performance."
+   },
+   "runInstructions": {
+     "startModel": "#!/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example; a functioning run.sh script will be published soon",
+     "stopModel": "# Coming soon, todo"
    }
  }
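
As committed, config.aiml is JSON preceded by two `#` comment lines, so any tool consuming it has to strip those before parsing. Below is a minimal loader sketch in Python, assuming the schema from this commit: the section names come from the file above, while `REQUIRED_SECTIONS`, the validation, and the `start_model` helper are illustrative and not part of any published aiml tooling.

```python
import json
import os
import subprocess
import tempfile

# Top-level sections present in the config.aiml committed above (assumed schema).
REQUIRED_SECTIONS = ("modelProfile", "generalInformation",
                     "modelSpecifications", "customization", "runInstructions")

def load_aiml(path: str) -> dict:
    """Parse config.aiml, skipping the leading '# ...' comment lines."""
    with open(path, encoding="utf-8") as f:
        text = "".join(line for line in f if not line.lstrip().startswith("#"))
    config = json.loads(text)
    missing = [s for s in REQUIRED_SECTIONS if s not in config]
    if missing:
        raise ValueError(f"config.aiml is missing sections: {missing}")
    return config

def start_model(config: dict) -> int:
    """Write runInstructions.startModel to a temp script and run it via sh."""
    with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f:
        f.write(config["runInstructions"]["startModel"])
        script = f.name
    try:
        return subprocess.run(["sh", script]).returncode
    finally:
        os.unlink(script)

if __name__ == "__main__":
    cfg = load_aiml("config.aiml")
    print("Deploying", cfg["generalInformation"]["name"])
    raise SystemExit(start_model(cfg))
```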
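The `devices` variable packs device placement into a single string (`gpu[0,1,2,3],cpu[0]`). A sketch of a parser for that notation follows, with the caveat that the grammar is inferred from the one example in this commit:

```python
import re

# e.g. "gpu[0,1,2,3],cpu[0]" -> {"gpu": [0, 1, 2, 3], "cpu": [0]}
# NOTE: the grammar is an assumption inferred from the single example above.
DEVICE_RE = re.compile(r"(?P<kind>[a-z]+)\[(?P<ids>[\d,]+)\]")

def parse_devices(spec: str) -> dict[str, list[int]]:
    devices: dict[str, list[int]] = {}
    for m in DEVICE_RE.finditer(spec):
        devices.setdefault(m.group("kind"), []).extend(
            int(i) for i in m.group("ids").split(","))
    return devices

assert parse_devices("gpu[0,1,2,3],cpu[0]") == {"gpu": [0, 1, 2, 3], "cpu": [0]}
```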