freefallr committed
Commit da6dd20 · Parent: c71e477

Update config.aiml

Files changed (1)
  1. config.aiml +39 -49
config.aiml CHANGED
@@ -1,55 +1,45 @@
- ## This is an experimental example of an .aiml file.
- # Everything you see here is a work in progress; don't use this in production. Please.

  {
-   "aiml": {
-     "version": "0.1.1",
-     "last_modified": "September 9th 2023, 01:30",
-     "properties": {
-       "purpose": "text-generation-llm",
-       "custom_key": "custom_value"
-     }
-   },
    "modelProfile": {
-     "model": {
-       "name": "jphme/llama-2-13b-chat-german",
-       "purpose": "text-generation",
-       "created_by": "hf.co/jphme",
-       "category": "llm",
-       "class": "llama2",
-       "architecture": "transformers",
-       "finetuned": "yes",
-       "finetuned_type": "chat, instruct",
-       "[category]_metadata": { # e.g. "llm_metadata"; metadata specific to the model type
-         "parameters": "70b",
-         "architecture": "transformers",
-         "fileFormats": "gguf, ggmlv3, gptq",
-         "custom_key": "custom_value"
-       }
-     },
-     "finetuning": {
-       "method": "full",
-       "datasets": [
-         "Proprietary German conversation dataset - [View Dataset](https://huggingface.co/datasets)",
-         "German SQuAD - [View Dataset](https://huggingface.co/datasets)",
-         "German Legal SQuAD - [View Dataset](https://huggingface.co/datasets)"
-       ],
-       "description": "According to the model creator, the model was finetuned with several instruct-based datasets in German and augmented with false RAG data to improve model factuality."
-     },
-     "quantization": {
-       "types": ["8 Bit", "5 Bit (k_m)"],
-       "formats": ["GGUF"],
-       "tools_used": "llama.cpp (commit 9e20231) - for quantization to 8, 5 and 4 bit"
      },
-     "deployment": {
-       "id": "1",
-       "title": "Bare Metal - Only CPU (64 Bit Intel/AMD/ARM)",
-       "description": "Use run.sh to deploy the model. Note: The run.sh script is not yet published in this repo. It will be made available within the next few days.",
-       "cpu_architecture": "x86-64, arm64",
-       "os": "macOS, Linux (Ubuntu), Linux (Debian), Windows",
-       "install": "chmod +x install.sh && ./install.sh",
-       "start": "./run.sh",
-       "stop": "systemctl stop run_aiml.service"
-     }
    }
  }
 
+ # WORK IN PROGRESS
+ # DON'T USE THIS IN PRODUCTION

  {
    "modelProfile": {
+     "fileName": "config.aiml",
+     "description": "Contains all relevant configuration parameters, properties, and rules for securely deploying the AI model without hassle. Imagine a Dockerfile, but for AI service deployment and inference."
+   },
+   "generalInformation": {
+     "id": 1,
+     "name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co/jphme/Llama-2-13b-chat-german)",
+     "creator": "[jphme](https://huggingface.co/jphme)",
+     "sourceUrl": "https://huggingface.co/"
+   },
+   "modelSpecifications": {
+     "type": "Large Language Model",
+     "pipeline": "Text Generation",
+     "architecture": "Transformers",
+     "variables": {
+       "llmLanguages": "en,de,nl,it,fr",
+       "llmFlavor": "llama",
+       "llmPromptTemplate": "llama2",
+       "devices": "gpu[0,1,2,3],cpu[0]",
+       "key": "value"
      },
+     "filetype": "GGUF",
+     "inferenceTools": ["Llama.cpp", "Text Generation Inference (TGI)", "h2oGPT Server", "KoboldCpp", "Custom"],
+     "compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"],
+     "compressionMethod": "llama.cpp - convert.py script",
+     "notes": "First, an FP16 GGUF file was generated, then quantized to 8, 4 (K_M) and 5 (K_M) Bit with llama.cpp/quantize."
+   },
+   "customization": {
+     "type": "finetune_full",
+     "class": ["Instruct", "Chat"],
+     "datasets": [
+       "[Proprietary German Conversation Dataset](https://placeholder.local/dataset)",
+       "[German & German Legal SQuAD](https://placeholder.local/dataset)"
+     ],
+     "notes": "The datasets were augmented with rows containing 'wrong' contexts, in order to improve factual RAG performance."
+   },
+   "runInstructions": {
+     "startModel": "#!/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example; a functioning run.sh script will be published soon",
+     "stopModel": "# Coming soon, todo"
    }
  }
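
As committed, config.aiml is JSON preceded by two `#` comment lines, so any tool consuming it has to strip those before parsing. Below is a minimal loader sketch in Python, assuming the schema from this commit: the section names come from the file above, while `REQUIRED_SECTIONS`, the validation, and the `start_model` helper are illustrative and not part of any published aiml tooling.

```python
import json
import os
import subprocess
import tempfile

# Top-level sections present in the config.aiml committed above (assumed schema).
REQUIRED_SECTIONS = ("modelProfile", "generalInformation",
                     "modelSpecifications", "customization", "runInstructions")

def load_aiml(path: str) -> dict:
    """Parse config.aiml, skipping the leading '# ...' comment lines."""
    with open(path, encoding="utf-8") as f:
        text = "".join(line for line in f if not line.lstrip().startswith("#"))
    config = json.loads(text)
    missing = [s for s in REQUIRED_SECTIONS if s not in config]
    if missing:
        raise ValueError(f"config.aiml is missing sections: {missing}")
    return config

def start_model(config: dict) -> int:
    """Write runInstructions.startModel to a temp script and run it via sh."""
    with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f:
        f.write(config["runInstructions"]["startModel"])
        script = f.name
    try:
        return subprocess.run(["sh", script]).returncode
    finally:
        os.unlink(script)

if __name__ == "__main__":
    cfg = load_aiml("config.aiml")
    print("Deploying", cfg["generalInformation"]["name"])
    raise SystemExit(start_model(cfg))
```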
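The `devices` variable packs device placement into a single string (`gpu[0,1,2,3],cpu[0]`). A sketch of a parser for that notation follows, with the caveat that the grammar is inferred from the one example in this commit:

```python
import re

# e.g. "gpu[0,1,2,3],cpu[0]" -> {"gpu": [0, 1, 2, 3], "cpu": [0]}
# NOTE: the grammar is an assumption inferred from the single example above.
DEVICE_RE = re.compile(r"(?P<kind>[a-z]+)\[(?P<ids>[\d,]+)\]")

def parse_devices(spec: str) -> dict[str, list[int]]:
    devices: dict[str, list[int]] = {}
    for m in DEVICE_RE.finditer(spec):
        devices.setdefault(m.group("kind"), []).extend(
            int(i) for i in m.group("ids").split(","))
    return devices

assert parse_devices("gpu[0,1,2,3],cpu[0]") == {"gpu": [0, 1, 2, 3], "cpu": [0]}
```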