# WORK IN PROGRESS
# DON'T USE THIS IN PRODUCTION
{
  "modelProfile": {
    "fileName": "config.aiml",
    "description": "Contains all configuration parameters, properties, and rules needed to deploy the AI model securely. Think of it as a Dockerfile, but for AI service deployment and inference."
  },
  "generalInformation": {
    "id": 1,
    "name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co./jphme/Llama-2-13b-chat-german)",
    "creator": "[jphme](https://huggingface.co./jphme)",
    "sourceUrl": "https://huggingface.co./"
  },
  "modelSpecifications": {
    "type": "Large Language Model",
    "pipeline": "Text Generation",
    "architecture": "Transformers",
    "variables": {
      "llmLanguages": "en,de,nl,it,fr",
      "llmFlavor": "llama",
      "llmPromptTemplate": "llama2",
      "devices": "gpu[0,1,2,3],cpu[0]",
      "key": "value"
    },
    "filetype": "GGUF",
    "inferenceTools": ["Llama.cpp", "Text Generation Inference (TGI)", "h2oGPT Server", "KoboldCpp", "Custom"],
    "compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"],
    "compressionMethod": "llama.cpp - convert.py Script",
    "notes": "First, an FP16 GGUF file was generated, then quantized to 8 Bit, 5 Bit (K_M), and 4 Bit (K_M) with llama.cpp/quantize."
  },
  "customization": {
    "type": "finetune_full",
    "class": ["Instruct", "Chat"],
    "datasets": [
      "[Proprietary German Conversation Dataset](https://placeholder.local/dataset)",
      "[German & German legal SQuAD](https://placeholder.local/dataset)"
    ],
    "notes": "The datasets were augmented with rows containing 'wrong' contexts to improve factual RAG performance."
  },
  "runInstructions": {
    "startModel": "#!/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example; a functioning run.sh script will be published soon.",
    "stopModel": "# Coming soon, todo"
  }
}
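
# Illustrative sketch of the quantization workflow described in
# modelSpecifications.notes, assuming a local llama.cpp checkout;
# the model directory and output file names below are placeholders,
# not part of this config:
#
#   python3 convert.py ./Llama-2-13b-chat-german --outtype f16 \
#     --outfile llama-2-13b-chat-german.f16.gguf
#   ./quantize llama-2-13b-chat-german.f16.gguf llama-2-13b-chat-german.Q8_0.gguf Q8_0
#   ./quantize llama-2-13b-chat-german.f16.gguf llama-2-13b-chat-german.Q5_K_M.gguf Q5_K_M
#   ./quantize llama-2-13b-chat-german.f16.gguf llama-2-13b-chat-german.Q4_K_M.gguf Q4_K_M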
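
# Illustrative sketch of how a deployment tool might consume this file,
# assuming it strips the leading '#' comment lines before parsing the
# remainder as JSON (jq is shown only as an example consumer):
#
#   grep -v '^#' config.aiml | jq -r '.modelSpecifications.variables.devices'
#   # -> gpu[0,1,2,3],cpu[0]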