# WORK IN PROGRESS
# DON'T USE THIS IN PRODUCTION
{
"modelProfile": {
"fileName": "config.aiml",
"description": "Contains all relevant configuration parameters, properties, and rules for securely deploying the AI model without hassle. Imagine a Dockerfile, but for AI service deployment and inference."
},
"generalInformation": {
"id": 1,
"name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co./jphme/Llama-2-13b-chat-german)",
"creator": "[jphme](https://huggingface.co./jphme)",
"sourceUrl": "https://huggingface.co./"
},
"modelSpecifications": {
"type": "Large Language Model",
"pipeline": "Text Generation",
"architecture": "Transformers",
"variables": {
"llmLanguages": "en,de,nl,it,fr",
"llmFlavor": "llama",
"llmPromptTemplate": "llama2",
"devices": "gpu[0,1,2,3],cpu[0]",
"key": "value"
},
"filetype": "GGUF",
"inferenceTools": ["Llama.cpp", "Text Generation Inference (TGI)", "h2oGPT Server", "KoboldCpp", "Custom"],
"compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"],
"compressionMethod": "llama.cpp - convert.py Script",
"notes": "First, a FP16 GGUF file was generated, and then quantized it to 8, 4 (K_M) and 5 (K_M) Bit with llama.cpp/quantize"
},
"customization": {
"type": "finetune_full",
"class": ["Instruct", "Chat"],
"datasets": [
"[Prorietary German Conversation Dataset](https://placeholder.ocal/dataset)",
"[German & German legal SQuAD](https://placeholder.local/dataset)"
],
"notes": "The datasets were augmented with rows containing 'wrong' contexts, in order to improve factual RAG performance."
},
"runInstructions": {
"startModel": "#/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example. Functioning run.sh Script to be published soon",
"stopModel": "# Coming soon, todo"
}
}
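# --- Illustrative sketches (assumptions, not part of the config object) ---
#
# "compressionMethod" above points at llama.cpp's convert.py + quantize flow.
# A minimal sketch of that flow, assuming a local llama.cpp checkout with the
# classic convert.py script and quantize binary (model path and output file
# names are placeholders):
#
#   python convert.py ./Llama-2-13b-chat-german --outtype f16 --outfile model-f16.gguf
#   ./quantize model-f16.gguf model-q8_0.gguf Q8_0
#   ./quantize model-f16.gguf model-q5_k_m.gguf Q5_K_M
#   ./quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M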
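#
# "customization.notes" mentions augmenting rows with deliberately wrong
# contexts for factual RAG robustness. A hypothetical shape such a row might
# take (field names and content invented for illustration):
#
#   {
#     "context": "<passage unrelated to the question>",
#     "question": "When was the law passed?",
#     "answer": "The given context does not contain this information."
#   }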
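#
# run.sh itself is still unpublished; a hypothetical sketch of what it might
# contain, assuming llama.cpp's server binary (binary name, model file,
# context size, and port are all assumptions):
#
#   #!/bin/sh
#   ./server -m model-q5_k_m.gguf -c 4096 --host 0.0.0.0 --port 8080 &
#   echo $! > model.pid   # remember the PID so stopModel can kill the server
#
# A matching stopModel counterpart:
#
#   kill "$(cat model.pid)" && rm -f model.pid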