# WORK IN PROGRESS
# DON'T USE THIS IN PRODUCTION
{
  "modelProfile": {
    "fileName": "config.aiml",
    "description": "Contains the configuration parameters, properties, and rules needed to deploy the AI model securely. Imagine a Dockerfile, but for AI service deployment and inference."
  },
"generalInformation": { | |
"id": 1, | |
"name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co./jphme/Llama-2-13b-chat-german)", | |
"creator": "[jphme](https://huggingface.co./jphme)", | |
"sourceUrl": "https://huggingface.co./" | |
}, | |
"modelSpecifications": { | |
"type": "Large Language Model", | |
"pipeline": "Text Generation", | |
"architecture": "Transformers", | |
"variables": { | |
"llmLanguages": "en,de,nl,it,fr", | |
"llmFlavor": "llama", | |
"llmPromptTemplate": "llama2", | |
"devices": "gpu[0,1,2,3],cpu[0]", | |
"key": "value" | |
}, | |
"filetype": "GGUF", | |
"inferenceTools": ["Llama.cpp", "Text Generation Inference (TGI)", "h2oGPT Server", "KoboldCpp", "Custom"], | |
"compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"], | |
"compressionMethod": "llama.cpp - convert.py Script", | |
"notes": "First, a FP16 GGUF file was generated, and then quantized it to 8, 4 (K_M) and 5 (K_M) Bit with llama.cpp/quantize" | |
}, | |
"customization": { | |
"type": "finetune_full", | |
"class": ["Instruct", "Chat"], | |
"datasets": [ | |
"[Prorietary German Conversation Dataset](https://placeholder.ocal/dataset)", | |
"[German & German legal SQuAD](https://placeholder.local/dataset)" | |
], | |
"notes": "The datasets were augmented with rows containing 'wrong' contexts, in order to improve factual RAG performance." | |
}, | |
"runInstructions": { | |
"startModel": "#/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example. Functioning run.sh Script to be published soon", | |
"stopModel": "# Coming soon, todo" | |
} | |
} | |
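
To make the draft concrete, here is a minimal sketch of how a tool might consume such a profile. The `load_profile` and `parse_devices` names, the required-section check, and the comment-stripping step (the `#` header lines above are not valid JSON) are all assumptions for illustration, not part of the spec.

```python
import json
import re

# Hypothetical top-level sections a loader might require, taken from the draft above.
REQUIRED_SECTIONS = (
    "modelProfile",
    "generalInformation",
    "modelSpecifications",
    "customization",
    "runInstructions",
)

def load_profile(path: str) -> dict:
    """Load a config.aiml file, skipping '#' comment lines before the JSON body."""
    with open(path, encoding="utf-8") as f:
        body = "".join(line for line in f if not line.lstrip().startswith("#"))
    profile = json.loads(body)
    missing = [s for s in REQUIRED_SECTIONS if s not in profile]
    if missing:
        raise ValueError(f"config.aiml is missing sections: {missing}")
    return profile

def parse_devices(spec: str) -> dict[str, list[int]]:
    """Turn a devices string like 'gpu[0,1,2,3],cpu[0]' into {'gpu': [0, 1, 2, 3], 'cpu': [0]}."""
    devices = {}
    for kind, ids in re.findall(r"([a-z]+)\[([\d,]+)\]", spec):
        devices[kind] = [int(i) for i in ids.split(",")]
    return devices

profile = load_profile("config.aiml")
print(parse_devices(profile["modelSpecifications"]["variables"]["devices"]))
# -> {'gpu': [0, 1, 2, 3], 'cpu': [0]}
```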
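The `llmPromptTemplate: "llama2"` value refers to the standard Llama 2 chat format with `[INST]` and `<<SYS>>` markers. A sketch of a single-turn prompt builder; the function name and example strings are hypothetical:

```python
def build_llama2_prompt(system: str, user: str) -> str:
    # Single-turn Llama 2 chat prompt: the system block is wrapped in
    # <<SYS>> tags, the user turn in [INST] ... [/INST].
    return (
        "<s>[INST] <<SYS>>\n"
        f"{system}\n"
        "<</SYS>>\n\n"
        f"{user} [/INST]"
    )

print(build_llama2_prompt(
    "Du bist ein hilfreicher Assistent.",       # German system prompt, matching the model
    "Was ist die Hauptstadt von Deutschland?",  # "What is the capital of Germany?"
))
```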
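And since `runInstructions.startModel` is still a stub, a hedged sketch of what a launcher could do once a GGUF file is available locally, assuming llama.cpp's server example binary with its `-m`, `--port`, and `-ngl` flags; the binary path, model path, port, and layer count are placeholders:

```python
import shlex
import subprocess

def start_model(profile: dict, model_path: str, port: int = 8080) -> subprocess.Popen:
    """Map profile fields onto a llama.cpp server invocation (illustrative only)."""
    devices = profile["modelSpecifications"]["variables"]["devices"]
    cmd = [
        "./server",         # llama.cpp server example binary (placeholder path)
        "-m", model_path,   # one of the quantized GGUF files, e.g. the 4-bit K_M variant
        "--port", str(port),
        # Offload layers to the GPU only if the profile lists GPU devices.
        "-ngl", "99" if "gpu[" in devices else "0",
    ]
    print("launching:", shlex.join(cmd))
    return subprocess.Popen(cmd)
```

A real `stopModel` counterpart would presumably terminate this process handle (`proc.terminate()`), which is one reason to keep both commands in the profile.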