# WORK IN PROGRESS # DON'T USE THIS IN PRODUCTION

{
  "modelProfile": {
    "fileName": "config.aiml",
    "description": "Contains all relevant configuration parameters, properties, and rules for securely deploying the AI model without hassle. Imagine a Dockerfile, but for AI service deployment and inference."
  },
  "generalInformation": {
    "id": 1,
    "name": "[jphme/Llama-2-13b-chat-german](https://huggingface.co./jphme/Llama-2-13b-chat-german)",
    "creator": "[jphme](https://huggingface.co./jphme)",
    "sourceUrl": "https://huggingface.co./"
  },
  "modelSpecifications": {
    "type": "Large Language Model",
    "pipeline": "Text Generation",
    "architecture": "Transformers",
    "variables": {
      "llmLanguages": "en,de,nl,it,fr",
      "llmFlavor": "llama",
      "llmPromptTemplate": "llama2",
      "devices": "gpu[0,1,2,3],cpu[0]",
      "key": "value"
    },
    "filetype": "GGUF",
    "inferenceTools": [
      "Llama.cpp",
      "Text Generation Inference (TGI)",
      "h2oGPT Server",
      "KoboldCpp",
      "Custom"
    ],
    "compression": ["8 Bit", "5 Bit (K_M)", "4 Bit (K_M)"],
    "compressionMethod": "llama.cpp - convert.py Script",
    "notes": "First, an FP16 GGUF file was generated, which was then quantized to 8 Bit, 5 Bit (K_M), and 4 Bit (K_M) with llama.cpp/quantize."
  },
  "customization": {
    "type": "finetune_full",
    "class": ["Instruct", "Chat"],
    "datasets": [
      "[Proprietary German Conversation Dataset](https://placeholder.local/dataset)",
      "[German & German legal SQuAD](https://placeholder.local/dataset)"
    ],
    "notes": "The datasets were augmented with rows containing 'wrong' contexts to improve factual RAG performance."
  },
  "runInstructions": {
    "startModel": "#!/bin/sh\nchmod +x run.sh && ./run.sh\n# This is an example. A functioning run.sh script will be published soon.",
    "stopModel": "# Coming soon (TODO)"
  }
}
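
The profile itself is plain JSON, so consuming it is straightforward. Below is a minimal sketch of how a deployment tool might load the file and parse the `devices` string; `load_profile` and `parse_devices` are hypothetical helpers, not part of any published .aiml tooling.

```python
import json
import re

def load_profile(path="config.aiml"):
    """Read an .aiml profile and return it as a dict.

    Assumes the '# WORK IN PROGRESS ...' header has been stripped,
    so the file contains only the JSON object.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)

def parse_devices(spec):
    """Turn a device string like 'gpu[0,1,2,3],cpu[0]' into a dict:
    {'gpu': [0, 1, 2, 3], 'cpu': [0]}."""
    devices = {}
    for kind, ids in re.findall(r"(\w+)\[([\d,]+)\]", spec):
        devices[kind] = [int(i) for i in ids.split(",")]
    return devices

profile = load_profile()
variables = profile["modelSpecifications"]["variables"]
print(parse_devices(variables["devices"]))  # {'gpu': [0, 1, 2, 3], 'cpu': [0]}
```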
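
The `compressionMethod` and `notes` fields describe a two-step llama.cpp pipeline: convert the original weights to an FP16 GGUF file, then quantize. The sketch below drives that pipeline from Python, under the assumption that the listed "8 Bit", "5 Bit (K_M)", and "4 Bit (K_M)" variants map to llama.cpp's Q8_0, Q5_K_M, and Q4_K_M types; the paths are placeholders, and the script names and flags follow the notes but may differ between llama.cpp versions.

```python
import subprocess

MODEL_DIR = "models/llama-2-13b-chat-german"   # hypothetical local HF checkout
FP16_GGUF = "llama-2-13b-chat-german.f16.gguf"

# Step 1: produce the FP16 GGUF file (llama.cpp's convert.py, per the notes).
subprocess.run(
    ["python", "convert.py", MODEL_DIR, "--outtype", "f16", "--outfile", FP16_GGUF],
    check=True,
)

# Step 2: quantize to the three listed precisions with llama.cpp/quantize.
for quant in ["Q8_0", "Q5_K_M", "Q4_K_M"]:
    out = f"llama-2-13b-chat-german.{quant}.gguf"
    subprocess.run(["./quantize", FP16_GGUF, out, quant], check=True)
```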
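
`llmPromptTemplate: "llama2"` refers to the Llama 2 chat format, so any inference tool consuming this profile has to wrap user input accordingly. A sketch of the single-turn variant follows; the German example strings are placeholders.

```python
def format_llama2_prompt(system: str, user: str) -> str:
    """Build a single-turn prompt in the Llama 2 chat format
    expected when llmPromptTemplate is set to 'llama2'."""
    return f"<s>[INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST]"

prompt = format_llama2_prompt(
    "Du bist ein hilfreicher Assistent.",        # "You are a helpful assistant."
    "Was ist die Hauptstadt von Deutschland?",   # "What is the capital of Germany?"
)
```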
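
Since `run.sh` is not yet published, here is a minimal sketch of serving one of the quantized GGUF files with the llama-cpp-python bindings (the profile lists Llama.cpp among the `inferenceTools`); the model path, context size, and GPU offload settings are assumptions, not values taken from the profile.

```python
from llama_cpp import Llama  # pip install llama-cpp-python

# Hypothetical local path to one of the quantized files (Q5_K_M shown).
llm = Llama(
    model_path="./llama-2-13b-chat-german.Q5_K_M.gguf",
    n_ctx=4096,        # Llama 2 context window
    n_gpu_layers=-1,   # offload all layers if built with GPU support
)

# Single-turn prompt in the Llama 2 chat format, without a system prompt.
prompt = "<s>[INST] Was ist die Hauptstadt von Deutschland? [/INST]"
output = llm(prompt, max_tokens=256, stop=["</s>"])
print(output["choices"][0]["text"])
```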