chat-hf / app.py
import os
import gradio as gr
from ctransformers import AutoModelForCausalLM
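# Model repository and model file name are read from environment variables.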
model_repo = os.getenv('HF_MODEL_REPO')
model_bin = os.getenv('HF_MODEL_BIN')
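# Load the model with ctransformers for CPU inference (AVX2 backend, 16K context).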
llm = AutoModelForCausalLM.from_pretrained(
    model_repo,
    model_file=model_bin,
    threads=2,
    seed=42,
    context_length=16384,
    lib="avx2",
)
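# Generate a completion for the prompt; reset=False keeps the model state
# between calls so the conversation context accumulates.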
def response(prompt):
    txt = llm(prompt, max_new_tokens=8192, temperature=0.8, top_p=0.5,
              repetition_penalty=1.1, reset=False, stop=["</s>", "<|im_end|>"])
    return txt
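# Minimal text-in/text-out Gradio interface.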
if __name__ == '__main__':
    title = "Chat"
    demo_status = "Demo is running on CPU"
    gr.Interface(
        response,
        inputs="text",
        outputs="text",
        title=title,
        description=demo_status,
    ).launch()