"""Pirate-persona chat demo: a local llama.cpp GGUF model served via Gradio.

Streams token-by-token replies from a LangChain ``LlamaCpp`` model into a
Gradio ``ChatInterface``.
"""

# NOTE(review): these two bare imports are unused by the code below; kept to
# preserve the original module's import side effects.
import langchain
import langchain_community

import gradio as gr
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain_community.llms import LlamaCpp

# Path to the local GGUF model file loaded by llama.cpp.
model_path = "./model/Model-1.2B-Q8_0.gguf"

# Prepended as the first message of every conversation to set the persona.
system_message = "You are a helpful assistant who acts like a pirate."

llm = LlamaCpp(
    model_path=model_path,
    temperature=0.8,  # fairly creative sampling
    max_tokens=250,   # cap on generated tokens per reply
    top_p=0.6,        # nucleus-sampling cutoff
    verbose=True,
)


def stream_response(message, history):
    """Yield progressively longer partial replies for Gradio streaming.

    Args:
        message: The user's latest utterance (str); skipped if ``None``.
        history: Prior turns. Accepts both Gradio history formats: a list
            of ``(user, assistant)`` tuples (legacy), or a list of
            ``{"role": ..., "content": ...}`` dicts (Gradio >= 5
            "messages" format).

    Yields:
        str: The assistant reply accumulated so far, one yield per chunk
        streamed from the model.
    """
    print(f"Input: {message}. History: {history}\n")

    conversation = [SystemMessage(content=system_message)]
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: one dict per role.
            role = turn.get("role")
            content = turn.get("content")
            if role == "user":
                conversation.append(HumanMessage(content=content))
            elif role == "assistant":
                conversation.append(AIMessage(content=content))
        else:
            # Legacy tuple format: (user_text, assistant_text) per turn.
            human, ai = turn
            conversation.append(HumanMessage(content=human))
            conversation.append(AIMessage(content=ai))

    if message is not None:
        conversation.append(HumanMessage(content=message))

    # Accumulate chunks so Gradio renders a growing message in place.
    partial_message = ""
    for chunk in llm.stream(conversation):
        partial_message += chunk
        yield partial_message


demo_interface = gr.ChatInterface(
    stream_response,
    textbox=gr.Textbox(
        placeholder="Send to the LLM...",
        container=False,
        autoscroll=True,
        scale=7,
    ),
)

# Guard the launch so importing this module does not start a web server.
if __name__ == "__main__":
    demo_interface.launch(share=False, debug=True)