from langchain_community.llms import LlamaCpp
from langchain.schema import AIMessage, HumanMessage, SystemMessage
import gradio as gr
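
# Local GGUF model to load and the system prompt prepended to every chat.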
model_path = "./model/Model-1.2B-Q8_0.gguf"
system_message = "You are a helpful assistant who acts like a pirate."
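
# Load the model through the llama.cpp bindings; temperature and top_p
# control sampling randomness, and max_tokens caps the length of each reply.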
llm = LlamaCpp(
    model_path=model_path,
    temperature=0.8,
    max_tokens=250,
    top_p=0.6,
    verbose=True,
)
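
# Gradio calls this generator once per user turn; every value it yields
# replaces the text shown in the assistant's chat bubble, which is what
# produces the token-by-token streaming effect.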
def stream_response(message, history):
    print(f"Input: {message}. History: {history}\n")

    history_langchain_format = [SystemMessage(content=system_message)]
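
    # With Gradio's default tuple-style history, `history` arrives as
    # [user, assistant] pairs; rebuild it as LangChain message objects.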
    for human, ai in history:
        history_langchain_format.append(HumanMessage(content=human))
        history_langchain_format.append(AIMessage(content=ai))
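
    # Append the new user message, then stream the model's reply.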
    if message is not None:
        history_langchain_format.append(HumanMessage(content=message))

        partial_message = ""
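        # LlamaCpp is a completion-style LLM, so LangChain converts the
        # message list to a single prompt string; yield the accumulated
        # text after each new chunk so the UI updates incrementally.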
        for response in llm.stream(history_langchain_format):
            partial_message += response
            yield partial_message
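
# Hook the generator into Gradio's chat UI; because stream_response is a
# generator, ChatInterface streams each yielded value into the chat window.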
demo_interface = gr.ChatInterface(
    stream_response,
    textbox=gr.Textbox(
        placeholder="Send to the LLM...",
        container=False,
        autoscroll=True,
        scale=7,
    ),
)
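
# share=False keeps the app local; debug=True surfaces errors in the console.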
demo_interface.launch(share=False, debug=True)