import os

import gradio as gr
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
# Initialize the Groq LangChain chat object. A default model is set at
# construction time; generate_response swaps in the user's selection per request.
groq_chat = ChatGroq(
    groq_api_key=os.environ.get("GROQ_API_KEY"),
    model="llama-3.1-70b-versatile",
)

# Initialize the Google LangChain chat object. ChatGoogleGenerativeAI requires
# a model at construction; it is likewise replaced per request.
google_chat = ChatGoogleGenerativeAI(
    google_api_key=os.environ.get("GOOGLE_API_KEY"),
    model="gemini-1.5-flash",
)
# Initialize memory to manage the chat history, keeping the last
# k=8 human/AI exchanges in the prompt window.
memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_messages=True)
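# Illustrative round-trip (not executed): with return_messages=True the memory
# returns message objects ready for MessagesPlaceholder, e.g.
#   memory.save_context({"input": "Hi"}, {"output": "Hello!"})
#   memory.load_memory_variables({})["chat_history"]
#   # -> [HumanMessage(content='Hi'), AIMessage(content='Hello!')]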
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
    # `history` is Gradio's own transcript; context is tracked in `memory` instead.
    print("Model =", model)
    # Route to the chosen provider and apply the UI-selected generation settings
    # (the two integrations name the token-cap and sampling fields differently).
    if model.startswith("gemini"):
        chat = google_chat
        chat.model = model
        chat.temperature = temperature
        chat.max_output_tokens = max_tokens
        chat.top_p = top_p
    else:
        chat = groq_chat
        chat.model_name = model
        chat.temperature = temperature
        chat.max_tokens = max_tokens
        # Groq takes top_p/seed via model_kwargs; a seed of 0 means "random",
        # so it is simply omitted in that case.
        chat.model_kwargs = {"top_p": top_p} if seed == 0 else {"top_p": top_p, "seed": seed}
    prompt = ChatPromptTemplate.from_messages(
        [
            # Persistent system prompt: sets the initial context for the AI.
            SystemMessage(content="You are a helpful AI assistant."),
            # Placeholder through which the windowed chat history is injected.
            MessagesPlaceholder(variable_name="chat_history"),
            # Template where the user's current input is injected into the prompt.
            HumanMessagePromptTemplate.from_template("{human_input}"),
        ]
    )
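    # For reference (illustrative, not executed): formatting this prompt with an
    # empty history should yield a two-message list, e.g.
    #   prompt.format_messages(chat_history=[], human_input="Hi")
    #   # -> [SystemMessage(content='You are a helpful AI assistant.'),
    #   #     HumanMessage(content='Hi')]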
    # Chain the prompt into the chat model with the LCEL pipe operator,
    # which builds a RunnableSequence.
    conversation = prompt | chat
    # Load the windowed chat history kept by the memory object.
    chat_history = memory.load_memory_variables({})["chat_history"]
    # Generate the answer by sending the fully assembled prompt to the LLM.
    response = conversation.invoke({"human_input": user_input, "chat_history": chat_history})
    # Update the memory with the new interaction.
    memory.save_context({"input": user_input}, {"output": response.content})
    return response.content
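# Optional local smoke test (assumes the API keys above are set; the model name
# and settings are just illustrative values). Uncomment to try:
# print(generate_response("Hello!", [], "llama-3.1-8b-instant", 0.5, 256, 0.9, 0))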
# Define the additional inputs exposed under the chat box.
additional_inputs = [
    gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it", "gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp"], value="llama-3.1-70b-versatile", label="Model"),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens the model may generate in a single response.<br>Maximums: 8k for gemma-7b-it, gemma2-9b-it, llama3 8b & 70b; 32k for mixtral 8x7b; 128k for the llama 3.1 models."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="Nucleus sampling: the model only considers the most probable next tokens whose cumulative probability adds up to p."),
    gr.Number(precision=0, value=0, label="Seed", info="A starting point for reproducible generation; use 0 for a random seed."),
]
# Example prompts; the terse follow-ups exercise the conversation memory.
example1 = [
    ["What's the distance from Tokyo to New York?"],
    ["What to San Francisco?"],
    ["Then what to Beijing?"],
    ["And what to Kyoto?"],
    ["What from Beijing to New York?"],
]
# Create the Gradio chat interface.
interface = gr.ChatInterface(
    fn=generate_response,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    examples=example1,
    cache_examples=False,
)

# Launch the app.
interface.launch()