import os

import gradio as gr
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
# Initialize the Groq LangChain chat object. A default model is set at
# construction time; generate_response swaps in the user's selection per request.
groq_chat = ChatGroq(
    groq_api_key=os.environ.get("GROQ_API_KEY"),
    model="llama-3.1-70b-versatile",
)

# Initialize the Google LangChain chat object. ChatGoogleGenerativeAI requires
# a model at construction; it is likewise replaced per request.
google_chat = ChatGoogleGenerativeAI(
    google_api_key=os.environ.get("GOOGLE_API_KEY"),
    model="gemini-1.5-flash",
)
# Initialize memory to manage the chat history, keeping the last
# k=8 human/AI exchanges in the prompt window.
memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_messages=True)
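# Illustrative round-trip (not executed): with return_messages=True the memory
# returns message objects ready for MessagesPlaceholder, e.g.
#   memory.save_context({"input": "Hi"}, {"output": "Hello!"})
#   memory.load_memory_variables({})["chat_history"]
#   # -> [HumanMessage(content='Hi'), AIMessage(content='Hello!')]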
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
    # `history` is Gradio's own transcript; context is tracked in `memory` instead.
    print("Model =", model)
    # Route to the chosen provider and apply the UI-selected generation settings
    # (the two integrations name the token-cap and sampling fields differently).
    if model.startswith("gemini"):
        chat = google_chat
        chat.model = model
        chat.temperature = temperature
        chat.max_output_tokens = max_tokens
        chat.top_p = top_p
    else:
        chat = groq_chat
        chat.model_name = model
        chat.temperature = temperature
        chat.max_tokens = max_tokens
        # Groq takes top_p/seed via model_kwargs; a seed of 0 means "random",
        # so it is simply omitted in that case.
        chat.model_kwargs = {"top_p": top_p} if seed == 0 else {"top_p": top_p, "seed": seed}
    prompt = ChatPromptTemplate.from_messages(
        [
            # Persistent system prompt: sets the initial context for the AI.
            SystemMessage(content="You are a helpful AI assistant."),
            # Placeholder through which the windowed chat history is injected.
            MessagesPlaceholder(variable_name="chat_history"),
            # Template where the user's current input is injected into the prompt.
            HumanMessagePromptTemplate.from_template("{human_input}"),
        ]
    )
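    # For reference (illustrative, not executed): formatting this prompt with an
    # empty history should yield a two-message list, e.g.
    #   prompt.format_messages(chat_history=[], human_input="Hi")
    #   # -> [SystemMessage(content='You are a helpful AI assistant.'),
    #   #     HumanMessage(content='Hi')]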
    # Chain the prompt into the chat model with the LCEL pipe operator,
    # which builds a RunnableSequence.
    conversation = prompt | chat
    # Load the windowed chat history kept by the memory object.
    chat_history = memory.load_memory_variables({})["chat_history"]
    # Generate the answer by sending the fully assembled prompt to the LLM.
    response = conversation.invoke({"human_input": user_input, "chat_history": chat_history})
    # Update the memory with the new interaction.
    memory.save_context({"input": user_input}, {"output": response.content})
    return response.content
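# Optional local smoke test (assumes the API keys above are set; the model name
# and settings are just illustrative values). Uncomment to try:
# print(generate_response("Hello!", [], "llama-3.1-8b-instant", 0.5, 256, 0.9, 0))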
# Define the additional inputs exposed under the chat box.
additional_inputs = [
    gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it", "gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp"], value="llama-3.1-70b-versatile", label="Model"),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens the model may generate in a single response.<br>Maximums: 8k for gemma-7b-it, gemma2-9b-it, llama3 8b & 70b; 32k for mixtral 8x7b; 128k for the llama 3.1 models."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="Nucleus sampling: the model only considers the most probable next tokens whose cumulative probability adds up to p."),
    gr.Number(precision=0, value=0, label="Seed", info="A starting point for reproducible generation; use 0 for a random seed."),
]
# Example prompts; the terse follow-ups exercise the conversation memory.
example1 = [
    ["What's the distance from Tokyo to New York?"],
    ["What to San Francisco?"],
    ["Then what to Beijing?"],
    ["And what to Kyoto?"],
    ["What from Beijing to New York?"],
]
# Create the Gradio chat interface.
interface = gr.ChatInterface(
    fn=generate_response,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    examples=example1,
    cache_examples=False,
)

# Launch the app.
interface.launch()