jchen8000 committed on
Commit
c6d3d79
·
verified ·
1 Parent(s): 23837f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -5
app.py CHANGED
@@ -9,11 +9,17 @@ from langchain_core.prompts import (
9
  from langchain_core.messages import SystemMessage
10
  from langchain.chains.conversation.memory import ConversationBufferWindowMemory
11
  from langchain_groq import ChatGroq
 
12
 
13
  # Initialize Groq Langchain chat object and conversation
14
  groq_chat = ChatGroq(
15
- groq_api_key=os.environ.get("GROQ_API_KEY"),
16
  )
 
 
 
 
 
17
 
18
  # Initialize memory to manage the chat history,
19
  # ensuring the AI remembers the specified number of history messages, in this case 8.
@@ -22,8 +28,14 @@ memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_m
22
 
23
  def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
24
  print( "Model =", model)
25
-
26
- groq_chat.model_name = model
 
 
 
 
 
 
27
  prompt = ChatPromptTemplate.from_messages(
28
  [
29
  # This is the persistent system prompt, sets the initial context for the AI.
@@ -36,7 +48,7 @@ def generate_response(user_input, history, model, temperature, max_tokens, top_p
36
  )
37
 
38
  # Create a conversation sequence using RunnableSequence
39
- conversation = prompt | groq_chat
40
 
41
  # Load chat_history
42
  chat_history = memory.load_memory_variables({})["chat_history"]
@@ -51,7 +63,7 @@ def generate_response(user_input, history, model, temperature, max_tokens, top_p
51
 
52
  # Define additional inputs and examples if needed
53
  additional_inputs = [
54
- gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it"], value="llama-3.1-70b-versatile", label="Model"),
55
  gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
56
  gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
57
  gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
 
9
  from langchain_core.messages import SystemMessage
10
  from langchain.chains.conversation.memory import ConversationBufferWindowMemory
11
  from langchain_groq import ChatGroq
12
+ from langchain_google_genai import ChatGoogleGenerativeAI
13
 
14
  # Initialize Groq Langchain chat object and conversation
15
  groq_chat = ChatGroq(
16
+ groq_api_key=os.environ.get("GROQ_API_KEY"),
17
  )
18
+ # Initialize Google Langchain chat object and conversation
19
+ google_chat = ChatGoogleGenerativeAI(
20
+ api_key=os.environ.get("GOOGLE_API_KEY"),
21
+ )
22
+
23
 
24
  # Initialize memory to manage the chat history,
25
  # ensuring the AI remembers the specified number of history messages, in this case 8.
 
28
 
29
  def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
30
  print( "Model =", model)
31
+
32
+ if model.startswith("gemini"):
33
+ chat = google_chat
34
+ chat.model = model
35
+ else:
36
+ chat = groq_chat
37
+ chat.model_name = model
38
+
39
  prompt = ChatPromptTemplate.from_messages(
40
  [
41
  # This is the persistent system prompt, sets the initial context for the AI.
 
48
  )
49
 
50
  # Create a conversation sequence using RunnableSequence
51
+ conversation = prompt | chat
52
 
53
  # Load chat_history
54
  chat_history = memory.load_memory_variables({})["chat_history"]
 
63
 
64
  # Define additional inputs and examples if needed
65
  additional_inputs = [
66
+ gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it","gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp"], value="llama-3.1-70b-versatile", label="Model"),
67
  gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
68
  gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
69
  gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),