Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,11 +9,17 @@ from langchain_core.prompts import (
|
|
9 |
from langchain_core.messages import SystemMessage
|
10 |
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
|
11 |
from langchain_groq import ChatGroq
|
|
|
12 |
|
13 |
# Initialize Groq Langchain chat object and conversation
|
14 |
groq_chat = ChatGroq(
|
15 |
-
|
16 |
)
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# Initialize memory to manage the chat history,
|
19 |
# ensuring the AI remembers the specified number of history messages, in this case 8.
|
@@ -22,8 +28,14 @@ memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_m
|
|
22 |
|
23 |
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
|
24 |
print( "Model =", model)
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
prompt = ChatPromptTemplate.from_messages(
|
28 |
[
|
29 |
# This is the persistent system prompt, sets the initial context for the AI.
|
@@ -36,7 +48,7 @@ def generate_response(user_input, history, model, temperature, max_tokens, top_p
|
|
36 |
)
|
37 |
|
38 |
# Create a conversation sequence using RunnableSequence
|
39 |
-
conversation = prompt |
|
40 |
|
41 |
# Load chat_history
|
42 |
chat_history = memory.load_memory_variables({})["chat_history"]
|
@@ -51,7 +63,7 @@ def generate_response(user_input, history, model, temperature, max_tokens, top_p
|
|
51 |
|
52 |
# Define additional inputs and examples if needed
|
53 |
additional_inputs = [
|
54 |
-
gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it"], value="llama-3.1-70b-versatile", label="Model"),
|
55 |
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
|
56 |
gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
|
57 |
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
|
|
|
9 |
from langchain_core.messages import SystemMessage
|
10 |
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
|
11 |
from langchain_groq import ChatGroq
|
12 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
13 |
|
14 |
# Initialize Groq Langchain chat object and conversation
|
15 |
groq_chat = ChatGroq(
|
16 |
+
groq_api_key=os.environ.get("GROQ_API_KEY"),
|
17 |
)
|
18 |
+
# Initialize Google Langchain chat object and conversation
|
19 |
+
google_chat = ChatGoogleGenerativeAI(
|
20 |
+
api_key=os.environ.get("GOOGLE_API_KEY"),
|
21 |
+
)
|
22 |
+
|
23 |
|
24 |
# Initialize memory to manage the chat history,
|
25 |
# ensuring the AI remembers the specified number of history messages, in this case 8.
|
|
|
28 |
|
29 |
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
|
30 |
print( "Model =", model)
|
31 |
+
|
32 |
+
if model.startswith("gemini"):
|
33 |
+
chat = google_chat
|
34 |
+
chat.model = model
|
35 |
+
else:
|
36 |
+
chat = groq_chat
|
37 |
+
chat.model_name = model
|
38 |
+
|
39 |
prompt = ChatPromptTemplate.from_messages(
|
40 |
[
|
41 |
# This is the persistent system prompt, sets the initial context for the AI.
|
|
|
48 |
)
|
49 |
|
50 |
# Create a conversation sequence using RunnableSequence
|
51 |
+
conversation = prompt | chat
|
52 |
|
53 |
# Load chat_history
|
54 |
chat_history = memory.load_memory_variables({})["chat_history"]
|
|
|
63 |
|
64 |
# Define additional inputs and examples if needed
|
65 |
additional_inputs = [
|
66 |
+
gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it","gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp"], value="llama-3.1-70b-versatile", label="Model"),
|
67 |
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
|
68 |
gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
|
69 |
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
|