chat with groq

- .gitignore +11 -0
- app.py +236 -64
- epub2txt-all.py +1 -1
- requirements.txt +6 -0
.gitignore CHANGED
@@ -23,3 +23,14 @@ txt/*.txt
 # exclude json files
 #freud_index/*.json
 
+# exclude .env file
+.env
+
+# exclude .venv folder
+.venv/
+
+# .streamlit folder
+.streamlit/
+
+
+
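The new .env exclusion pairs with the load_dotenv() call added to app.py below: the app reads its Groq credential from a local .env file that must never be committed. A minimal sketch of that file; the key name GROQ_API_KEY is the one app.py reads, the value is a placeholder:

    # .env (kept out of version control by the rule above)
    GROQ_API_KEY=your-groq-api-key-here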
app.py CHANGED
@@ -4,6 +4,12 @@ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
 from llama_index.retrievers.bm25 import BM25Retriever
 from llama_index.core.retrievers import QueryFusionRetriever
+from litellm import completion
+import os
+from dotenv import load_dotenv
+from llama_index.core.settings import Settings
+from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.llms.groq import Groq
 
 # Page config
 st.set_page_config(
@@ -12,20 +18,52 @@ st.set_page_config(
     layout="wide"
 )
 
-# [old title and description block, truncated in the diff viewer]
+# Load environment variables
+load_dotenv()
+
+# Initialize session state
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "sources" not in st.session_state:
+    st.session_state.sources = {}
+if "system_prompt" not in st.session_state:
+    st.session_state.system_prompt = """You are Sigmund Freud, speaking from your historical context and perspective. As the founder of psychoanalysis, you should:
+
+1. Only engage with topics related to:
+   - Psychoanalysis and its theories
+   - Dreams and their interpretation
+   - The unconscious mind
+   - Human sexuality and development
+   - Your published works and case studies
+   - Your historical context and contemporaries
+
+2. Politely decline to answer:
+   - Questions about events after your death in 1939
+   - Medical advice or diagnosis
+   - Topics outside your expertise or historical context
+   - Personal matters unrelated to psychoanalysis
+
+3. Maintain your characteristic style:
+   - Speak with authority on psychoanalytic matters
+   - Use psychoanalytic terminology when appropriate
+   - Reference your own works and theories
+   - Interpret questions through a psychoanalytic lens
+
+If a question is inappropriate or outside your scope, explain why you cannot answer it from your perspective as Freud."""
+
+# Configure LlamaIndex settings
+Settings.llm = Groq(
+    model="llama3-8b-8192",
+    api_key=os.getenv("GROQ_API_KEY"),
+    max_tokens=6000,
+    context_window=6000
+)
 
 @st.cache_resource
 def load_indices():
     """Load the index and create retrievers"""
     # Load embeddings
     embed_model = HuggingFaceEmbedding(model_name="multi-qa-MiniLM-L6-cos-v1")
-    Settings.llm = None
     Settings.embed_model = embed_model
 
     # Load index
@@ -42,7 +80,7 @@ def load_indices():
     hybrid_retriever = QueryFusionRetriever(
         [vector_retriever, bm25_retriever],
         similarity_top_k=10,
-    num_queries=1,
+        num_queries=1,
         mode="reciprocal_rerank",
         use_async=True,
         verbose=True,
@@ -53,64 +91,198 @@ def load_indices():
 # Load indices
 index, vector_retriever, bm25_retriever, hybrid_retriever = load_indices()
 
-# [old search page title and description, truncated in the diff viewer]
-
-# Search type selector
-search_type = st.radio(
-    "Select search method:",
-    ["Hybrid", "Vector", "BM25"],
-    horizontal=True,
-    help="""
-    - **BM25**: Keyword-based search that works best for exact matches and specific terms. Similar to traditional search engines.
-    - **Vector**: Semantic search that understands the meaning of your query, even if it uses different words than the source text.
-    - **Hybrid**: Combines both approaches for better overall results, balancing exact matches with semantic understanding.
-    """
-)
-
-with st.spinner('Searching...'):
-    if search_type == "Hybrid":
-        nodes = hybrid_retriever.retrieve(search_query)
-    elif search_type == "Vector":
-        nodes = vector_retriever.retrieve(search_query)
-    else:  # BM25
-        nodes = bm25_retriever.retrieve(search_query)
-
-    # Display results
-    st.subheader(f"Search Results")
-
-# [old results rendering and st.markdown(...) sidebar block, truncated in the diff viewer]
+# Function to process chat with RAG
+def chat_with_rag(message, history, retriever):
+    # Get context from the index if RAG is enabled
+    if st.session_state.get('use_rag', True):
+        nodes = retriever.retrieve(message)
+        # Sort nodes by score
+        nodes = sorted(nodes, key=lambda x: x.score, reverse=True)
+        # Keep nodes up to the slider value
+        nodes = nodes[:st.session_state.get('num_chunks', 1)]
+        context = "\n\n".join([node.text for node in nodes])
+        system_prompt = f"""{st.session_state.system_prompt}
+
+Use the following passages from my writings to inform your response:
+
+{context}
+"""
+
+        # Store sources in session state for this message
+        # Calculate the correct message index (total number of messages)
+        message_index = len(st.session_state.messages)
+        st.session_state.sources[message_index] = nodes
+    else:
+        system_prompt = st.session_state.system_prompt
+        nodes = []
+
+    # Prepare messages for the API call
+    messages = [ChatMessage(role=MessageRole.SYSTEM, content=system_prompt)]
+    for h in history:
+        role = MessageRole.ASSISTANT if h["role"] == "assistant" else MessageRole.USER
+        messages.append(ChatMessage(role=role, content=h["content"]))
+    messages.append(ChatMessage(role=MessageRole.USER, content=message))
+
+    # Call Groq through LlamaIndex's Groq LLM configured above
+    response = Settings.llm.chat(messages)
+    assistant_response = response.message.content
+
+    return assistant_response
+
+# Move the title to the top, before tabs
+st.title("Freud Explorer")
+
+# Add tab selection
+tab2, tab1 = st.tabs(["Chat", "Search"])
+
+with tab1:
+    st.title("Freud Works Hybrid Search")
+    st.markdown("""
+    This demo allows you to search through Freud's complete works using a hybrid approach combining:
+    - BM25 (keyword-based search)
+    - Vector search (semantic similarity)
+    """)
+
+    # Search interface
+    search_query = st.text_input("Enter your search query:", placeholder="e.g. Oedipus complex")
+
+    # Add top_k selector
+    top_k = st.slider("Number of results to return:", min_value=1, max_value=20, value=10)
+
+    # Update retrievers with new top_k
+    vector_retriever.similarity_top_k = top_k
+    bm25_retriever.similarity_top_k = top_k
+    hybrid_retriever.similarity_top_k = top_k
+
+    # Search type selector
+    search_type = st.radio(
+        "Select search method:",
+        ["Hybrid", "Vector", "BM25"],
+        horizontal=True,
+        help="""
+        - **BM25**: Keyword-based search that works best for exact matches and specific terms.
+        - **Vector**: Semantic search that understands the meaning of your query.
+        - **Hybrid**: Combines both approaches for better overall results.
+        """
+    )
+
+    if search_query:
+        with st.spinner('Searching...'):
+            if search_type == "Hybrid":
+                nodes = hybrid_retriever.retrieve(search_query)
+            elif search_type == "Vector":
+                nodes = vector_retriever.retrieve(search_query)
+            else:  # BM25
+                nodes = bm25_retriever.retrieve(search_query)
+
+        # Display results
+        st.subheader("Search Results")
+
+        for i, node in enumerate(nodes, 1):
+            preview = node.text[:200] + "..." if len(node.text) > 200 else node.text
+            score = f"{node.score:.3f}" if hasattr(node, 'score') else "N/A"
+
+            with st.expander(f"Result {i} (score: {score})\n\n{preview}", expanded=False):
+                st.markdown(node.text)
+                if node.metadata:
+                    st.markdown("---")
+                    st.markdown("**Source:**")
+                    st.json(node.metadata)
+
+    # Add sidebar with information
+    with st.sidebar:
+        st.header("About")
+        st.markdown("""
+        This demo searches through Freud's complete works using:
+
+        - **BM25**: Traditional keyword-based search
+        - **Vector Search**: Semantic similarity using embeddings
+        - **Hybrid**: Combines both approaches
+        """)
+
+with tab2:
+    st.header("Chat with Freud's Works")
+
+    # Create containers in the right order
+    chat_container = st.container()
+    input_container = st.container()
+    options_container = st.container()
+
+    # System prompt editor in an expander with help text above
+    with options_container:
+        st.info("💡 The system prompt defines the AI's persona and behavior. It's like giving stage directions to an actor.")
+        with st.expander("System Prompt"):
+            st.text_area(
+                "Edit System Prompt",
+                value=st.session_state.system_prompt,
+                height=100,
+                help="This prompt sets the AI's personality and behavior. When RAG is enabled, relevant passages will be automatically added after this prompt.",
+                key="system_prompt",
+                on_change=lambda: setattr(st.session_state, 'system_prompt', st.session_state.system_prompt)
+            )
+
+        # Put the RAG toggle, chunks slider, and clear button in a horizontal layout
+        col1, col2, col3 = st.columns([2, 2, 1])
+        with col1:
+            st.session_state.use_rag = st.toggle(
+                "Enable RAG (Retrieval Augmented Generation)",
+                value=st.session_state.get('use_rag', True),
+                key='rag_toggle'
+            )
+        with col2:
+            if st.session_state.use_rag:
+                num_chunks = st.slider(
+                    "Number of chunks to retrieve",
+                    min_value=1,
+                    max_value=3,
+                    value=st.session_state.get('num_chunks', 1),
+                    key='num_chunks_slider'
+                )
+        with col3:
+            if st.button("Clear Chat", use_container_width=True):
+                st.session_state.messages = []
+                st.session_state.sources = {}
+                st.rerun()
+
+    # Display chat messages in the chat container
+    with chat_container:
+        for i, message in enumerate(st.session_state.messages):
+            with st.chat_message(message["role"]):
+                st.markdown(message["content"])
+                if (message["role"] == "assistant" and
+                        i in st.session_state.sources and
+                        st.session_state.sources[i]):
+                    with st.expander("View Sources"):
+                        nodes = st.session_state.sources[i]
+                        for j, node in enumerate(nodes, 1):
+                            st.markdown(f"**Source {j}:**")
+                            st.markdown(node.text)
+                            if node.metadata:
+                                st.markdown("---")
+                                st.markdown("**Metadata:**")
+                                st.json(node.metadata)
+
+    # Chat input at the bottom
+    with input_container:
+        if prompt := st.chat_input("What would you like to know about Freud's works?", key="chat_input"):
+            st.session_state.messages.append({"role": "user", "content": prompt})
+            with chat_container:
+                with st.chat_message("user"):
+                    st.markdown(prompt)
+
+            with chat_container:
+                with st.chat_message("assistant"):
+                    with st.spinner("Thinking..."):
+                        response = chat_with_rag(
+                            prompt,
+                            st.session_state.messages[:-1],
+                            hybrid_retriever if st.session_state.use_rag else None
+                        )
+                        st.markdown(response)
+            st.session_state.messages.append({"role": "assistant", "content": response})
+
+            st.rerun()
+
+if __name__ == "__main__":
+    pass
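A note on the QueryFusionRetriever hunk above: mode="reciprocal_rerank" merges the BM25 and vector result lists by reciprocal rank fusion. A minimal, library-free sketch of the idea; llama-index's internal weighting may differ, and the function name and k=60 constant here are illustrative (k=60 is the value common in the RRF literature):

    def reciprocal_rank_fusion(rankings, k=60):
        """Fuse several ranked lists of doc ids (best first) into one."""
        scores = {}
        for ranking in rankings:
            for rank, doc_id in enumerate(ranking):
                # Each list contributes 1/(k + rank); earlier positions count more.
                scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank + 1)
        return sorted(scores, key=scores.get, reverse=True)

    # Example: BM25 and vector search partly disagree; fusion balances both.
    bm25_hits = ["doc_a", "doc_b", "doc_c"]
    vector_hits = ["doc_c", "doc_a", "doc_d"]
    print(reciprocal_rank_fusion([bm25_hits, vector_hits]))
    # ['doc_a', 'doc_c', 'doc_b', 'doc_d']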
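For reference, the chat path that chat_with_rag() wraps can be exercised outside Streamlit. A minimal sketch, assuming the packages from requirements.txt are installed and GROQ_API_KEY is set in .env; the prompt strings are illustrative:

    import os
    from dotenv import load_dotenv
    from llama_index.core.llms import ChatMessage, MessageRole
    from llama_index.llms.groq import Groq

    load_dotenv()  # pull GROQ_API_KEY from .env
    llm = Groq(model="llama3-8b-8192", api_key=os.getenv("GROQ_API_KEY"))

    messages = [
        ChatMessage(role=MessageRole.SYSTEM, content="You are Sigmund Freud."),
        ChatMessage(role=MessageRole.USER, content="What do dreams reveal?"),
    ]
    response = llm.chat(messages)  # same call app.py makes via Settings.llm
    print(response.message.content)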
epub2txt-all.py CHANGED
@@ -596,7 +596,7 @@ def main():
         sys.stderr.write('Error converting {!r}:\n'.format(filename))
         raise
     if len(txt.strip()) > 0:
-        if out is None:
+        if out is None:
             out = open(args.outfile, "w", encoding="utf-8") if args.outfile != '-' else sys.stdout
         out.write(txt)
         out.flush()

(The change to line 599 appears to be whitespace-only; the old and new lines read identically in the viewer.)
requirements.txt CHANGED
@@ -1,5 +1,11 @@
 streamlit
+python-dotenv
+litellm
+langchain
+chromadb
+tiktoken
 llama-index
 sentence-transformers
 llama-index-retrievers-bm25
 llama-index-embeddings-huggingface
+llama-index-llms-groq