devve1 committed on
Commit
215effb
1 Parent(s): e131573

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -35
app.py CHANGED
@@ -246,7 +246,8 @@ async def main(query: str, chunks: list[str], llm, dense_model, sparse_model):
246
  output = reduce_chain.invoke([summaries])
247
  return output['output_text']
248
 
249
- async def load_models_and_components():
 
250
  model_path = hf_hub_download(
251
  repo_id='NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF',
252
  filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
@@ -276,6 +277,11 @@ async def load_models_and_components():
276
  providers=provider,
277
  batch_size=32
278
  )
 
 
 
 
 
279
  docs = WikipediaLoader(query='Action-RPG').load()
280
 
281
  text_splitter = SemanticChunker(
@@ -284,45 +290,36 @@ async def load_models_and_components():
284
  )
285
 
286
  documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
287
-
288
- return llm, documents, dense_model, sparse_model
289
-
290
- async def initialize():
291
- with st.spinner(text='Please Wait...'):
292
- st.session_state.llm, st.session_state.chunks, st.session_state.dense, st.session_state.sparse = await load_models_and_components()
293
- st.session_state.loaded = True
294
 
295
  if __name__ == '__main__':
296
  st.set_page_config(
297
  page_title="Video Game Assistant",
298
  layout="wide"
299
  )
 
 
 
300
 
301
- if "loaded" not in st.session_state:
302
- st.session_state.loaded = False
303
-
304
- if st.session_state.loaded:
305
- if "messages" not in st.session_state:
306
- st.session_state.messages = []
307
-
308
- for message in st.session_state.messages:
309
- with st.chat_message(message["role"]):
310
- st.markdown(message["content"])
311
-
312
- if prompt := st.chat_input("Message Video Game Assistant"):
313
- st.chat_message("user").markdown(prompt)
314
- st.session_state.messages.append({"role": "user", "content": prompt})
315
-
316
- ai_response = asyncio.run(main(prompt, st.session_state.chunks, st.session_state.llm, st.session_state.dense, st.session_state))
317
- response = f"Echo: {ai_response}"
318
- with st.chat_message("assistant"):
319
- message_placeholder = st.empty()
320
- full_response = ""
321
- for chunk in re.split(r'(\s+)', response):
322
- full_response += chunk + " "
323
- time.sleep(0.01)
324
- message_placeholder.markdown(full_response + "▌")
325
- st.session_state.messages.append({"role": "assistant", "content": full_response})
326
- else:
327
- asyncio.run(initialize())
328
 
 
246
  output = reduce_chain.invoke([summaries])
247
  return output['output_text']
248
 
249
+ @st.cache_resource
250
+ def load_models_and_components():
251
  model_path = hf_hub_download(
252
  repo_id='NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF',
253
  filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
 
277
  providers=provider,
278
  batch_size=32
279
  )
280
+
281
+ return llm, dense_model, sparse_model
282
+
283
+ @st.cache_data
284
+ def chunk_documents(dense_model):
285
  docs = WikipediaLoader(query='Action-RPG').load()
286
 
287
  text_splitter = SemanticChunker(
 
290
  )
291
 
292
  documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
293
+ return documents
 
 
 
 
 
 
294
 
295
  if __name__ == '__main__':
296
  st.set_page_config(
297
  page_title="Video Game Assistant",
298
  layout="wide"
299
  )
300
+ with st.spinner(text='Please Wait...'):
301
+ llm, dense, sparse = load_models_and_components()
302
+ chunks = chunk_documents(dense)
303
 
304
+ if "messages" not in st.session_state:
305
+ st.session_state.messages = []
306
+
307
+ for message in st.session_state.messages:
308
+ with st.chat_message(message["role"]):
309
+ st.markdown(message["content"])
310
+
311
+ if prompt := st.chat_input("Message Video Game Assistant"):
312
+ st.chat_message("user").markdown(prompt)
313
+ st.session_state.messages.append({"role": "user", "content": prompt})
314
+
315
+ ai_response = asyncio.run(main(prompt, chunks, llm, dense, sparse))
316
+ response = f"Echo: {ai_response}"
317
+ with st.chat_message("assistant"):
318
+ message_placeholder = st.empty()
319
+ full_response = ""
320
+ for chunk in re.split(r'(\s+)', response):
321
+ full_response += chunk + " "
322
+ time.sleep(0.01)
323
+ message_placeholder.markdown(full_response + "▌")
324
+ st.session_state.messages.append({"role": "assistant", "content": full_response})
 
 
 
 
 
 
325