IAMTFRMZA committed on
Commit 5686026 · verified · 1 Parent(s): 3432a8c
Files changed (1)
  1. app.py +20 -25
app.py CHANGED
@@ -3,7 +3,6 @@ import shutil
 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
-import pandas as pd
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
@@ -15,16 +14,14 @@ from langchain_community.document_loaders import UnstructuredExcelLoader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
 
-# Set API key environment variable
+# Set API key
 os.environ["TOGETHER_API_KEY"] = os.getenv("TOGETHER_API_KEY")
 
-
 def inference(chain, input_query):
     """Invoke the processing chain with the input query."""
     result = chain.invoke(input_query)
     return result
 
-
 def create_chain(retriever, prompt, model):
     """Compose the processing chain with the specified components."""
     chain = (
@@ -35,7 +32,6 @@ def create_chain(retriever, prompt, model):
     )
     return chain
 
-
 def generate_prompt():
     """Define the prompt template for question answering."""
     template = """<s>[INST] Answer the question in a simple sentence based only on the following context:
@@ -44,7 +40,6 @@ def generate_prompt():
     """
     return ChatPromptTemplate.from_template(template)
 
-
 def configure_model():
     """Configure the language model with specified parameters."""
     return Together(
@@ -56,14 +51,12 @@ def configure_model():
         repetition_penalty=1.1,
     )
 
-
 def configure_retriever(documents):
     """Configure the retriever with embeddings and a FAISS vector store."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_db = FAISS.from_documents(documents, embeddings)
     return vector_db.as_retriever()
 
-
 def load_pdf_documents(path):
     """Load and preprocess PDF documents from the specified path."""
     documents = []
@@ -74,7 +67,6 @@ def load_pdf_documents(path):
         documents.extend(loader.load())
     return documents
 
-
 def load_word_documents(path):
     """Load and preprocess Word documents from the specified path."""
     documents = []
@@ -85,7 +77,6 @@ def load_word_documents(path):
         documents.extend(loader.load())
     return documents
 
-
 def load_excel_documents(path):
     """Load and preprocess Excel documents from the specified path."""
     documents = []
@@ -96,7 +87,6 @@ def load_excel_documents(path):
         documents.extend(loader.load())
     return documents
 
-
 def load_documents(path):
     """Load and preprocess documents from PDF, Word, and Excel files."""
     pdf_docs = load_pdf_documents(path)
@@ -104,7 +94,6 @@ def load_documents(path):
     excel_docs = load_excel_documents(path)
     return pdf_docs + word_docs + excel_docs
 
-
 def scrape_url(url):
     """Scrape content from a given URL and save it to a text file."""
     try:
@@ -112,10 +101,6 @@ def scrape_url(url):
         response.raise_for_status()  # Ensure we notice bad responses
         soup = BeautifulSoup(response.content, 'html.parser')
         text = soup.get_text()
-
-        # Ensure the data directory exists
-        os.makedirs("data", exist_ok=True)
-
         # Save the text content to a file for processing
         text_file_path = "data/scraped_content.txt"
         with open(text_file_path, "w") as file:
@@ -124,16 +109,22 @@
     except requests.RequestException as e:
         st.error(f"Error fetching the URL: {e}")
         return None
-    except Exception as e:
-        st.error(f"An unexpected error occurred: {e}")
-        return None
-
 
 def process_document(path, input_query):
     """Process the document by setting up the chain and invoking it with the input query."""
     documents = load_documents(path)
+
+    if not documents:
+        st.error("No documents found. Please check the uploaded files or scraped content.")
+        return "No documents found."
+
     text_splitter = CharacterTextSplitter(chunk_size=18000, chunk_overlap=10)
     split_docs = text_splitter.split_documents(documents)
+
+    if not split_docs:
+        st.error("No text could be extracted from the documents.")
+        return "No text could be extracted."
+
     llm_model = configure_model()
     prompt = generate_prompt()
     retriever = configure_retriever(split_docs)
@@ -141,7 +132,6 @@ def process_document(path, input_query):
     response = inference(chain, input_query)
     return response
 
-
 def main():
     """Main function to run the Streamlit app."""
     tmp_folder = '/tmp/1'
@@ -164,7 +154,8 @@ def main():
         user_query = st.text_input("Ask a question:", key="query_input")
         if st.form_submit_button("Ask") and user_query:
             response = process_document(tmp_folder, user_query)
-            st.session_state.chat_history.append({"question": user_query, "answer": response})
+            if response:  # Check if response is not empty
+                st.session_state.chat_history.append({"question": user_query, "answer": response})
 
     if st.button("Clear Chat History"):
         st.session_state.chat_history = []
@@ -182,9 +173,13 @@ def main():
             file_path = scrape_url(url_input)
             if file_path:
                 documents = load_documents(tmp_folder)
-                response = process_document(tmp_folder, "What is the content of the URL?")
-                st.session_state.chat_history.append({"question": "What is the content of the URL?", "answer": response})
-                st.success("URL content processed successfully!")
+                if documents:  # Check if documents are loaded after scraping
+                    response = process_document(tmp_folder, "What is the content of the URL?")
+                    if response:  # Check if response is not empty
+                        st.session_state.chat_history.append({"question": "What is the content of the URL?", "answer": response})
+                    st.success("URL content processed successfully!")
+                else:
+                    st.error("Failed to load any documents from the scraped URL content.")
             else:
                 st.error("Failed to process URL content.")
         else: