Update app.py
app.py CHANGED
@@ -11,17 +11,14 @@ import os
 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextStreamer, ConversationalPipeline
 
-
 ####CREDIT#####
-#Credit to
-#Sri LaxmiGithub Link: https://github.com/SriLaxmi1993/Document-Genie-using-RAG-Framwork
-#Sri Laxmi Youtube:https://www.youtube.com/watch?v=SkY2u4UUr6M&t=112s
+# Credit to author (Sri Laxmi) of original code reference: SriLaxmi1993
+# Sri Laxmi GitHub Link: https://github.com/SriLaxmi1993/Document-Genie-using-RAG-Framwork
+# Sri Laxmi YouTube: https://www.youtube.com/watch?v=SkY2u4UUr6M&t=112s
 ###############
 os.system("pip install -r requirements.txt")
 
-
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
+# some model
 
 #tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
 #model = AutoModelForCausalLM.from_pretrained("google/gemma-7b")
@@ -32,12 +29,13 @@ st.set_page_config(page_title="Gemini RAG", layout="wide")
 # This is the first API key input; no need to repeat it in the main function.
 api_key = 'AIzaSyCvXRggpO2yNwIpZmoMy_5Xhm2bDyD-pOo'
 
-#os.mkdir('faiss_index')
 
-#
+# os.mkdir('faiss_index')
+
+# empty faiss_index and chat_history.json
 def delete_files_in_folder(folder_path):
     try:
-
+        # Iterate over all the files in the folder
         chat_history_file = "chat_history.json"
         if os.path.exists(chat_history_file):
             os.remove(chat_history_file)
@@ -70,16 +68,19 @@ def get_pdf_text(pdf_docs):
             text += page.extract_text()
     return text
 
+
 def get_text_chunks(text):
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=CH_size, chunk_overlap=CH_overlap)
     chunks = text_splitter.split_text(text)
     return chunks
 
+
 def get_vector_store(text_chunks, api_key):
     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
+
 def get_conversational_chain():
     prompt_template = """
     Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
@@ -94,7 +95,8 @@ def get_conversational_chain():
     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
     return chain
 
-
+
+# chat history functionality
 def update_chat_history(question, reply):
     # Check if chat history file exists
     chat_history_file = "chat_history.json"
@@ -146,7 +148,8 @@ def main():
     with st.sidebar:
         st.title("Menu:")
 
-        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True, key="pdf_uploader")
+        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button",
+                                    accept_multiple_files=True, key="pdf_uploader")
         if st.button("Submit & Process", key="process_button") and api_key:
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)
@@ -154,5 +157,6 @@ def main():
                 get_vector_store(text_chunks, api_key)
                 st.success("Done")
 
+
 if __name__ == "__main__":
     main()
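A few sketches of pieces the hunks reference but do not fully show. First, nothing above shows how the saved faiss_index is read back at question time. Below is a minimal sketch of that query side, assuming the same LangChain pairing of GoogleGenerativeAIEmbeddings and FAISS that get_vector_store uses; the user_input helper name and the allow_dangerous_deserialization flag (required by newer LangChain releases) are assumptions, since the actual wiring lives outside these hunks.

from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

def user_input(user_question, api_key):
    # Rebuild the same embedding model used in get_vector_store, then reload the saved index
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    # Retrieve the chunks most similar to the question and feed them to the QA chain
    docs = db.similarity_search(user_question)
    chain = get_conversational_chain()  # the chain defined in this file
    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
    return response["output_text"]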
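The get_conversational_chain hunk shows only the opening line of prompt_template and the closing load_qa_chain call. The middle plausibly follows the standard "stuff"-chain pattern sketched below; the template tail, the Context/Question scaffold, and the gemini-pro model choice are assumptions.

from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_google_genai import ChatGoogleGenerativeAI

def get_conversational_chain():
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    the context, say so instead of guessing.
    Context:\n{context}\n
    Question:\n{question}\n
    Answer:
    """
    # The "stuff" chain fills exactly these two template variables
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    model = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=api_key, temperature=0.3)  # model name is an assumption
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain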
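update_chat_history likewise enters the diff only at its head. A plausible body, consistent with the chat_history.json handling visible in delete_files_in_folder; the record shape is an assumption.

import json
import os

def update_chat_history(question, reply):
    # Check if chat history file exists; start a fresh list if not
    chat_history_file = "chat_history.json"
    history = []
    if os.path.exists(chat_history_file):
        with open(chat_history_file, "r") as f:
            history = json.load(f)
    # Append the new exchange and write the file back
    history.append({"question": question, "reply": reply})
    with open(chat_history_file, "w") as f:
        json.dump(history, f, indent=2)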
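Finally, the added comments "# empty faiss_index and chat_history.json" and "# Iterate over all the files in the folder" suggest delete_files_in_folder continues past the visible lines roughly as follows; only the chat_history.json removal is actually shown, so the folder loop is an assumption.

import os

def delete_files_in_folder(folder_path):
    try:
        # Remove the saved chat history, as in the diff above
        chat_history_file = "chat_history.json"
        if os.path.exists(chat_history_file):
            os.remove(chat_history_file)
        # Iterate over all the files in the folder (e.g. faiss_index) and delete them
        for name in os.listdir(folder_path):
            path = os.path.join(folder_path, name)
            if os.path.isfile(path):
                os.remove(path)
    except OSError as e:
        print(f"Error deleting files in {folder_path}: {e}")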