edjdhug3 committed
Commit ae2b518 · Parent: 9b898eb

Upload 4 files

Files changed (4):
  1. .env +1 -0
  2. faiss_store_openai.pkl +3 -0
  3. main.py +72 -0
  4. requirements.txt +100 -0
.env ADDED
@@ -0,0 +1 @@
+ GOOGLE_API_KEY = 'AIzaSyAAUSqm46KolD0515tMAsgJuCR_oPI-FKw'
faiss_store_openai.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
main.py ADDED
@@ -0,0 +1,72 @@
+ import os
+ import streamlit as st
+ import pickle
+ import time
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import UnstructuredURLLoader
+ import google.generativeai as palm
+ from langchain.embeddings import GooglePalmEmbeddings
+ from langchain.llms import GooglePalm
+ from langchain.vectorstores import FAISS
+
+ from dotenv import load_dotenv
+ load_dotenv()  # take environment variables from .env (especially the Google API key)
+
+ st.title("RockyBot: News Research Tool 📈")
+ st.sidebar.title("News Article URLs")
+
+ urls = []
+ for i in range(3):
+     url = st.sidebar.text_input(f"URL {i+1}")
+     urls.append(url)
+
+ process_url_clicked = st.sidebar.button("Process URLs")
+ file_path = "faiss_store_openai.pkl"
+
+ main_placeholder = st.empty()
+ llm = GooglePalm()
+
+ if process_url_clicked:
+     # load data from the given URLs
+     loader = UnstructuredURLLoader(urls=urls)
+     main_placeholder.text("Data Loading...Started...✅✅✅")
+     data = loader.load()
+     # split the documents into chunks
+     text_splitter = RecursiveCharacterTextSplitter(
+         separators=['\n\n', '\n', '.', ','],
+         chunk_size=1000
+     )
+     main_placeholder.text("Text Splitter...Started...✅✅✅")
+     docs = text_splitter.split_documents(data)
+     # create embeddings and store them in a FAISS index
+     embeddings = GooglePalmEmbeddings()
+     vectorstore_openai = FAISS.from_documents(docs, embeddings)
+     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
+     time.sleep(2)
+
+     # Save the FAISS index to a pickle file
+     with open(file_path, "wb") as f:
+         pickle.dump(vectorstore_openai, f)  # dump (not dumps) writes to the open file object
+
+ query = main_placeholder.text_input("Question: ")
+ if query:
+     if os.path.exists(file_path):
+         with open(file_path, "rb") as f:
+             vectorstore = pickle.load(f)
+             chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
+             result = chain({"question": query}, return_only_outputs=True)
+             # result is a dictionary of the form {"answer": "...", "sources": "..."}
+             st.header("Answer")
+             st.write(result["answer"])
+
+             # Display sources, if available
+             sources = result.get("sources", "")
+             if sources:
+                 st.subheader("Sources:")
+                 sources_list = sources.split("\n")  # sources come back as a newline-separated string
+                 for source in sources_list:
+                     st.write(source)
+
+
+
requirements.txt ADDED
@@ -0,0 +1,100 @@
+ aiohttp==3.8.5
+ aiosignal==1.3.1
+ altair==5.1.2
+ annotated-types==0.5.0
+ anyio==3.7.1
+ async-timeout==4.0.3
+ attrs==23.1.0
+ backports.zoneinfo==0.2.1
+ beautifulsoup4==4.12.2
+ blinker==1.6.2
+ cachetools==5.3.1
+ certifi==2023.7.22
+ chardet==5.2.0
+ charset-normalizer==3.3.0
+ click==8.1.7
+ dataclasses-json==0.6.1
+ emoji==2.8.0
+ exceptiongroup==1.1.3
+ faiss-cpu==1.7.4
+ filetype==1.2.0
+ frozenlist==1.4.0
+ gitdb==4.0.10
+ GitPython==3.1.37
+ google-ai-generativelanguage==0.1.0
+ google-api-core==2.12.0
+ google-auth==2.23.2
+ google-generativeai==0.1.0rc1
+ googleapis-common-protos==1.60.0
+ greenlet==3.0.0
+ grpcio==1.59.0
+ grpcio-status==1.59.0
+ idna==3.4
+ importlib-metadata==6.8.0
+ importlib-resources==6.1.0
+ Jinja2==3.1.2
+ joblib==1.3.2
+ jsonpatch==1.33
+ jsonpointer==2.4
+ jsonschema==4.19.1
+ jsonschema-specifications==2023.7.1
+ langchain==0.0.309
+ langdetect==1.0.9
+ langsmith==0.0.42
+ lxml==4.9.3
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.3
+ marshmallow==3.20.1
+ mdurl==0.1.2
+ multidict==6.0.4
+ mypy-extensions==1.0.0
+ nltk==3.8.1
+ numpy==1.24.4
+ packaging==23.2
+ pandas==2.0.3
+ pickle5==0.0.11
+ Pillow==10.0.1
+ pkgutil_resolve_name==1.3.10
+ proto-plus==1.22.3
+ protobuf==4.24.4
+ pyarrow==13.0.0
+ pyasn1==0.5.0
+ pyasn1-modules==0.3.0
+ pydantic==2.4.2
+ pydantic_core==2.10.1
+ pydeck==0.8.1b0
+ Pygments==2.16.1
+ python-dateutil==2.8.2
+ python-dotenv==1.0.0
+ python-iso639==2023.6.15
+ python-magic==0.4.27
+ pytz==2023.3.post1
+ PyYAML==6.0.1
+ referencing==0.30.2
+ regex==2023.10.3
+ requests==2.31.0
+ rich==13.6.0
+ rpds-py==0.10.4
+ rsa==4.9
+ six==1.16.0
+ smmap==5.0.1
+ sniffio==1.3.0
+ soupsieve==2.5
+ SQLAlchemy==2.0.21
+ streamlit==1.27.2
+ tabulate==0.9.0
+ tenacity==8.2.3
+ toml==0.10.2
+ toolz==0.12.0
+ tornado==6.3.3
+ tqdm==4.66.1
+ typing-inspect==0.9.0
+ typing_extensions==4.8.0
+ tzdata==2023.3
+ tzlocal==5.1
+ unstructured==0.10.19
+ urllib3==2.0.6
+ validators==0.22.0
+ watchdog==3.0.0
+ yarl==1.9.2
+ zipp==3.17.0