kamau1 committed on
Commit
2397ad2
1 Parent(s): 9b00403

Create version/semapdf1.0.py

Browse files
Files changed (1) hide show
  1. version/semapdf1.0.py +105 -0
version/semapdf1.0.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.question_answering import load_qa_chain
2
+ from langchain.llms import OpenAI
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
6
+ from langchain.callbacks import get_openai_callback
7
+ from PyPDF2 import PdfReader
8
+ import json
9
+ import openai
10
+ import streamlit as st
11
+ import os
12
+ import requests
13
+
14
+
15
# Configure the Streamlit page: browser-tab title, icon, and wide layout.
st.set_page_config(
    page_title="PesaQ",
    page_icon="💸",
    layout="wide",
)

# Copy the OpenAI key from Streamlit secrets into the process environment
# (presumably where the OpenAI/LangChain clients built later look for it).
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
20
+
21
+
22
+ # Sema Translator
23
# Base address of the Sema translation service (endpoints are appended below).
_SEMA_BASE_URL = "https://5d5c-44-208-85-154.ngrok-free.app"


def translate(userinput, target_lang, source_lang=None, timeout=30):
    """Translate *userinput* into *target_lang* using the Sema translation API.

    When *source_lang* is given, the ``translate_enter`` endpoint is called
    and the supplied source language is echoed back. Otherwise the
    ``translate_detect`` endpoint is called and the source language is taken
    from the ``source_language`` field of the service's JSON reply.

    Args:
        userinput: Text to translate.
        target_lang: Language code to translate into (callers in this file
            pass codes such as ``'eng_Latn'`` -- presumably FLORES-200 style;
            confirm against the service).
        source_lang: Optional language code of *userinput*. If falsy, the
            service is asked to detect it.
        timeout: Seconds to wait for the service before giving up. Without
            a timeout ``requests`` would block indefinitely on a dead host.

    Returns:
        A ``(source_language, translated_text)`` tuple.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: If the JSON reply lacks an expected field.
    """
    if source_lang:
        endpoint = "translate_enter"
        payload = {
            "userinput": userinput,
            "source_lang": source_lang,
            "target_lang": target_lang,
        }
    else:
        endpoint = "translate_detect"
        payload = {
            "userinput": userinput,
            "target_lang": target_lang,
        }

    response = requests.post(
        f"{_SEMA_BASE_URL}/{endpoint}/", json=payload, timeout=timeout
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    result = response.json()

    detected_lang = source_lang if source_lang else result["source_language"]
    return detected_lang, result["translated_text"]
49
+
50
+
51
def _extract_pdf_text(pdf):
    """Return the concatenated text of every page of *pdf* that yields text."""
    reader = PdfReader(pdf)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)


def _build_index(pdf_text):
    """Split *pdf_text* into overlapping chunks and embed them into a FAISS store."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,  # measured in characters, since length_function is len
        chunk_overlap=200,
        length_function=len,
    )
    text_chunks = text_splitter.split_text(pdf_text)
    # Embeddings come from OpenAI; this call is the expensive (billed) step.
    return FAISS.from_texts(text_chunks, OpenAIEmbeddings())


def main():
    """Run the PesaDoc page: upload a PDF, then chat with it in any language.

    The uploaded PDF is converted to text, chunked and embedded into a FAISS
    index. Each user question is translated to English via ``translate``,
    answered by a LangChain "stuff" QA chain over the most similar chunks,
    and the answer is translated back into the language reported for the
    question. The conversation is kept in ``st.session_state.messages``.
    """
    import hashlib

    st.title("📚 PesaDoc")
    # upload file
    pdf = st.file_uploader("Upload a financial Document and ask questions to get insights", type="pdf")
    if pdf is None:
        return

    # Streamlit re-executes this script on every interaction (including each
    # chat message). Cache the index per document content so the PDF is not
    # re-parsed and re-embedded on every rerun.
    doc_key = hashlib.sha256(pdf.getvalue()).hexdigest()
    if st.session_state.get("pdf_index_key") != doc_key:
        pdf_text = _extract_pdf_text(pdf)
        if not pdf_text.strip():
            # e.g. a scanned/image-only PDF: there is nothing to embed, and
            # building a vector store from zero chunks would fail.
            st.warning("No extractable text was found in this PDF.")
            return
        st.session_state.pdf_index = _build_index(pdf_text)
        st.session_state.pdf_index_key = doc_key
    pdf_embeddings = st.session_state.pdf_index

    # Replay the conversation so far.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if user_question := st.chat_input("Ask your document anything ......?"):
        with st.chat_message("user"):
            st.markdown(user_question)
        # Record the question before any network call so it stays in the
        # history even if translation or the LLM request fails.
        st.session_state.messages.append({"role": "user", "content": user_question})

        # Translate the question to English and remember the user's language.
        user_lang, english_query = translate(user_question, 'eng_Latn')

        docs = pdf_embeddings.similarity_search(english_query)
        chain = load_qa_chain(OpenAI(), chain_type="stuff")
        response = chain.run(input_documents=docs, question=english_query)

        # Translate the English answer back into the user's language.
        output = translate(response, user_lang, 'eng_Latn')[1]
        with st.chat_message("assistant"):
            st.markdown(output)
        st.session_state.messages.append({"role": "assistant", "content": output})
102
+
103
+
104
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    main()