|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.llms import OpenAI |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS |
|
from langchain.callbacks import get_openai_callback |
|
from PyPDF2 import PdfReader |
|
import json |
|
import openai |
|
import streamlit as st |
|
import os |
|
import requests |
|
|
|
|
|
|
|
# Configure the Streamlit page before any other st.* call (required by Streamlit).
# NOTE(review): page_icon renders as mojibake ("๐ธ") — looks like a corrupted
# emoji from a bad encoding round-trip; confirm the intended glyph.
st.set_page_config(page_title="PesaQ", page_icon="๐ธ", layout="wide",)




# Expose the OpenAI key from Streamlit secrets as an environment variable so
# langchain's OpenAI/OpenAIEmbeddings clients can pick it up implicitly.
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
|
|
|
|
|
|
|
def translate(userinput, target_lang, source_lang=None):
    """Translate ``userinput`` into ``target_lang`` via the remote translation API.

    Args:
        userinput: Text to translate.
        target_lang: Target language code (e.g. ``'eng_Latn'``).
        source_lang: Optional source language code. When given, the
            ``/translate_enter/`` endpoint is used with the caller-supplied
            source; otherwise ``/translate_detect/`` auto-detects it.

    Returns:
        Tuple ``(source_language, translated_text)``.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP response.
    """
    base_url = "https://5d5c-44-208-85-154.ngrok-free.app"
    payload = {"userinput": userinput, "target_lang": target_lang}

    if source_lang:
        payload["source_lang"] = source_lang
        endpoint = base_url + "/translate_enter/"
    else:
        endpoint = base_url + "/translate_detect/"

    # Timeout prevents a hung translation service from freezing the app;
    # raise_for_status surfaces HTTP errors instead of a confusing KeyError.
    response = requests.post(endpoint, json=payload, timeout=60)
    response.raise_for_status()
    result = response.json()

    # The detect endpoint reports the detected source language; the explicit
    # endpoint simply echoes what the caller supplied.
    detected = source_lang if source_lang else result['source_language']
    return detected, result['translated_text']
|
|
|
|
|
def _build_knowledge_base(pdf):
    """Extract text from *pdf*, chunk it, and embed the chunks into FAISS.

    This calls the OpenAI embeddings API for every chunk, so it is slow and
    billed per call — callers should cache the result per uploaded file.

    Args:
        pdf: A file-like object accepted by ``PyPDF2.PdfReader``
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        Tuple ``(vectorstore, chain)`` — the FAISS vector store over the
        document chunks and a "stuff"-type QA chain.
    """
    reader = PdfReader(pdf)
    pdf_text = ''
    for page in reader.pages:
        text = page.extract_text()
        # extract_text() can return None/'' for image-only pages; skip those.
        if text:
            pdf_text += text

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    text_chunks = text_splitter.split_text(pdf_text)

    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(text_chunks, embeddings)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    return vectorstore, chain


def main():
    """Streamlit entry point: upload a PDF and chat with it in any language.

    User questions are translated to English ('eng_Latn') before retrieval
    and QA, and the answer is translated back to the user's detected language.
    """
    st.title("๐ PesaDoc")

    pdf = st.file_uploader("Upload a financial Document and ask questions to get insights", type="pdf")

    if pdf is not None:
        # Streamlit reruns this whole script on every interaction. Cache the
        # embedded document in session_state so the PDF is not re-read and
        # re-embedded (an OpenAI API call per chunk) on every chat message.
        cache_key = (pdf.name, pdf.size)
        if st.session_state.get("kb_key") != cache_key:
            st.session_state.kb_key = cache_key
            st.session_state.kb = _build_knowledge_base(pdf)
        pdf_embeddings, chain = st.session_state.kb

        if "messages" not in st.session_state:
            st.session_state.messages = []

        # Replay the conversation so far (the script reruns from the top).
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

        if user_question := st.chat_input("Ask your document anything ......?"):
            with st.chat_message("user"):
                st.markdown(user_question)
            # Detect the user's language and get an English query for retrieval.
            user_langd, Queryd = translate(user_question, 'eng_Latn')
            st.session_state.messages.append({"role": "user", "content": user_question})

            docs = pdf_embeddings.similarity_search(Queryd)
            response = chain.run(input_documents=docs, question=Queryd)

            # Translate the English answer back into the user's language;
            # translate() returns (source_lang, text) — we only need the text.
            output = translate(response, user_langd, 'eng_Latn')[1]
            with st.chat_message("assistant"):
                st.markdown(output)
            st.session_state.messages.append({"role": "assistant", "content": output})
|
|
|
|
|
# Standard script entry guard so importing this module has no side effects
# beyond the top-level Streamlit/env configuration above.
if __name__ == '__main__':

    main()
|
|