"""PesaDoc: a Streamlit chat app for asking questions about an uploaded PDF.

The uploaded PDF is split into chunks, embedded with OpenAI embeddings into a
FAISS index, and queried with a LangChain question-answering chain. Questions
are translated to English before retrieval and answers are translated back to
the user's language through a remote translation service.
"""

import json
import os
from typing import Optional, Tuple

import openai
import requests
import streamlit as st
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from PyPDF2 import PdfReader

# Page configuration
st.set_page_config(page_title="PesaQ", page_icon="💸", layout="wide",)

# Set the OpenAI key from Streamlit secrets
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]

# Base URL of the Sema translation service.
TRANSLATE_BASE_URL = "https://5d5c-44-208-85-154.ngrok-free.app"

# Upper bound (seconds) on a translation request; without it a dead tunnel
# would block the Streamlit script forever.
TRANSLATE_TIMEOUT_SECONDS = 60


# Sema Translator
def translate(
    userinput: str,
    target_lang: str,
    source_lang: Optional[str] = None,
) -> Tuple[str, str]:
    """Translate ``userinput`` into ``target_lang`` via the Sema service.

    Args:
        userinput: Text to translate.
        target_lang: Language code to translate into (e.g. ``'eng_Latn'``).
        source_lang: Language code of ``userinput``. When omitted (or falsy),
            the ``translate_detect/`` endpoint is used and the source language
            is taken from the service's response.

    Returns:
        A ``(source_language, translated_text)`` tuple. ``source_language`` is
        ``source_lang`` when it was supplied, otherwise the value of
        ``'source_language'`` in the service response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: If the response JSON lacks an expected field.
    """
    if source_lang:
        endpoint = "translate_enter/"
        payload = {
            "userinput": userinput,
            "source_lang": source_lang,
            "target_lang": target_lang,
        }
    else:
        endpoint = "translate_detect/"
        payload = {
            "userinput": userinput,
            "target_lang": target_lang,
        }

    response = requests.post(
        f"{TRANSLATE_BASE_URL}/{endpoint}",
        json=payload,
        timeout=TRANSLATE_TIMEOUT_SECONDS,
    )
    # Surface HTTP failures directly instead of as a KeyError/JSON error below.
    response.raise_for_status()
    result = response.json()

    detected_lang = source_lang if source_lang else result["source_language"]
    return detected_lang, result["translated_text"]


def _build_index(pdf):
    """Extract the PDF's text, chunk it, and embed the chunks into FAISS.

    Returns the FAISS vector store, or ``None`` when the PDF yields no text
    chunks (e.g. no page has extractable text).
    """
    reader = PdfReader(pdf)
    # extract_text() may return an empty value for a page; treat it as "".
    pdf_text = "".join(page.extract_text() or "" for page in reader.pages)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,  # characters per chunk
        chunk_overlap=200,
        length_function=len,
    )
    text_chunks = text_splitter.split_text(pdf_text)
    if not text_chunks:
        return None

    # Convert the text chunks to OpenAI embeddings stored in a FAISS index.
    return FAISS.from_texts(text_chunks, OpenAIEmbeddings())


def main():
    """Render the PesaDoc page: PDF upload, chat history, and Q&A loop."""
    st.title("📚 PesaDoc")

    # upload file
    pdf = st.file_uploader("Upload a financial Document and ask questions to get insights", type="pdf")
    if pdf is None:
        return

    # Streamlit reruns this script on every interaction, so keep the index in
    # session state and rebuild it only when a different file is uploaded;
    # otherwise every chat message would re-embed the whole document.
    index_key = (pdf.name, pdf.size)
    if st.session_state.get("pdf_index_key") != index_key:
        st.session_state.pdf_index = _build_index(pdf)
        st.session_state.pdf_index_key = index_key

    pdf_embeddings = st.session_state.pdf_index
    if pdf_embeddings is None:
        st.warning("No extractable text was found in this PDF.")
        return

    chain = load_qa_chain(OpenAI(), chain_type="stuff")

    # Replay the conversation so far.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if user_question := st.chat_input("Ask your document anything ......?"):
        with st.chat_message("user"):
            st.markdown(user_question)

        # Translate the question to English and remember the user's language
        # so the answer can be translated back.
        user_lang, english_query = translate(user_question, 'eng_Latn')
        st.session_state.messages.append({"role": "user", "content": user_question})

        docs = pdf_embeddings.similarity_search(english_query)
        response = chain.run(input_documents=docs, question=english_query)
        output = translate(response, user_lang, 'eng_Latn')[1]

        with st.chat_message("assistant"):
            st.markdown(output)
        st.session_state.messages.append({"role": "assistant", "content": output})


if __name__ == '__main__':
    main()