File size: 1,344 Bytes
783ca6e 6e9927d 783ca6e 6e9927d 25e1473 783ca6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import streamlit as st
from dotenv import load_dotenv
from admin_utils import *
def main():
    """Render the admin page that uploads a PDF and pushes it to Pinecone.

    Flow: load environment variables from a ``.env`` file, draw the page,
    wait for a PDF upload, then read the PDF, split it into chunks, create
    the embeddings instance, and push everything to the Pinecone index.

    The Pinecone API key is read from the ``PINECONE_API_KEY`` environment
    variable and is never stored in source. The helper functions
    (``read_pdf_data``, ``split_data``, ``create_embeddings_load_data``,
    ``push_to_pinecone``) are presumably provided by the star import from
    ``admin_utils`` -- verify there for their exact contracts.
    """
    # Local import keeps this block self-contained; only os.getenv is needed.
    import os

    # Region and index name passed to push_to_pinecone (values unchanged).
    pinecone_environment = "us-east-1"
    pinecone_index_name = "ticket"

    # Populate os.environ from a local .env file, if one exists.
    load_dotenv()

    st.set_page_config(page_title="Dump PDF to Pinecone - Vector Store")
    st.title("Please upload your files here...📁 ")

    # SECURITY: the key used to be hard-coded here. Any key that was ever
    # committed must be treated as compromised and rotated in the Pinecone
    # console; supply the replacement through the environment / .env file.
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    if not pinecone_api_key:
        st.error(
            "PINECONE_API_KEY is not set. Add it to your environment or "
            ".env file before uploading documents."
        )
        return

    # Upload the pdf file...
    pdf = st.file_uploader("Only PDF files allowed", type=["pdf"])
    if pdf is None:
        # Nothing uploaded yet; Streamlit reruns the script on upload.
        return

    with st.spinner('Wait for it...'):
        # Extract the whole text from the uploaded pdf file
        text = read_pdf_data(pdf)
        st.write("👉Reading PDF done")

        # Create chunks
        docs_chunks = split_data(text)
        st.write("👉Splitting data into chunks done")

        # Create the embeddings
        embeddings = create_embeddings_load_data()
        st.write("👉Creating embeddings instance done")

        # Build the vector store (push the PDF data embeddings)
        push_to_pinecone(
            pinecone_api_key,
            pinecone_environment,
            pinecone_index_name,
            embeddings,
            docs_chunks,
        )

    st.success("Successfully pushed the embeddings to Pinecone")
# Run the Streamlit page only when this file is executed as a script.
if __name__ == '__main__':
    main()