qorgh346 committed on
Commit
15d201d
1 Parent(s): dd9ce97

update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -26,17 +26,17 @@ def get_pdf_text(pdf_docs):
26
  temp_file.write(pdf_docs.getvalue())
27
  temp_file.seek(0)
28
  pdf_loader = PyPDFLoader(temp_file.name)
29
- print('pdf_loader = ', pdf_loader)
30
  pdf_doc = pdf_loader.load()
31
- print('pdf_doc = ',pdf_doc)
32
  return pdf_doc
33
 
34
 
35
- def get_text_chunks(text):
36
- print('text = ',text)
37
  text_splitter = RecursiveCharacterTextSplitter(
38
- chunk_size = 256,
39
- chunk_overlap = 50,
40
  length_function= len
41
  )
42
  # text_splitter = CharacterTextSplitter(
@@ -45,9 +45,9 @@ def get_text_chunks(text):
45
  # chunk_overlap=200,
46
  # length_function=len
47
  # )
48
- chunks = text_splitter.split_text(text)
49
- print('chunks = ', chunks)
50
- return chunks
51
 
52
 
53
  def get_vectorstore(text_chunks):
@@ -58,7 +58,7 @@ def get_vectorstore(text_chunks):
58
  # embeddings = OpenAIEmbeddings()sentence-transformers/all-MiniLM-L6-v2
59
  # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
60
  # model_kwargs={'device':'cpu'})
61
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
62
  # vectorstore = Chroma.from_texts(texts=text_chunks, embedding=embeddings)
63
 
64
  return vectorstore
@@ -186,7 +186,7 @@ def main():
186
 
187
 
188
  # get the text chunks
189
- text_chunks = get_text_chunks(raw_text)
190
 
191
  # create vector store
192
  vectorstore = get_vectorstore(text_chunks)
 
26
  temp_file.write(pdf_docs.getvalue())
27
  temp_file.seek(0)
28
  pdf_loader = PyPDFLoader(temp_file.name)
29
+ # print('pdf_loader = ', pdf_loader)
30
  pdf_doc = pdf_loader.load()
31
+ # print('pdf_doc = ',pdf_doc)
32
  return pdf_doc
33
 
34
 
35
+ def get_text_chunks(documents):
36
+
37
  text_splitter = RecursiveCharacterTextSplitter(
38
+ chunk_size = 1000,
39
+ chunk_overlap = 200,
40
  length_function= len
41
  )
42
  # text_splitter = CharacterTextSplitter(
 
45
  # chunk_overlap=200,
46
  # length_function=len
47
  # )
48
+ documents = text_splitter.split_documents(documents)
49
+ print('documents = ', documents)
50
+ return documents
51
 
52
 
53
  def get_vectorstore(text_chunks):
 
58
  # embeddings = OpenAIEmbeddings()sentence-transformers/all-MiniLM-L6-v2
59
  # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
60
  # model_kwargs={'device':'cpu'})
61
+ vectorstore = FAISS.from_documents(texts=text_chunks, embedding=embeddings)
62
  # vectorstore = Chroma.from_texts(texts=text_chunks, embedding=embeddings)
63
 
64
  return vectorstore
 
186
 
187
 
188
  # get the text chunks
189
+ text_chunks = get_text_chunks(doc_list)
190
 
191
  # create vector store
192
  vectorstore = get_vectorstore(text_chunks)