devve1 committed on
Commit
77ed01c
1 Parent(s): 165e041

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -317,13 +317,6 @@ def load_models_and_documents():
317
 
318
  return client, collection_name, llm, dense_model, sparse_model
319
 
320
- def create_document(text: str, i: int):
321
- index = -1
322
- for chunk in text_splitter.split_text(text):
323
- if text_splitter._add_start_index:
324
- index = text.find(chunk, index + 1)
325
- documents.append(chunk)
326
-
327
  def chunk_documents(docs, dense_model, sparse_model):
328
  text_splitter = SemanticChunker(
329
  dense_model,
@@ -333,8 +326,16 @@ def chunk_documents(docs, dense_model, sparse_model):
333
  texts = []
334
  for doc in docs:
335
  texts.append(doc.page_content)
336
-
337
  documents = []
 
 
 
 
 
 
 
 
338
  joblib.Parallel(n_jobs=joblib.cpu_count(), verbose=1, require='sharedmem')(
339
  joblib.delayed(create_document)(text, i) for i, text in enumerate(texts))
340
 
 
317
 
318
  return client, collection_name, llm, dense_model, sparse_model
319
 
 
 
 
 
 
 
 
320
  def chunk_documents(docs, dense_model, sparse_model):
321
  text_splitter = SemanticChunker(
322
  dense_model,
 
326
  texts = []
327
  for doc in docs:
328
  texts.append(doc.page_content)
329
+
330
  documents = []
331
+
332
+ def create_document(text: str, i: int):
333
+ index = -1
334
+ for chunk in text_splitter.split_text(text):
335
+ if text_splitter._add_start_index:
336
+ index = text.find(chunk, index + 1)
337
+ documents.append(chunk)
338
+
339
  joblib.Parallel(n_jobs=joblib.cpu_count(), verbose=1, require='sharedmem')(
340
  joblib.delayed(create_document)(text, i) for i, text in enumerate(texts))
341