Spaces:
Starting
on
T4
Starting
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -317,13 +317,6 @@ def load_models_and_documents():
|
|
317 |
|
318 |
return client, collection_name, llm, dense_model, sparse_model
|
319 |
|
320 |
-
def create_document(text: str, i: int):
|
321 |
-
index = -1
|
322 |
-
for chunk in text_splitter.split_text(text):
|
323 |
-
if text_splitter._add_start_index:
|
324 |
-
index = text.find(chunk, index + 1)
|
325 |
-
documents.append(chunk)
|
326 |
-
|
327 |
def chunk_documents(docs, dense_model, sparse_model):
|
328 |
text_splitter = SemanticChunker(
|
329 |
dense_model,
|
@@ -333,8 +326,16 @@ def chunk_documents(docs, dense_model, sparse_model):
|
|
333 |
texts = []
|
334 |
for doc in docs:
|
335 |
texts.append(doc.page_content)
|
336 |
-
|
337 |
documents = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
joblib.Parallel(n_jobs=joblib.cpu_count(), verbose=1, require='sharedmem')(
|
339 |
joblib.delayed(create_document)(text, i) for i, text in enumerate(texts))
|
340 |
|
|
|
317 |
|
318 |
return client, collection_name, llm, dense_model, sparse_model
|
319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
def chunk_documents(docs, dense_model, sparse_model):
|
321 |
text_splitter = SemanticChunker(
|
322 |
dense_model,
|
|
|
326 |
texts = []
|
327 |
for doc in docs:
|
328 |
texts.append(doc.page_content)
|
329 |
+
|
330 |
documents = []
|
331 |
+
|
332 |
+
def create_document(text: str, i: int):
|
333 |
+
index = -1
|
334 |
+
for chunk in text_splitter.split_text(text):
|
335 |
+
if text_splitter._add_start_index:
|
336 |
+
index = text.find(chunk, index + 1)
|
337 |
+
documents.append(chunk)
|
338 |
+
|
339 |
joblib.Parallel(n_jobs=joblib.cpu_count(), verbose=1, require='sharedmem')(
|
340 |
joblib.delayed(create_document)(text, i) for i, text in enumerate(texts))
|
341 |
|