clementsan
committed on
Commit
•
aa98840
1
Parent(s):
4ce7fc5
Add ASCII transliteration of unicode text
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ from langchain_community.llms import HuggingFaceEndpoint
|
|
13 |
|
14 |
from pathlib import Path
|
15 |
import chromadb
|
|
|
16 |
|
17 |
from transformers import AutoTokenizer
|
18 |
import transformers
|
@@ -188,6 +189,8 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Pr
|
|
188 |
# Fix potential issues from naming convention
|
189 |
## Remove space
|
190 |
collection_name = collection_name.replace(" ","-")
|
|
|
|
|
191 |
## Limit length to 50 characters
|
192 |
collection_name = collection_name[:50]
|
193 |
## Enforce start and end as alphanumeric character
|
|
|
13 |
|
14 |
from pathlib import Path
|
15 |
import chromadb
|
16 |
+
from unidecode import unidecode
|
17 |
|
18 |
from transformers import AutoTokenizer
|
19 |
import transformers
|
|
|
189 |
# Fix potential issues from naming convention
|
190 |
## Remove space
|
191 |
collection_name = collection_name.replace(" ","-")
|
192 |
+
## ASCII transliterations of Unicode text
|
193 |
+
collection_name = unidecode(collection_name)
|
194 |
## Limit length to 50 characters
|
195 |
collection_name = collection_name[:50]
|
196 |
## Enforce start and end as alphanumeric character
|