Spaces:
Build error
Build error
heikowagner
committed on
Commit
•
4f0dc21
1
Parent(s):
fbb697c
remove data
Browse files
- .gitignore +4 -1
- app/7fd9b7dc7c8bd6c62fec03923bb04b6ba28e6c736017b43e67f3e808c92438c7.pkl +0 -3
- app/VectorStore/chroma-embeddings.parquet +2 -2
- app/VectorStore/index/id_to_uuid_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl +2 -2
- app/VectorStore/index/index_52984ff2-d9c3-459b-acc0-0b0aa559d50f.bin +2 -2
- app/VectorStore/index/index_metadata_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl +1 -1
- app/VectorStore/index/uuid_to_id_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl +2 -2
- app/fce3ad7ed281744c55c5838aa5677e15468ae6bae3aa5fe76e6aac20d2c19f2f.pkl +0 -3
- app/load_vectors.py +1 -1
- app/utils.py +25 -9
.gitignore
CHANGED
@@ -6,4 +6,7 @@
|
|
6 |
root
|
7 |
*.ipynb_checkpoints*
|
8 |
.vscode
|
9 |
-
/app/mymodels
|
|
|
|
|
|
|
|
6 |
root
|
7 |
*.ipynb_checkpoints*
|
8 |
.vscode
|
9 |
+
/app/mymodels
|
10 |
+
/app/.cache
|
11 |
+
/app/VectorStore*
|
12 |
+
*chroma-embeddings.parquet*
|
app/7fd9b7dc7c8bd6c62fec03923bb04b6ba28e6c736017b43e67f3e808c92438c7.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:09bac093b25dfef86ce79a7325d893cf826fb0de7bcdf122b8ab0ec5692425c4
|
3 |
-
size 3500346
|
|
|
|
|
|
|
|
app/VectorStore/chroma-embeddings.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df81409ab51d0acf63d8e0ad64f6af92daa192afb80667f08fe48fdcba095b1a
|
3 |
+
size 9201921
|
app/VectorStore/index/id_to_uuid_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25fca5ee26534c9df1cfaf6fe82d8aaea2c65bdc8e81e3c047c7546c7ea33466
|
3 |
+
size 153298
|
app/VectorStore/index/index_52984ff2-d9c3-459b-acc0-0b0aa559d50f.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:111dc06e8480103cd32c60d5719a1091ac939ce09dbbdf0a979ed72590dd3095
|
3 |
+
size 15211024
|
app/VectorStore/index/index_metadata_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 74
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:950e65792fc96c61638586850536a6b92d603d0d8e357fa7028dee790eff793d
|
3 |
size 74
|
app/VectorStore/index/uuid_to_id_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d98cc469e10b5a1d092e046f96a528edf2f3b302da4340e56f27846728b5b853
|
3 |
+
size 179261
|
app/fce3ad7ed281744c55c5838aa5677e15468ae6bae3aa5fe76e6aac20d2c19f2f.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a98a5f02d12b69d1b62de62ffef9fc98fbe229f2369e002e3f47ca78bdefeb3e
|
3 |
-
size 29884059
|
|
|
|
|
|
|
|
app/load_vectors.py
CHANGED
@@ -94,7 +94,7 @@ def load_from_file(files):
|
|
94 |
|
95 |
def load_from_web(urls, cache=True):
|
96 |
docs_list = urls
|
97 |
-
filename=f"
|
98 |
|
99 |
isFile = os.path.isfile(filename)
|
100 |
|
|
|
94 |
|
95 |
def load_from_web(urls, cache=True):
|
96 |
docs_list = urls
|
97 |
+
filename=f"./.cache/{sha256(str(urls).encode('utf-8')).hexdigest()}.pkl"
|
98 |
|
99 |
isFile = os.path.isfile(filename)
|
100 |
|
app/utils.py
CHANGED
@@ -4,7 +4,7 @@ from langchain.docstore.document import Document
|
|
4 |
import chromadb
|
5 |
from chromadb.config import Settings
|
6 |
import load_model
|
7 |
-
from load_vectors import load_from_file, load_and_split, create_and_add
|
8 |
persist_directory = load_model.persist_directory
|
9 |
|
10 |
def format_document(document: Document):
|
@@ -52,15 +52,31 @@ def load_files():
|
|
52 |
retrieve_collections.clear()
|
53 |
collections = retrieve_collections()
|
54 |
|
55 |
-
st.
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
else:
|
65 |
collection = st.text_area('Name of your new collection:', '')
|
66 |
if st.button('Create'):
|
|
|
4 |
import chromadb
|
5 |
from chromadb.config import Settings
|
6 |
import load_model
|
7 |
+
from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web
|
8 |
persist_directory = load_model.persist_directory
|
9 |
|
10 |
def format_document(document: Document):
|
|
|
52 |
retrieve_collections.clear()
|
53 |
collections = retrieve_collections()
|
54 |
|
55 |
+
option = st.radio(
|
56 |
+
"",
|
57 |
+
options=["Upload Files", "Download Files"],
|
58 |
+
)
|
59 |
+
if option == "Upload Files":
|
60 |
+
st.write('Source Documents:')
|
61 |
+
uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
|
62 |
+
chunk_size = st.text_area('chunk Size:', 1000)
|
63 |
+
|
64 |
+
if st.button('Upload'):
|
65 |
+
docs = load_from_file(uploaded_files)
|
66 |
+
sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
|
67 |
+
create_and_add(selected_collection, sub_docs, "hkunlp/instructor-large")
|
68 |
+
uploaded_files=None
|
69 |
+
else:
|
70 |
+
st.write('Source Documents (Comma separated):')
|
71 |
+
urls = chunk_size = st.text_area('Urls:', '')
|
72 |
+
chunk_size = st.text_area('chunk Size:', 1000)
|
73 |
+
urls = urls.replace(",", "" ).replace('"', "" ).split(',')
|
74 |
|
75 |
+
if st.button('Upload'):
|
76 |
+
docs = load_from_web(urls)
|
77 |
+
sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
|
78 |
+
create_and_add(selected_collection, sub_docs, "hkunlp/instructor-large")
|
79 |
+
uploaded_files=None
|
80 |
else:
|
81 |
collection = st.text_area('Name of your new collection:', '')
|
82 |
if st.button('Create'):
|