Spaces:
Build error
Build error
heikowagner
commited on
Commit
•
7009660
1
Parent(s):
bcd81f1
Add Files
Browse files- .gitignore +8 -0
- Dockerfile +14 -0
- README.md +8 -5
- app/7fd9b7dc7c8bd6c62fec03923bb04b6ba28e6c736017b43e67f3e808c92438c7.pkl +3 -0
- app/VectorStore/chroma-collections.parquet +3 -0
- app/VectorStore/chroma-embeddings.parquet +3 -0
- app/VectorStore/index/id_to_uuid_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl +3 -0
- app/VectorStore/index/id_to_uuid_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl +3 -0
- app/VectorStore/index/index_52984ff2-d9c3-459b-acc0-0b0aa559d50f.bin +3 -0
- app/VectorStore/index/index_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.bin +3 -0
- app/VectorStore/index/index_metadata_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl +3 -0
- app/VectorStore/index/index_metadata_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl +3 -0
- app/VectorStore/index/uuid_to_id_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl +3 -0
- app/VectorStore/index/uuid_to_id_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl +3 -0
- app/app.py +52 -0
- app/fce3ad7ed281744c55c5838aa5677e15468ae6bae3aa5fe76e6aac20d2c19f2f.pkl +3 -0
- app/load_docs.py +155 -0
- app/load_model.py +128 -0
- app/load_vectors.py +102 -0
- app/requirements.txt +12 -0
- app/result.pkl +3 -0
- app/run.py +12 -0
- app/st_render_doc.py +9 -0
- app/utils.py +19 -0
- docker-compose.yaml +23 -0
.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
./docker/zeppelin/logs/*
|
2 |
+
*.log
|
3 |
+
*.log.*
|
4 |
+
*__pycache__*
|
5 |
+
root
|
6 |
+
*.ipynb_checkpoints*
|
7 |
+
.vscode
|
8 |
+
/app/mymodels
|
Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM tensorflow/tensorflow:latest-gpu
WORKDIR /app

# RUN apt-get upgrade -y
# Refresh the package index and install in ONE layer: with separate RUN steps a
# cached (stale) index layer can be combined with a newer install step and fail.
# git was previously installed twice; the list below is the de-duplicated union.
# The index is removed afterwards because it is not needed at runtime.
RUN apt-get update -y \
    && apt-get install -y \
        make wget git gcc g++ lhasa libgmp-dev libmpfr-dev libmpc-dev \
        flex bison gettext texinfo ncurses-dev autoconf rsync \
    && rm -rf /var/lib/apt/lists/*

COPY ./app .
RUN pip install --no-cache-dir -r requirements.txt
RUN ls -la
#RUN python load_docs.py
RUN python run.py

# Declare the port before the start command so the metadata reads top-down.
EXPOSE 8080
CMD ["streamlit", "run", "app.py", "--server.port=8080"]
#CMD ls -la
|
README.md
CHANGED
@@ -1,10 +1,13 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk:
|
|
|
|
|
7 |
pinned: false
|
|
|
8 |
---
|
9 |
|
10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: MyGPT
|
3 |
+
emoji: 🏢
|
4 |
+
colorFrom: gray
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.17.0
|
8 |
+
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app/7fd9b7dc7c8bd6c62fec03923bb04b6ba28e6c736017b43e67f3e808c92438c7.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09bac093b25dfef86ce79a7325d893cf826fb0de7bcdf122b8ab0ec5692425c4
|
3 |
+
size 3500346
|
app/VectorStore/chroma-collections.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26740390ba936629dcb106c9948b55752ac6c763915bf0e7ad4a1273ac9ba084
|
3 |
+
size 745
|
app/VectorStore/chroma-embeddings.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b79fe220db8ba8a7a77617dd295bf51f5438257e676b00ddd28a1fcf62c757fb
|
3 |
+
size 240218512
|
app/VectorStore/index/id_to_uuid_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f64ceca9dc08a1fa345c23f4012132ef11f0c472dd64c6f80e445a65f29f536e
|
3 |
+
size 104759
|
app/VectorStore/index/id_to_uuid_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a848348a950ba8fcd605e4b04a2fd81f938587e0234abbb3b4db7ad693bf8d0d
|
3 |
+
size 1445149
|
app/VectorStore/index/index_52984ff2-d9c3-459b-acc0-0b0aa559d50f.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:518d2424133c568190196628f29226cca2e9c198616b03990347f2ce0c11ea7e
|
3 |
+
size 10402668
|
app/VectorStore/index/index_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6278607ce39747eb13ed7e117bc114cc231f4b8f4638269a2f3536e4729e338
|
3 |
+
size 143197708
|
app/VectorStore/index/index_metadata_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8e41a80750fa05ef9dd656d26239c0d9f06c1d278825090d6fd4f9645756d35
|
3 |
+
size 74
|
app/VectorStore/index/index_metadata_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff10f5fcfc8d53e24171409bb560e5a7addbac73f57a25d3c8c71a3a01645d80
|
3 |
+
size 74
|
app/VectorStore/index/uuid_to_id_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90ccb948caa40144e512f5ab70fb9d8fe4b08dcf18b3d7e4c368bee1f0283a47
|
3 |
+
size 122516
|
app/VectorStore/index/uuid_to_id_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91cdf0f3a7086cd4c5c96b8e54b8dd930a0aa4f29ab587809eb061a1511c5cb2
|
3 |
+
size 1689782
|
app/app.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import langchain
|
3 |
+
import load_model
|
4 |
+
import utils as ut
|
5 |
+
import chromadb
|
6 |
+
from chromadb.config import Settings
|
7 |
+
import os
|
8 |
+
|
9 |
+
# Streamlit front end: pick an LLM backend and a Chroma collection, then answer
# a free-text question with the retrieval chain built by load_model.
persist_directory = load_model.persist_directory

st.title('myGPT')
st.header('An GPT example brought to you by Heiko Wagner')

st.markdown('*\"Parametrised models are simply functions that depend on inputs and trainable parameters. There is no fundamental difference between the two, except that trainable parameters are shared across training samples whereas the input varies from sample to sample.\"* [(Yann LeCun, Deep learning course)](https://atcold.github.io/pytorch-Deep-Learning/en/week02/02-1/#Parametrised-models)')

st.latex(r'''h(\boldsymbol x, \boldsymbol w)= \sum_{k=1}^{K}\boldsymbol w_{k} \phi_{k}(\boldsymbol x)''')

import torch
torch.cuda.empty_cache()

# This widget selects the model backend; it previously carried the label of the
# document selector further down, which made the two dropdowns indistinguishable.
model_type = st.selectbox(
    'Select the model to be used to answer your question',
    ('OpenAI', 'local_model'))

if model_type == 'OpenAI':
    # The key is a secret: use a masked single-line input instead of a text area.
    openai_key = st.text_input('OpenAI Key:', '', type='password')
    if not openai_key:
        # Without a key the OpenAI client cannot be used; halt this script run
        # instead of exporting an empty key and failing further down.
        st.info('Please enter your OpenAI key to continue.')
        st.stop()
    os.environ["OPENAI_API_KEY"] = openai_key
    llm = load_model.load_openai_model()
else:
    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")

# Open the persisted vector store to list the available document collections.
client = chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
                                  persist_directory=persist_directory))

collections = tuple(collection.name for collection in client.list_collections())
print(collections)
option = st.selectbox(
    'Select the Documents to be used to answer your question',
    collections)

st.write('You selected:', option)

chain = load_model.create_chain(llm, collection=option)
try:
    query = st.text_area('Ask a question:', 'Hallo how are you today?')
    result = chain({"query": query})
    ut.format_result_set(result)
finally:
    # Drop the chain reference and release cached CUDA memory after every run,
    # whether or not the query succeeded.
    del chain
    torch.cuda.empty_cache()
|
app/fce3ad7ed281744c55c5838aa5677e15468ae6bae3aa5fe76e6aac20d2c19f2f.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a98a5f02d12b69d1b62de62ffef9fc98fbe229f2369e002e3f47ca78bdefeb3e
|
3 |
+
size 29884059
|
app/load_docs.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# %%
|
2 |
+
from load_vectors import load_from_web, load_and_split, create_and_add
|
3 |
+
|
4 |
+
docs = [
|
5 |
+
"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/85ec0278-bf2f-4392-94b9-c086717fa8f6_axa_urd2022_accessible_va.pdf"
|
6 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/d97a94ff-a848-474b-b802-c22afc8311cd_axa_half_year_2022_financial_report.pdf"
|
7 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/51954d53-c0cf-4f90-84f7-53ee27dbe4e6_axa_ri2021_va_accessible.pdf"
|
8 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/e3f52b5e-d4aa-4fc8-8bcd-f432df86e804_axa_urd_2021_en_accessible.pdf"
|
9 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/4f303cec-a12d-480b-accb-7b56f706f60e_axa-ri2020-en-accessible.pdf"
|
10 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/d6aef906-e41f-40c7-ac9c-29044e98939d_AXA_URD_2020_EN_accessible_b.pdf"
|
11 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F47b47783-ddd1-47c3-912f-bc6e318ebbb3_axa_half_year_2020_financial_report.pdf"
|
12 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffd5a8bd8-9ef1-40eb-b953-c268c0ab4bf9_axa-ri2019-en-accessible.pdf"
|
13 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F90abd6c7-80c4-48ef-84bf-1d038670d9b7_axa-urd2019-en.pdf"
|
14 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F3ef6a9cc-6215-4e58-83b5-756774ef5b73_axa_half_year_2019_financial_report2.pdf"
|
15 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0a5e0bd9-78f2-4ef8-b32c-1d3d35ddce80_axa-ri2018-en-accessible.pdf"
|
16 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F913d1869-3d11-4eb2-b013-4caedb747fab_axa-ddr2018b-en.pdf"
|
17 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F476f79c9-c0c7-4ce3-88ed-4f99b3d22259_axa_half_year_2018_financial_report.pdf"
|
18 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F01f6966b-c26c-4935-91dc-1b296511ba8c_axa_ri2017_gb_planche.pdf"
|
19 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fec440dc9-69df-41b5-a3af-5b5f4fc29670_axa_reference_document_2017c.pdf"
|
20 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F72c59a61-8124-4066-a86d-bece5f41ce53_axa_us_statutory_statements_fy17.pdf"
|
21 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9237d78f-c1ac-43ca-9623-d0382a5aaaec_axa_us_statutory_statements_3q17.pdf"
|
22 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffdd639e0-2ea6-4c3f-8a42-8bca4359e858_axa_us_statutory_statements_2q17.pdf"
|
23 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F877e30a9-df72-480f-ac25-edcfcd4049c2_axa_us_statutory_statements_1q17.pdf"
|
24 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F6f3108fd-fabc-4dc6-a984-23eb0dca7a19_axa-ri2016-en_01.pdf"
|
25 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F268bab7a-2e78-4843-844a-fd3ad2d340bc_axa_reference_document_2016.pdf"
|
26 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fd2f66d05-e6ad-47a2-ab72-9bc727bd49c2_axa_half_year_2016_financial_report.pdf"
|
27 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F7a5f0af2-03c3-4a82-a077-46fdc52e5685_axa_us_statutory_statements_fy16.pdf"
|
28 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fdd643342-e975-473d-af54-c64491252a19_axa_us_statutory_statements_3q16.pdf"
|
29 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F53e10a7a-9348-40dc-935e-01fb0a1d0441_axa_us_statutory_statements_2q16.pdf"
|
30 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F8906bad6-14cb-4594-b7c0-029f8fc2172d_axa_us_statutory_statements_1q16.pdf"
|
31 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2d8e525a-1161-453a-a14f-817f0f070f79_axa_activity_cr_report_2015_accessible.pdf"
|
32 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F73719a96-c3b1-456b-abaf-63b80c06968c_axa_reference_document_2015.pdf"
|
33 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fe2936c1a-65f0-40db-b34b-bef9c27e91c0_axa_2015_half_year_financial_report.pdf"
|
34 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fdaac2a30-a3b8-4839-9331-041805836a6f_axa_us_statutory_statements_fy15.pdf"
|
35 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F61a6c98a-08fb-4cb1-b6c0-4d1ef0f72aa9_axa_us_statutory_statements_3q15.pdf"
|
36 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fe0689ffc-5aec-4388-a10e-26d1d1a7eb9a_axa_us_statutory_statements_2q15.pdf"
|
37 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbfa8ef5b-6533-4773-8502-5170a51735c9_axa_us_statutory_statements_1q15.pdf"
|
38 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbbb94857-f5d4-4afd-81d0-e85666883936_axa_annual+financial+report_2014.pdf"
|
39 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fb826839c-76c9-48c7-b8c1-9eda7fe3b032_axa_activity_csr_report_2014_va_b.pdf"
|
40 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fac63e0f9-60ba-47c2-9e23-f1d25731c7ee_axa_2014_half_year_financial_report.pdf"
|
41 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fb7db2a55-8eb6-4131-bc03-698e4bc756d6_axa_us_statutory_statements_fy2014.pdf"
|
42 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F417b48df-c585-4cb6-9d10-719d81228756_axa_us_statutory_statements_3q14.pdf"
|
43 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F4586d978-6fb8-4c44-b934-e15c14143b6d_axa_us_statutory_statements_2q14.pdf"
|
44 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F29cc016e-aff9-49c5-bb04-d55598aab844_axa_us_statutory_statements_1q14.pdf"
|
45 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F25fee379-c187-40e7-bf3a-5fe1423cec0f_axa_annual+financial+report_2013.pdf"
|
46 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F37614ed4-1fe0-483e-a0eb-0acefdedd065_axa_2013_half_year_financial_report.pdf"
|
47 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Febb51afc-af0e-4aff-9494-5b852b3233e5_axa_us_statutory_statements_fy2013.pdf"
|
48 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fdd4cd68e-710e-4e00-ba96-c7560d738a43_axa_us_statutory_statements_3q13.pdf"
|
49 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Feab93a81-859a-487c-941c-11e4ce08d5f0_axa_us_statutory_statements_2q13.pdf"
|
50 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F879d09d7-8ff7-4c43-9a24-7ee44ee55404_axa_us_statutory_statements_1q13.pdf"
|
51 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9224097f-d703-4efd-8050-6553ef4336f8_axa_annual+financial+report_2012b.pdf"
|
52 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fcb9d1279-948a-4238-ab8f-754e9e10f2a5_axa_activity_csr_report_2012b_va.pdf"
|
53 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbed64ef2-5078-425a-a616-ffb1947e0b65_axa_2012_half_year_financial_report.pdf"
|
54 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa148165a-b818-4ea1-b7ee-7949cc86ff9a_axa_us_statutory_statements_fy2012.pdf"
|
55 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F601ed5e8-189d-4e59-b0d4-d1c1eedb2ffe_axa_us_statutory_statements_3q12.pdf"
|
56 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9637b674-c740-4115-9c90-3a8827516cc0_axa_us_statutory_statements_2q12.pdf"
|
57 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F319a5964-ea51-4d51-96c8-cf6838047b72_axa_us_statutory_statements_1q12.pdf"
|
58 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0b75d1fe-4b11-4462-9883-4e3bc7532bf4_axa_annual+financial+report_2011.pdf"
|
59 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F17e098ab-3335-4ee1-ade7-058517a952c4_axa_activity_csr_report_2011_vab.pdf"
|
60 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F03996908-7e75-465e-8082-b44f02da326a_axa_us_statutory_statements_fy2011.pdf"
|
61 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fea813e84-7d08-4cf2-bea1-3a01fd4bdf62_axa_us_statutory_statements_3q11.pdf"
|
62 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fda28b496-275b-451d-bffd-108714eb2c39_axa_us_statutory_statements_2q11.pdf"
|
63 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fd6aa2b39-896e-47cf-9882-9985c8d44276_axa_us_statutory_statements_1q11.pdf"
|
64 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fc76f47f4-0917-4fb1-b1ae-78e2a4fbcef5_axa_annual+financial+report_2010c+%281%29.pdf"
|
65 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F68c2771e-5ed8-41d9-bb59-f37f6403b4bf_axa_activity_csr_report_2010_vac.pdf"
|
66 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9b7812a1-a1a2-4e17-9bf2-88c11aac4e08_axa_2010_half_year_financial_report.pdf.pdf"
|
67 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F3f4cc3bd-6823-4ccf-a918-f0c9d9063c2a_axa_us_statutory_statements_fy2010.pdf"
|
68 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F5a8a399f-9a0a-4475-8fbd-5bc0ca1dffe6_axa_us_statutory_statements_3q10.pdf"
|
69 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F67ba6c6d-7063-41d4-ad4e-75d86b15da43_axa_us_statutory_statements_1q10.pdf"
|
70 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa151a532-da4f-4d12-8b3b-9867df4f9724_axa_annual+financial+report_2009.pdf"
|
71 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F5f89c4dd-d935-47fe-ac69-23fada9bfc96_axa_2009_half_year_financial_report.pdf"
|
72 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ff322c77a-e2a2-4cd7-88a0-edd8ad4cd021_axa_annual+financial+report_2008.pdf"
|
73 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fb7f88f05-053a-460b-aa4d-6163d3644cfc_axa_activity_csr_report_2008_vad.pdf"
|
74 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ff657a419-e066-485a-a58e-1d2870a6a035_axa_2008_half_year_financial_report.pdf"
|
75 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F820b669d-b3b5-4c14-986d-2223e2bcbcfb_axa_annual+financial+report_2007.pdf"
|
76 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2741b55e-9349-47ef-9704-3cbca0853b76_axa_activity_csr_report_2007.pdf"
|
77 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F42159571-a3f1-4d36-b4b9-a5493fcc95e3_axa_2007_half_year_financial_report.pdf"
|
78 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F40f9da2a-1bcb-4e5e-9380-18f64b3ce86e_axa_annual+financial+report_2006b.pdf"
|
79 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa6a14e0c-62cd-4812-a2d0-3a0aae8c862d_axa_activity_csr_report_2006b.pdf"
|
80 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Faf242b66-1308-4331-829f-fa91bd0db43e_axa_annual+financial+report_2005.pdf"
|
81 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F6b3313d1-3b72-4f28-bc7b-f445b9b3190c_axa_activity_csr_report_2005.pdf"
|
82 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F048b0d90-b28f-4fc3-bc30-b02cf8e0d6fc_axa_annual+financial+report_2004_ci.pdf"
|
83 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F02acbd05-712f-4b73-93f0-dffa37e2faa2_axa_annual+financial+report_2004_ci.pdf"
|
84 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fcf0b84a5-6da9-499d-985f-530559940494_axa_activity_csr_report_2004.pdf"
|
85 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fafa397b5-d613-40f3-a28f-81bde0d461e2_axa_annual+financial+report_2003.pdf"
|
86 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2a31ebb9-ba04-4998-982e-9dd336abca1f_axa_annual+financial+report_2002.pdf"
|
87 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F51e5f017-954b-4f81-84f9-15a086bf1e33_axa_annual+financial+report_2002_ci01.pdf"
|
88 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F05fea38c-c626-4aaf-9ead-10e9c8f849c1_axa_annual+financial+report_2002_ci02.pdf"
|
89 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F3e41d00d-42b3-4bfd-babc-8b9f76b73d95_axa_activity_csr_report_2002.pdf"
|
90 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F4f2676f4-d36c-4d2e-b088-ef26878ff28b_axa_annual+financial+report_2001.pdf"
|
91 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffdfa0941-6fb5-4ce8-9f42-3b0152e72ce2_axa_activity_csr_report_2001.pdf"
|
92 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F98922150-f1c5-4df4-9006-a8ef17a514cd_axa_annual+financial+report_2000.pdf"
|
93 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F1a645a94-1c56-43be-9a5a-94495e902a23_axa_activity_csr_report_2000.pdf"
|
94 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F51c109ca-2bba-45b3-a03b-78fdd16faeca_axa_annual+financial+report_1999.pdf"
|
95 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F21cdedc6-c082-4ae6-abb3-4c57f0cf9dd8_axa_annual+financial+report_1998.pdf"
|
96 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fd3132d9d-b656-470d-ba4f-fe8d51586e4b_axa_activity_csr_report_1998.pdf"
|
97 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F746d88d3-a4f7-4126-b539-a5da353f53d7_axa_annual+financial+report_1997.pdf"
|
98 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F97097956-6cd5-4fb4-a6ea-9aeb32fd9023_axa_activity_csr_report_1997.pdf"
|
99 |
+
]
|
100 |
+
|
101 |
+
|
102 |
+
docs_tarifs= [
|
103 |
+
"https://www.axa.de/site/axa-de/get/documents_E1805589786/axade/medien/privatkunden/fahrzeugversicherungen/kfz-versicherung/start-and-drive/start-and-drive-versicherungsbedingungen.pdf",
|
104 |
+
"https://www.axa.de/site/axa-de/get/documents_E-298610932/axade/medien/privatkunden/haftpflicht-und-recht/rechtsschutz/versicherungsbedingungen-roland-rechtsschutz.pdf",
|
105 |
+
"https://www.axa.de/site/axa-de/get/documents_E101690225/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-S-5-mio.pdf",
|
106 |
+
"https://www.axa.de/site/axa-de/get/documents_E-1067805129/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-S-10-mio.pdf",
|
107 |
+
"https://www.axa.de/site/axa-de/get/documents_E1026401604/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-M.pdf",
|
108 |
+
"https://www.axa.de/site/axa-de/get/documents_E1450059874/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-L.pdf",
|
109 |
+
"https://www.axa.de/site/axa-de/get/documents_E1636759799/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-S.pdf",
|
110 |
+
"https://www.axa.de/site/axa-de/get/documents_E1147682774/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-M-20%25.pdf",
|
111 |
+
"https://www.axa.de/site/axa-de/get/documents_E1642308493/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-M-40%25.pdf",
|
112 |
+
"https://www.axa.de/site/axa-de/get/documents_E1883536226/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-L.pdf",
|
113 |
+
]
|
114 |
+
|
115 |
+
docs_list = [
|
116 |
+
"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/85ec0278-bf2f-4392-94b9-c086717fa8f6_axa_urd2022_accessible_va.pdf"
|
117 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/e3f52b5e-d4aa-4fc8-8bcd-f432df86e804_axa_urd_2021_en_accessible.pdf"
|
118 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/d6aef906-e41f-40c7-ac9c-29044e98939d_AXA_URD_2020_EN_accessible_b.pdf"
|
119 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffd5a8bd8-9ef1-40eb-b953-c268c0ab4bf9_axa-ri2019-en-accessible.pdf"
|
120 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0a5e0bd9-78f2-4ef8-b32c-1d3d35ddce80_axa-ri2018-en-accessible.pdf"
|
121 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F01f6966b-c26c-4935-91dc-1b296511ba8c_axa_ri2017_gb_planche.pdf"
|
122 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F6f3108fd-fabc-4dc6-a984-23eb0dca7a19_axa-ri2016-en_01.pdf"
|
123 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fe2936c1a-65f0-40db-b34b-bef9c27e91c0_axa_2015_half_year_financial_report.pdf"
|
124 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbbb94857-f5d4-4afd-81d0-e85666883936_axa_annual+financial+report_2014.pdf"
|
125 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F25fee379-c187-40e7-bf3a-5fe1423cec0f_axa_annual+financial+report_2013.pdf"
|
126 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9224097f-d703-4efd-8050-6553ef4336f8_axa_annual+financial+report_2012b.pdf"
|
127 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0b75d1fe-4b11-4462-9883-4e3bc7532bf4_axa_annual+financial+report_2011.pdf"
|
128 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fc76f47f4-0917-4fb1-b1ae-78e2a4fbcef5_axa_annual+financial+report_2010c+%281%29.pdf"
|
129 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa151a532-da4f-4d12-8b3b-9867df4f9724_axa_annual+financial+report_2009.pdf"
|
130 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ff322c77a-e2a2-4cd7-88a0-edd8ad4cd021_axa_annual+financial+report_2008.pdf"
|
131 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F820b669d-b3b5-4c14-986d-2223e2bcbcfb_axa_annual+financial+report_2007.pdf"
|
132 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F40f9da2a-1bcb-4e5e-9380-18f64b3ce86e_axa_annual+financial+report_2006b.pdf"
|
133 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Faf242b66-1308-4331-829f-fa91bd0db43e_axa_annual+financial+report_2005.pdf"
|
134 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F02acbd05-712f-4b73-93f0-dffa37e2faa2_axa_annual+financial+report_2004_ci.pdf"
|
135 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fafa397b5-d613-40f3-a28f-81bde0d461e2_axa_annual+financial+report_2003.pdf"
|
136 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2a31ebb9-ba04-4998-982e-9dd336abca1f_axa_annual+financial+report_2002.pdf"
|
137 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F4f2676f4-d36c-4d2e-b088-ef26878ff28b_axa_annual+financial+report_2001.pdf"
|
138 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F98922150-f1c5-4df4-9006-a8ef17a514cd_axa_annual+financial+report_2000.pdf"
|
139 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F51c109ca-2bba-45b3-a03b-78fdd16faeca_axa_annual+financial+report_1999.pdf"
|
140 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F21cdedc6-c082-4ae6-abb3-4c57f0cf9dd8_axa_annual+financial+report_1998.pdf"
|
141 |
+
,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F746d88d3-a4f7-4126-b539-a5da353f53d7_axa_annual+financial+report_1997.pdf"
|
142 |
+
]
|
143 |
+
|
144 |
+
|
145 |
+
# Ingestion plan, processed in order: (collection name, source URLs, extra
# keyword arguments for load_and_split). The insurance-terms PDFs are split
# with chunk_size=700; the annual reports use load_and_split's own default.
_INGESTION_PLAN = (
    ("axa_terms", docs_tarifs, {"chunk_size": 700}),
    ("axa_gpt", docs_list, {}),
)

for _collection_name, _urls, _split_kwargs in _INGESTION_PLAN:
    # Download, split into chunks, then embed and store under the collection.
    docs = load_from_web(_urls)
    sub_docs = load_and_split(docs, **_split_kwargs)
    create_and_add(_collection_name, sub_docs, "hkunlp/instructor-large")
|
app/load_model.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# %%
|
2 |
+
# git clone https://huggingface.co/nyanko7/LLaMA-7B
|
3 |
+
# python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch2.00/index.html
|
4 |
+
# apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
|
5 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer
|
6 |
+
from langchain.embeddings import LlamaCppEmbeddings, HuggingFaceInstructEmbeddings, OpenAIEmbeddings
|
7 |
+
from langchain.llms import LlamaCpp, HuggingFacePipeline
|
8 |
+
from langchain.vectorstores import Chroma
|
9 |
+
from transformers import pipeline
|
10 |
+
import torch
|
11 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
12 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
13 |
+
import streamlit as st
|
14 |
+
import cloudpickle
|
15 |
+
import os
|
16 |
+
from langchain.chains import RetrievalQA
|
17 |
+
from langchain.indexes import VectorstoreIndexCreator
|
18 |
+
from langchain.llms import OpenAI
|
19 |
+
|
20 |
+
from chromadb.config import Settings
|
21 |
+
import chromadb
|
22 |
+
|
23 |
+
import pathlib
|
24 |
+
|
25 |
+
current_path = str( pathlib.Path(__file__).parent.resolve() )
|
26 |
+
print(current_path)
|
27 |
+
persist_directory = current_path + "/VectorStore"
|
28 |
+
|
29 |
+
# %%
|
30 |
+
# SECURITY: an OpenAI API key used to be hard-coded on this line. Secrets must
# never be committed to the repository; the previously published key has to be
# revoked. The key is now taken from the OPENAI_API_KEY environment variable
# (app.py sets it from the user's input before requesting a model).
#
# `llm` stays available as a module-level name for backward compatibility, but
# it is only constructed when a key is configured; otherwise it is None.
llm = OpenAI(temperature=0.9) if os.environ.get("OPENAI_API_KEY") else None
|
33 |
+
|
34 |
+
@st.cache_resource
def load_cpu_model():
    """Build a llama.cpp-backed LLM from the local quantised LLaMA-7B file.

    Does not work atm, bc cpu model is not persisted.

    Returns:
        A ``LlamaCpp`` instance configured with a 6000-token context,
        16 threads, temperature 0.6 and top_p 0.95.
    """
    model_path = "./llama.cpp/models/LLaMA-7B/ggml-model-q4_0.bin"
    # The former unused `device_map` local and the discarded
    # `LlamaCppEmbeddings` instance were removed: the latter loaded the same
    # model file a second time only to throw the result away.
    return LlamaCpp(
        model_path=model_path,
        n_ctx=6000,
        n_threads=16,
        temperature=0.6,
        top_p=0.95,
    )
|
49 |
+
|
50 |
+
@st.cache_resource(max_entries =1)
def load_gpu_model(used_model = "chavinlo/gpt4-x-alpaca"):
    """Load a LLaMA-family causal LM and wrap it as a LangChain LLM.

    Args:
        used_model: Hugging Face model id (or path) handed to the tokenizer
            and model ``from_pretrained`` calls.

    Returns:
        A ``HuggingFacePipeline`` around a ``text-generation`` pipeline
        (max_length 8000, temperature 0.6, top_p 0.95, repetition_penalty 1.2).
    """
    torch.cuda.empty_cache()
    tokenizer = LlamaTokenizer.from_pretrained(used_model)

    model_kwargs = {
        "offload_folder": current_path + "/models_gpt/",
        "low_cpu_mem_usage": True,
        "cache_dir": current_path + "/mymodels/",
    }
    if torch.cuda.is_available():
        model_kwargs["device_map"] = "auto"
        # atm no offload, bc device_map="auto"
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True
        )
    else:
        # CPU-only host. The original computed `torch_dtype=float32` and
        # `load_in_8bit=False` here but never passed them on, and still sent
        # an 8-bit bitsandbytes config. 8-bit loading is expected to need a
        # CUDA device (see the Transformers quantization docs), so load plain
        # float32 weights on the CPU instead.
        model_kwargs["device_map"] = {"": "cpu"}
        model_kwargs["torch_dtype"] = torch.float32

    base_model = LlamaForCausalLM.from_pretrained(used_model, **model_kwargs)
    pipe = pipeline(
        "text-generation",
        model=base_model,
        tokenizer=tokenizer,
        max_length=8000,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    return HuggingFacePipeline(pipeline=pipe)
|
86 |
+
|
87 |
+
#@st.cache_resource
|
88 |
+
def load_openai_model():
    """Return a fresh LangChain ``OpenAI`` LLM with temperature 0.9."""
    openai_llm = OpenAI(temperature=0.9)
    return openai_llm
|
90 |
+
|
91 |
+
@st.cache_resource
def load_openai_embedding():
    """Return an ``OpenAIEmbeddings`` instance built with default arguments."""
    openai_embedder = OpenAIEmbeddings()
    return openai_embedder
|
94 |
+
|
95 |
+
@st.cache_resource
def load_embedding(model_name):
    """Build the instructor embedding model for *model_name*.

    Args:
        model_name: Model identifier passed to ``HuggingFaceInstructEmbeddings``.

    Returns:
        The embeddings object; model files are cached under ``mymodels/``
        next to this module.
    """
    model_cache_dir = current_path + "/mymodels/"
    return HuggingFaceInstructEmbeddings(
        query_instruction="Represent the query for retrieval: ",
        model_name=model_name,
        cache_folder=model_cache_dir,
    )
|
103 |
+
|
104 |
+
def load_vectorstore(model_name, collection):
    """Open a Chroma collection from the persisted store.

    Args:
        model_name: Embedding model name, resolved through ``load_embedding``.
        collection: Name of the Chroma collection to open.

    Returns:
        A ``Chroma`` vector store bound to ``persist_directory``.
    """
    embedder = load_embedding(model_name)

    chroma_settings = Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
        anonymized_telemetry=False,
    )
    return Chroma(
        collection_name=collection,
        embedding_function=embedder,
        client_settings=chroma_settings,
        persist_directory=persist_directory,
    )
|
119 |
+
|
120 |
+
def add_document_to_vectorstore(vectorstore, docs):
    """Placeholder for adding *docs* to *vectorstore*; currently does nothing."""
    return None
|
122 |
+
|
123 |
+
def create_chain(_llm, collection, model_name = "hkunlp/instructor-large"):
    """Build a retrieval QA chain over a persisted Chroma collection.

    Args:
        _llm: LLM used by the chain to answer.
        collection: Name of the Chroma collection to search.
        model_name: Embedding model used to open the collection.

    Returns:
        A ``RetrievalQA`` "stuff" chain that retrieves with ``k=4`` and also
        returns the source documents.
    """
    store = load_vectorstore(model_name, collection)
    top_k_retriever = store.as_retriever(search_kwargs={"k": 4})
    return RetrievalQA.from_chain_type(
        llm=_llm,
        chain_type="stuff",
        retriever=top_k_retriever,
        return_source_documents=True,
    )
|
128 |
+
# %%
|
app/load_vectors.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# %%
|
2 |
+
import nltk
|
3 |
+
from langchain.indexes import VectorstoreIndexCreator
|
4 |
+
from langchain.text_splitter import CharacterTextSplitter, NLTKTextSplitter
|
5 |
+
from langchain.document_loaders import OnlinePDFLoader
|
6 |
+
from langchain.vectorstores import Chroma
|
7 |
+
from langchain.embeddings import LlamaCppEmbeddings, HuggingFaceInstructEmbeddings
|
8 |
+
from chromadb.config import Settings
|
9 |
+
import chromadb
|
10 |
+
from chromadb.utils import embedding_functions
|
11 |
+
from hashlib import sha256
|
12 |
+
import cloudpickle
|
13 |
+
import logging
|
14 |
+
import os
|
15 |
+
from load_model import load_embedding
|
16 |
+
import torch
|
17 |
+
import re
|
18 |
+
import pathlib
|
19 |
+
|
20 |
+
current_path = str( pathlib.Path(__file__).parent.resolve() )
|
21 |
+
|
22 |
+
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
|
23 |
+
nltk.download('punkt')
|
24 |
+
|
25 |
+
persist_directory = current_path + "/VectorStore"
|
26 |
+
logger = logging.getLogger()
|
27 |
+
|
28 |
+
|
29 |
+
# %%
|
30 |
+
|
31 |
+
def create_collection(collection_name, model_name, client):
    """Get or create a Chroma collection with an instructor embedding function.

    Not used atm.

    Args:
        collection_name: Name of the collection to get or create.
        model_name: Instructor model name for the embedding function.
        client: Object exposing ``get_or_create_collection``.

    Returns:
        Always ``True``.
    """
    # Use the GPU for embeddings when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    instructor_ef = embedding_functions.InstructorEmbeddingFunction(
        model_name=model_name,
        device=device,
    )
    client.get_or_create_collection(collection_name, embedding_function=instructor_ef)
    return True
|
41 |
+
|
42 |
+
def create_and_add(collection_name, sub_docs, model_name):
    """Embed *sub_docs* into a model-specific Chroma collection and persist it.

    The collection name is *collection_name* plus ``_`` plus *model_name*
    stripped of every non-alphanumeric character.

    Args:
        collection_name: Base name of the collection.
        sub_docs: Documents to add.
        model_name: Embedding model name, resolved through ``load_embedding``.

    Returns:
        The ``Chroma`` vector store the documents were added to.
    """
    chroma_settings = chromadb.config.Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
        anonymized_telemetry=False,
    )

    # NOTE(review): this client is never used below; it is kept because
    # constructing it may have side effects on the store. Confirm and remove.
    client = chromadb.Client(chroma_settings)
    model_suffix = re.sub('[^A-Za-z0-9]+', '', model_name)
    collection_name = collection_name + "_" + model_suffix

    embedder = load_embedding(model_name)
    logging.info(f"Adding documents to {collection_name}")

    # Both store handles below are opened with the same arguments.
    store_kwargs = {
        "collection_name": collection_name,
        "embedding_function": embedder,
        "client_settings": chroma_settings,
        "persist_directory": persist_directory,
    }
    vectorstore = Chroma(**store_kwargs)
    vectorstore.add_documents(documents=sub_docs, embedding=embedder)
    vectorstore.persist()

    # Test Vectorstore: reopen the collection and print a sample query result.
    reopened_store = Chroma(**store_kwargs)
    print( reopened_store.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )

    return vectorstore
|
73 |
+
|
74 |
+
def load_from_web(urls, cache=True):
    """Load PDF documents from *urls*, using a pickle cache on disk.

    The cache file is ``./<sha256 of str(urls)>.pkl``, relative to the current
    working directory.

    Args:
        urls: Indexable sequence of PDF URLs.
        cache: When true and the cache file exists, load the documents from it
            instead of fetching them again.

    Returns:
        The loaded documents, each with its metadata replaced by
        ``source``/``url`` (the URL at the same index) and ``company``.
    """
    docs_list = urls
    filename=f"./{sha256(str(urls).encode('utf-8')).hexdigest()}.pkl"

    if cache and os.path.isfile(filename):
        logger.info("Using Cache")
        # NOTE(security): unpickling can execute arbitrary code; only load
        # cache files written by this function.
        # The context manager closes the handle, which used to be leaked.
        with open(filename, "rb") as pikd:
            docs = cloudpickle.load(pikd)
    else:
        loaders=[OnlinePDFLoader(pdf) for pdf in docs_list]
        docs = []
        for loader in loaders:
            docs.extend(loader.load())
        with open(filename, 'wb') as output:
            cloudpickle.dump(docs, output)

    #update metadata
    # NOTE(review): assumes one document per URL, in the same order; with more
    # documents than URLs this raises IndexError, as it did before.
    for i, doc in enumerate(docs):
        doc.metadata = {'source': docs_list[i], 'url': docs_list[i], 'company':'AXA'}
    return docs
|
98 |
+
|
99 |
+
def load_and_split(docs, chunk_size=700):
    """Split *docs* into chunks with the NLTK text splitter.

    Args:
        docs: Documents to split.
        chunk_size: Chunk size passed to ``NLTKTextSplitter``; chunks do not
            overlap.

    Returns:
        The result of ``split_documents`` on *docs*.
    """
    splitter = NLTKTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    return splitter.split_documents(docs)
|
app/requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/hwchase17/langchain.git
|
2 |
+
git+https://github.com/huggingface/transformers.git
|
3 |
+
git+https://github.com/chroma-core/chroma.git
|
4 |
+
accelerate
|
5 |
+
bitsandbytes
|
6 |
+
InstructorEmbedding
|
7 |
+
cloudpickle
|
8 |
+
streamlit
|
9 |
+
requests==2.20.1
|
10 |
+
latex2markdown
|
11 |
+
openai
|
12 |
+
unstructured
|
app/result.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5b61c0f601cb65f2779f18fdbe5bf47f88d61f23dfbe2afdafb64c951207da8
|
3 |
+
size 429
|
app/run.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# %%
|
2 |
+
import load_model
|
3 |
+
import cloudpickle
|
4 |
+
|
5 |
+
# %%
|
6 |
+
# llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
|
7 |
+
# Use the OpenAI model for this run.
llm = load_model.load_openai_model()

# %%
# Build the retrieval QA chain over the persisted collection, ask one sample
# question and print the raw result.
chain = load_model.create_chain(
    llm,
    collection="axa_terms_hkunlpinstructorlarge",
)
result = chain({"query": "What are AXAs green Goals?"})
print(result)
|
app/st_render_doc.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import utils as ut
|
3 |
+
import cloudpickle
|
4 |
+
|
5 |
+
filename="./result.pkl"
# Load the pickled chain result. The context manager closes the file handle;
# the previous version only deleted the name without closing it.
# NOTE(security): unpickling can execute arbitrary code; only load result
# files produced by this application.
with open(filename, "rb") as pikd:
    result = dict( cloudpickle.load(pikd) )
# Render the answer and, on request, its source documents.
ut.format_result_set(result)
|
app/utils.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import latex2markdown
|
3 |
+
from langchain.docstore.document import Document
|
4 |
+
|
5 |
+
def format_document(document: Document):
    """Return *document* as a plain dict.

    TODO: Implement a nice style.
    """
    as_dict = document.dict()
    return as_dict
|
8 |
+
|
9 |
+
def format_result_set(result):
    """Render a chain result in Streamlit.

    Writes ``result["result"]`` converted from LaTeX to Markdown, then shows a
    checkbox; when it is ticked, each entry of ``result["source_documents"]``
    is written as well.

    Args:
        result: Mapping with the keys ``"result"`` and ``"source_documents"``.
    """
    answer_markdown = latex2markdown.LaTeX2Markdown(result["result"]).to_markdown()
    st.write(answer_markdown)

    show_sources = st.checkbox('Show source documents')
    # Looked up before the check, so a missing key fails whether or not the
    # box is ticked.
    sources = result["source_documents"]
    if not show_sources:
        return
    st.write('Source Documents:')
    for source in sources:
        st.write(format_document(source))
|
18 |
+
|
19 |
+
|
docker-compose.yaml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: "3.9"
|
2 |
+
services:
|
3 |
+
streamlit_app:
|
4 |
+
build: .
|
5 |
+
tty: true
|
6 |
+
ports:
|
7 |
+
- 8080:8080
|
8 |
+
deploy:
|
9 |
+
resources:
|
10 |
+
reservations:
|
11 |
+
devices:
|
12 |
+
- capabilities: [gpu]
|
13 |
+
dev_app:
|
14 |
+
image: tensorflow/tensorflow:latest-gpu
|
15 |
+
tty: true
|
16 |
+
volumes:
|
17 |
+
- ./app:/app
|
18 |
+
- ./root:/root
|
19 |
+
deploy:
|
20 |
+
resources:
|
21 |
+
reservations:
|
22 |
+
devices:
|
23 |
+
- capabilities: [gpu]
|