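Update app.py: fetch the model with huggingface_hub.snapshot_download and load the tokenizer and vLLM engine from the returned model_path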
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ import hydralit_components as hc
|
|
17 |
from transformers import AutoTokenizer
|
18 |
from qdrant_client import QdrantClient
|
19 |
from optimum_encoder import OptimumEncoder
|
|
|
20 |
from unstructured.partition.auto import partition
|
21 |
from fastembed import SparseEmbedding, SparseTextEmbedding
|
22 |
from unstructured.nlp.tokenize import download_nltk_packages
|
@@ -144,12 +145,12 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
|
|
144 |
@st.cache_resource
|
145 |
def load_models_and_documents():
|
146 |
with st.spinner('Load models...'):
|
147 |
-
|
148 |
|
149 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
150 |
|
151 |
llm = vllm.LLM(
|
152 |
-
|
153 |
tensor_parallel_size=1,
|
154 |
max_model_len=32768,
|
155 |
trust_remote_code=True,
|
|
|
17 |
from transformers import AutoTokenizer
|
18 |
from qdrant_client import QdrantClient
|
19 |
from optimum_encoder import OptimumEncoder
|
20 |
+
from huggingface_hub import snapshot_download
|
21 |
from unstructured.partition.auto import partition
|
22 |
from fastembed import SparseEmbedding, SparseTextEmbedding
|
23 |
from unstructured.nlp.tokenize import download_nltk_packages
|
|
|
145 |
@st.cache_resource
|
146 |
def load_models_and_documents():
|
147 |
with st.spinner('Load models...'):
|
148 |
+
model_path = snapshot_download("LnL-AI/glm-4-9b-chat-gptq-4bit-qubitium-r1")
|
149 |
|
150 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
151 |
|
152 |
llm = vllm.LLM(
|
153 |
+
model_path,
|
154 |
tensor_parallel_size=1,
|
155 |
max_model_len=32768,
|
156 |
trust_remote_code=True,
|