devve1 committed on
Commit a04bced
1 Parent(s): 6effb6c

Update app.py

Files changed (1)
  1. app.py +4 -3
app.py CHANGED
@@ -17,6 +17,7 @@ import hydralit_components as hc
 from transformers import AutoTokenizer
 from qdrant_client import QdrantClient
 from optimum_encoder import OptimumEncoder
+from huggingface_hub import snapshot_download
 from unstructured.partition.auto import partition
 from fastembed import SparseEmbedding, SparseTextEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
@@ -144,12 +145,12 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
 @st.cache_resource
 def load_models_and_documents():
     with st.spinner('Load models...'):
-        model_name = "LnL-AI/glm-4-9b-chat-gptq-4bit-qubitium-r1"
+        model_path = snapshot_download("LnL-AI/glm-4-9b-chat-gptq-4bit-qubitium-r1")

-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

         llm = vllm.LLM(
-            model_name,
+            model_path,
             tensor_parallel_size=1,
             max_model_len=32768,
             trust_remote_code=True,
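
For reference, a minimal standalone sketch of the updated loading path, assuming huggingface_hub, transformers, and vllm are installed; it simply mirrors the changed lines above, with the repo id first resolved to a local snapshot directory rather than passed straight to the loaders:

from huggingface_hub import snapshot_download
from transformers import AutoTokenizer
import vllm

# Resolve the GPTQ-quantized checkpoint to a local directory
# (downloaded into the Hugging Face cache on first run).
model_path = snapshot_download("LnL-AI/glm-4-9b-chat-gptq-4bit-qubitium-r1")

# Tokenizer and vLLM engine are then both loaded from that local snapshot.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
llm = vllm.LLM(
    model_path,
    tensor_parallel_size=1,
    max_model_len=32768,
    trust_remote_code=True,
)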