Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Sleeping

devve1 committed on Jul 18

Commit

a04bced

•

1 Parent(s): 6effb6c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ import hydralit_components as hc
 from transformers import AutoTokenizer
 from qdrant_client import QdrantClient
 from optimum_encoder import OptimumEncoder
 from unstructured.partition.auto import partition
 from fastembed import SparseEmbedding, SparseTextEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
@@ -144,12 +145,12 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
 @st.cache_resource
 def load_models_and_documents():
     with st.spinner('Load models...'):
-        model_name = "LnL-AI/glm-4-9b-chat-gptq-4bit-qubitium-r1"
-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
         llm = vllm.LLM(
-            model_name,
             tensor_parallel_size=1,
             max_model_len=32768,
             trust_remote_code=True,

 from transformers import AutoTokenizer
 from qdrant_client import QdrantClient
 from optimum_encoder import OptimumEncoder
+from huggingface_hub import snapshot_download
 from unstructured.partition.auto import partition
 from fastembed import SparseEmbedding, SparseTextEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
 @st.cache_resource
 def load_models_and_documents():
     with st.spinner('Load models...'):
+        model_path = snapshot_download("LnL-AI/glm-4-9b-chat-gptq-4bit-qubitium-r1")
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
         llm = vllm.LLM(
+            model_path,
             tensor_parallel_size=1,
             max_model_len=32768,
             trust_remote_code=True,