shoaibmoghal committed
Commit b76bab7 · verified · 1 Parent(s): 96eac61

Update app.py

Files changed (1): app.py  +13 -8
app.py CHANGED
@@ -5,19 +5,24 @@ from PyPDF2 import PdfReader
 from docx import Document
 from huggingface_hub import hf_hub_download
 
-# ✅ Use a Smaller Quantized Model for Faster CPU Inference
+# ✅ Define model details
 MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
-MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"  # ✅ Using Q4_K_M instead of Q5_K_M
-CACHE_DIR = "/tmp/hf_cache"  # ✅ Hugging Face cache to avoid re-downloading
+MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q3_K_M.gguf"  # ✅ Use smaller Q3 model for faster CPU inference
+CACHE_DIR = "/tmp/hf_cache"
 
-# ✅ Load Model from Hugging Face Cache
-MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
+# ✅ Check if model exists in cache before downloading
+LOCAL_MODEL_PATH = os.path.join(CACHE_DIR, MODEL_FILE)
 
-print(f"✅ Model cached at {MODEL_PATH}")
+if not os.path.exists(LOCAL_MODEL_PATH):
+    print(f"🔹 Model not found locally. Downloading from Hugging Face...")
+    LOCAL_MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
+    print(f"✅ Model downloaded and cached at {LOCAL_MODEL_PATH}")
+else:
+    print(f"✅ Model already cached at {LOCAL_MODEL_PATH}")
 
 # ✅ Load Mistral 7B with Optimized Settings
-print(f"🔹 Loading Mistral 7B (Q4_K_M) from {MODEL_PATH} (This may take a while)")
-llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_gpu_layers=0)  # ✅ Reduced context length & forced CPU
+print(f"🔹 Loading Mistral 7B (Q3_K_M) from {LOCAL_MODEL_PATH} (This may take a while)")
+llm = Llama(model_path=LOCAL_MODEL_PATH, n_ctx=2048, n_gpu_layers=0)  # ✅ Reduce context length for speed
 print("✅ Model loaded successfully!")
 
 app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
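
For context, a minimal sketch of how the `llm` handle created in this hunk is typically queried with llama-cpp-python. The prompt wording, field list, and generation parameters below are illustrative assumptions, not code from this commit; only the `Llama(...)` construction above comes from app.py:

# Sketch (not part of this commit): querying the llm handle created above.
# Assumes the surrounding app.py has already extracted plain text from the
# uploaded PDF/DOCX resume.
def extract_resume_fields(resume_text: str) -> str:
    prompt = (
        "Extract the candidate's name, email, phone, skills, and work experience "
        "from the resume below and return them as JSON.\n\n"
        f"Resume:\n{resume_text}\n\nJSON:"
    )
    # llama-cpp-python's Llama object is callable and returns an OpenAI-style dict.
    # Prompt tokens + max_tokens must fit inside n_ctx=2048 set above.
    output = llm(prompt, max_tokens=512, temperature=0.1, stop=["\n\n"])
    return output["choices"][0]["text"].strip()

Because the commit keeps n_ctx at 2048 for speed, long resumes would need to be truncated or chunked before being placed in a prompt like this.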