shoaibmoghal committed
Commit b76bab7 · verified · 1 Parent(s): 96eac61

Update app.py

Files changed (1): app.py  +13 -8
app.py CHANGED
@@ -5,19 +5,24 @@ from PyPDF2 import PdfReader
 from docx import Document
 from huggingface_hub import hf_hub_download
 
-# ✅ Use a Smaller Quantized Model for Faster CPU Inference
+# ✅ Define model details
 MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
-MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"  # ✅ Using Q4_K_M instead of Q5_K_M
-CACHE_DIR = "/tmp/hf_cache"  # ✅ Hugging Face cache to avoid re-downloading
+MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q3_K_M.gguf"  # ✅ Use smaller Q3 model for faster CPU inference
+CACHE_DIR = "/tmp/hf_cache"
 
-# ✅ Load Model from Hugging Face Cache
-MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
+# ✅ Check if model exists in cache before downloading
+LOCAL_MODEL_PATH = os.path.join(CACHE_DIR, MODEL_FILE)
 
-print(f"✅ Model cached at {MODEL_PATH}")
+if not os.path.exists(LOCAL_MODEL_PATH):
+    print(f"🔹 Model not found locally. Downloading from Hugging Face...")
+    LOCAL_MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
+    print(f"✅ Model downloaded and cached at {LOCAL_MODEL_PATH}")
+else:
+    print(f"✅ Model already cached at {LOCAL_MODEL_PATH}")
 
 # ✅ Load Mistral 7B with Optimized Settings
-print(f"🔹 Loading Mistral 7B (Q4_K_M) from {MODEL_PATH} (This may take a while)")
-llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_gpu_layers=0)  # ✅ Reduced context length & forced CPU
+print(f"🔹 Loading Mistral 7B (Q3_K_M) from {LOCAL_MODEL_PATH} (This may take a while)")
+llm = Llama(model_path=LOCAL_MODEL_PATH, n_ctx=2048, n_gpu_layers=0)  # ✅ Reduce context length for speed
 print("✅ Model loaded successfully!")
 
 app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
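
For context, a minimal sketch of how the `llm` handle created in this hunk is typically queried with llama-cpp-python. The prompt wording, field list, and generation parameters below are illustrative assumptions, not code from this commit; only the `Llama(...)` construction above comes from app.py:

# Sketch (not part of this commit): querying the llm handle created above.
# Assumes the surrounding app.py has already extracted plain text from the
# uploaded PDF/DOCX resume.
def extract_resume_fields(resume_text: str) -> str:
    prompt = (
        "Extract the candidate's name, email, phone, skills, and work experience "
        "from the resume below and return them as JSON.\n\n"
        f"Resume:\n{resume_text}\n\nJSON:"
    )
    # llama-cpp-python's Llama object is callable and returns an OpenAI-style dict.
    # Prompt tokens + max_tokens must fit inside n_ctx=2048 set above.
    output = llm(prompt, max_tokens=512, temperature=0.1, stop=["\n\n"])
    return output["choices"][0]["text"].strip()

Because the commit keeps n_ctx at 2048 for speed, long resumes would need to be truncated or chunked before being placed in a prompt like this.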