hushh jobs v1
Dev Paragiri committed · Commit 39ea97d · Parent(s): 913f46e
Files changed:
- .gitattributes +0 -35
- .gitignore +133 -0
- README.md +4 -11
- candidate.py +24 -0
- embeddings.py +11 -0
- llm_config.py +20 -0
- main.py +124 -0
- rank.py +53 -0
- requirements.txt +123 -0
- shortlisted.csv +2 -0
- template.py +25 -0
.gitattributes
DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,133 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# Ignore VSCode settings
+.vscode/
README.md
CHANGED
@@ -1,12 +1,5 @@
----
-title: Hushh Jobs V1
-emoji: 🚀
-colorFrom: red
-colorTo: blue
-sdk: streamlit
-sdk_version: 1.31.0
-app_file: app.py
-pinned: false
----
+# hushh-jobs
 
-
+The Resume Shortlisting Tool is a project designed to streamline the end-to-end hiring process.
+
+It takes multiple resumes and a job description as input and shortlists resumes based on the job description. It returns a downloadable CSV file with structured details of the shortlisted candidates.
candidate.py
ADDED
@@ -0,0 +1,24 @@
+from typing import List
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+class Candidate(BaseModel):
+    name: str = Field(description="First name and last name of the candidate.")
+    email: str = Field(description="Email address of the candidate.")
+    phone: str = Field(description="Contact number with country code of the candidate.")
+    location: str = Field(description="City and state where the candidate resides.")
+    degree: List[str] = Field(description="List of the candidate's college degrees.")
+    college: List[str] = Field(description="List of all the colleges the candidate went to.")
+    skills: List[str] = Field(description="List of technical skills of the candidate.")
+    companies: List[str] = Field(
+        description="List only the names of the companies the candidate has worked at."
+    )
+    roles: List[str] = Field(
+        description="List all the job roles of the candidate at previous companies."
+    )
+    degree_year: int = Field(
+        description="The year in which the candidate completed their degree."
+    )
+    experience: float = Field(
+        description="Number of years of professional experience of the candidate."
+    )
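For reference, a minimal sketch of the schema in isolation. The sample values below are hypothetical; langchain_core.pydantic_v1 exposes the pydantic v1 API, so the model can be built and serialized directly:

# Hypothetical values, just to show what a populated Candidate looks like.
from candidate import Candidate

sample = Candidate(
    name="Jane Doe",
    email="jane@example.com",
    phone="+1 555 0100",
    location="Austin, Texas",
    degree=["B.S. Computer Science"],
    college=["University of Texas"],
    skills=["Python", "SQL"],
    companies=["Acme Corp"],
    roles=["Backend Engineer"],
    degree_year=2019,
    experience=4.5,
)
print(sample.json())  # pydantic v1 serialization via langchain_core.pydantic_v1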
embeddings.py
ADDED
@@ -0,0 +1,11 @@
+import requests
+import os
+
+model_id = "sentence-transformers/all-MiniLM-L6-v2"
+hf_token = os.environ["HF_TOKEN"]  # fail fast if the API token is missing
+api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
+headers = {"Authorization": f"Bearer {hf_token}"}
+
+
+def text_embedding(texts):
+    # Request embeddings from the HF Inference API, waiting for the model to load if needed.
+    response = requests.post(api_url, headers=headers, json={"inputs": texts, "options": {"wait_for_model": True}})
+    return response.json()
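A quick smoke test of text_embedding, assuming HF_TOKEN is set. For all-MiniLM-L6-v2 the feature-extraction endpoint should return a 384-dimensional sentence embedding for a single string:

# Hypothetical usage; requires a valid HF_TOKEN in the environment.
from embeddings import text_embedding

vec = text_embedding("Senior Python developer with NLP experience")
print(len(vec))  # expected: 384 for all-MiniLM-L6-v2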
llm_config.py
ADDED
@@ -0,0 +1,20 @@
+from langchain.output_parsers import PydanticOutputParser
+from langchain.prompts import PromptTemplate
+from langchain.llms import OpenAI
+from candidate import Candidate
+
+
+def instantiate_llm():
+    # Completion model plus a prompt that injects the Candidate schema's format instructions.
+    model_name = "gpt-3.5-turbo-instruct"
+    temperature = 0.0
+    model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=600)
+    parser = PydanticOutputParser(pydantic_object=Candidate)
+
+    prompt = PromptTemplate(
+        template="Answer the user query.\n{format_instructions}\n{query}\n",
+        input_variables=["query"],
+        partial_variables={"format_instructions": parser.get_format_instructions()},
+    )
+
+    return model, prompt
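A minimal sketch of how the returned pair is meant to be used; this mirrors the call pattern in main.py, assumes OPENAI_API_KEY is set, and the resume-text placeholder is hypothetical:

# Hypothetical usage; requires OPENAI_API_KEY in the environment.
from llm_config import instantiate_llm

model, prompt = instantiate_llm()
query = "Return only a json based on this candidate's resume information: <resume text>"
raw = model(prompt.format_prompt(query=query).to_string())  # completion-style call
print(raw)  # JSON string following the Candidate format instructions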
main.py
ADDED
@@ -0,0 +1,124 @@
+import os
+import csv
+import json
+import streamlit as st
+from PyPDF2 import PdfReader
+from llm_config import instantiate_llm
+from langchain.callbacks import get_openai_callback
+from langchain.llms import OpenAI
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.output_parsers import PydanticOutputParser
+from template import prompt_template
+import pandas as pd
+from candidate import Candidate
+import logging
+from rank import extract_and_rank
+
+
+def extract_resume(resume):
+    # Read every page of an uploaded PDF and return its concatenated text.
+    reader = PdfReader(resume)
+    return "".join(page.extract_text() for page in reader.pages)
+
+
+def main():
+    st.set_page_config(layout="wide", page_title="Hushh Jobs")
+    st.header("Hushh Jobs")
+    model, prompt = instantiate_llm()
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
+        resumes = st.file_uploader(
+            "Upload resumes here!", accept_multiple_files=True, type="pdf"
+        )
+
+    with col2:
+        no_of_resumes = st.number_input(
+            "Enter the number of resumes you want to shortlist", step=1
+        )
+
+    with col3:
+        job_description = st.text_area("Enter the job description here!", height=250)
+        rank_btn = st.button("Rank")
+
+    if resumes and rank_btn:
+        if len(job_description) < 25:
+            st.warning(
+                "Invalid or empty job description! Please make sure your job description has at least 25 characters!"
+            )
+        else:
+            dict_object = {}
+            rows = []
+            ranked_resumes, embeddings_bank, text_bank = extract_and_rank(
+                resumes, job_description
+            )
+
+            no_of_resumes = int(no_of_resumes)
+            for selected_resume in ranked_resumes[:no_of_resumes]:
+                resume_text = text_bank[selected_resume[0]]
+
+                doc_query = f"Return only a json based on this candidate's resume information: {resume_text}"
+                llm_input = prompt.format_prompt(query=doc_query)
+
+                # PydanticOutputParser structures the model response into the Candidate schema.
+                parser = PydanticOutputParser(pydantic_object=Candidate)
+
+                with get_openai_callback() as cb:
+                    try:
+                        result = model(llm_input.to_string())
+                        st.success(result)
+                        class_object = parser.parse(result)  # parse the raw completion into a Candidate
+                        dict_object = class_object.__dict__
+                        rows.append(dict_object)
+                    except Exception as error:
+                        print(error)
+            field_names = [
+                "name",
+                "email",
+                "phone",
+                "location",
+                "degree",
+                "college",
+                "skills",
+                "companies",
+                "roles",
+                "degree_year",
+                "experience",
+            ]
+            user_csv = "shortlisted.csv"
+            write_csv(user_csv=user_csv, field_names=field_names, rows=rows)
+            df = pd.read_csv(user_csv)
+            st.dataframe(df)
+
+
+def write_csv(user_csv, field_names, rows):
+    # Persist the shortlisted candidates to a CSV with a fixed column order.
+    with open(user_csv, "w") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=field_names)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow(row)
+
+
+def write_response(user_csv, response: str):
+    """
+    Write a response from an agent to the Streamlit app.
+
+    Args:
+        user_csv: Path to the shortlisted-candidates CSV.
+        response: The response from the agent, expected to be a one-line pandas query.
+
+    Returns:
+        None.
+    """
+    df = pd.read_csv(user_csv)
+    data = eval(response)  # evaluate the model-generated pandas query against df
+    st.dataframe(data=data, use_container_width=True)
+
+
+if __name__ == "__main__":
+    main()
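To exercise write_csv in isolation, a small sketch with a hypothetical row; importing main pulls in streamlit and langchain, so the app's dependencies must be installed:

# Hypothetical standalone check of write_csv.
from main import write_csv
import pandas as pd

row = {"name": "Jane Doe", "email": "jane@example.com", "phone": "+1 555 0100",
       "location": "Austin, Texas", "degree": ["B.S."], "college": ["UT Austin"],
       "skills": ["Python"], "companies": ["Acme Corp"], "roles": ["Engineer"],
       "degree_year": 2019, "experience": 4.5}
write_csv(user_csv="shortlisted.csv", field_names=list(row.keys()), rows=[row])
print(pd.read_csv("shortlisted.csv").head())  # list fields are stored as their string repr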
rank.py
ADDED
@@ -0,0 +1,53 @@
+from PyPDF2 import PdfReader
+import streamlit as st
+from embeddings import text_embedding
+import scipy.spatial.distance
+
+
+def extract_and_rank(resumes, job_description):
+    # Build per-resume embedding and raw-text lookups, then rank them against the JD.
+    out_embed_dict = {}
+    out_text_dict = {}
+    for resume in resumes:
+        reader = PdfReader(resume)
+        raw_text = "".join(page.extract_text() for page in reader.pages)
+        embedding = text_embedding(raw_text)
+        out_embed_dict[resume.name] = embedding
+        out_text_dict[resume.name] = raw_text
+    ranked_output = rankings(out_dict=out_embed_dict, query=job_description)
+    return ranked_output, out_embed_dict, out_text_dict
+
+
+def get_sim(query_embedding, average_vec):
+    # Cosine similarity between the job-description embedding and a resume embedding.
+    try:
+        sim = [(1 - scipy.spatial.distance.cosine(query_embedding, average_vec))]
+        return sim
+    except Exception:
+        return [0]
+
+
+def rankings(out_dict, query):
+    # Sort resumes by cosine similarity to the job-description embedding.
+    query_embedding = text_embedding(query)
+    rank = []
+    for k, v in out_dict.items():
+        rank.append((k, get_sim(query_embedding, v)))
+    rank = sorted(rank, key=lambda t: t[1], reverse=True)
+    return rank
+
+
+# def data_clean(text):
+#     pattern = r'[^a-zA-Z0-9\s]'
+#     text = re.sub(pattern, '', ' '.join(text))
+#     tokens = [token.strip() for token in text.split()]
+#     filtered = [token for token in tokens if token.lower() not in stopword_list]
+#     filtered = ' '.join(filtered)
+#     return filtered
+
+
+# def embeddings(word):
+#     if word in wv.key_to_index:
+#         return wv.get_vector(word)
+#     else:
+#         return np.zeros(300)
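The ranking logic reduces to cosine similarity; a self-contained toy example with hypothetical 3-dimensional vectors, no API calls involved:

# Toy illustration of the similarity used in rankings(); vectors are made up.
from scipy.spatial.distance import cosine

jd = [0.9, 0.1, 0.0]  # job-description embedding (toy)
resumes = {"a.pdf": [0.8, 0.2, 0.1], "b.pdf": [0.0, 1.0, 0.3]}
rank = sorted(
    ((name, [1 - cosine(jd, vec)]) for name, vec in resumes.items()),
    key=lambda t: t[1],
    reverse=True,
)
print(rank)  # a.pdf ranks first: its vector points much closer to the JD's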
requirements.txt
ADDED
@@ -0,0 +1,123 @@
+aiohttp==3.9.1
+aiosignal==1.3.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+arrow==1.3.0
+attrs==23.1.0
+av==10.0.0
+black==23.12.1
+blinker==1.7.0
+cachetools==5.3.2
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+coloredlogs==15.0.1
+ctranslate2==3.18.0
+dataclasses-json==0.6.3
+distro==1.8.0
+faster-whisper==0.7.1
+filelock==3.12.2
+Flask==2.3.3
+Flask-Cors==4.0.0
+flatbuffers==23.5.26
+frozenlist==1.4.1
+fsspec==2023.6.0
+gensim==4.3.2
+gitdb==4.0.11
+GitPython==3.1.40
+google-api-core==2.14.0
+google-auth==2.23.4
+google-cloud==0.34.0
+google-cloud-core==2.3.3
+google-cloud-speech==2.22.0
+google-cloud-storage==2.13.0
+google-crc32c==1.5.0
+google-resumable-media==2.6.0
+googleapis-common-protos==1.61.0
+greenlet==3.0.1
+grpcio==1.59.3
+grpcio-status==1.59.3
+gunicorn==21.2.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.16.4
+humanfriendly==10.0
+idna==3.6
+importlib-metadata==6.11.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+jinja2-time==0.2.0
+joblib==1.3.2
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.20.0
+jsonschema-specifications==2023.11.2
+langchain==0.0.352
+langchain-community==0.0.5
+langchain-core==0.1.2
+langsmith==0.0.72
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.7
+multidict==6.0.4
+mypy-extensions==1.0.0
+nltk==3.8.1
+numpy==1.26.2
+onnxruntime==1.15.1
+openai==1.6.0
+packaging==23.2
+pandas==2.1.4
+pathspec==0.12.1
+Pillow==10.1.0
+platformdirs==4.1.0
+proto-plus==1.22.3
+protobuf==4.25.1
+pyarrow==14.0.2
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pydantic==2.5.2
+pydantic_core==2.14.5
+pydeck==0.8.1b0
+Pygments==2.17.2
+pynvim==0.4.3
+PyPDF2==3.0.1
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+pytz==2023.3.post1
+PyYAML==6.0.1
+referencing==0.32.0
+regex==2023.10.3
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.15.2
+rsa==4.9
+scipy==1.11.4
+six==1.16.0
+smart-open==6.4.0
+smmap==5.0.1
+sniffio==1.3.0
+SQLAlchemy==2.0.23
+streamlit==1.29.0
+sympy==1.12
+tenacity==8.2.3
+tokenizers==0.13.3
+toml==0.10.2
+toolz==0.12.0
+tornado==6.4
+tqdm==4.66.1
+types-python-dateutil==2.8.19.14
+typing-inspect==0.9.0
+typing_extensions==4.9.0
+tzdata==2023.3
+tzlocal==5.2
+urllib3==2.1.0
+validators==0.22.0
+watchdog==3.0.0
+Werkzeug==2.3.7
+yarl==1.9.4
+zipp==3.17.0
shortlisted.csv
ADDED
@@ -0,0 +1,2 @@
+name,email,phone,location,degree,college,skills,companies,roles,degree_year,experience
+Ayushi Bhatnagar,[email protected],9873047199,"Ghaziabad, U.P.",['B.Tech'],['Krishna Engineering College(AKTU)'],"['Swift', 'Dart', 'Core Java', 'SwiftUI', 'UIkit', 'Flutter', 'MSSQL', 'Xcode', 'Android Studio', 'Postman', 'Gitlab', 'Github Desktop', 'Zeplin', 'Figma', 'MS Word', 'MS Excel', 'Google Sheets']","['Augurs Technologies', 'Innefu Labs', 'Devarty Technologies']","['Executive (iOS)', 'Software Engineer(iOS)']",2020,2.0
template.py
ADDED
@@ -0,0 +1,25 @@
+prompt_template = """
+
+
+Question: {question}\
+The response must ONLY contain the code snippet and NOTHING else.
+The response must be one single line which contains only the query and must not be assigned to a variable.
+
+Make sure you follow the instructions/thought process below.
+Return a pandas DF query based on the question and CSV file schema below.
+
+Instructions:
+Make sure that the pandas query always accounts for search results which are very similar to the one asked in the question.
+
+Example 1:
+Question: Candidates who have worked at a bank
+df[df['companies'].str.contains('bank', case=False, na=False)]
+
+Example 2:
+Question: Candidates from Gurgaon
+df[df['location'].str.contains('Gurgaon', case=False, na=False)]
+
+CSV file schema:
+You have access to a resume candidates CSV file which has the name, email, location, degree, college, skills, companies, roles, degree_year, and experience as columns.
+
+"""
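A sketch of how this template is intended to be used; the question and the model's one-line reply below are hypothetical, and the final eval mirrors write_response() in main.py:

# Hypothetical use of prompt_template: fill in a question, send to an LLM,
# then evaluate the one-line pandas query it returns against the CSV.
import pandas as pd
from template import prompt_template

filled = prompt_template.format(question="Candidates with Flutter experience")
# An LLM call on `filled` is expected to return a single pandas query, e.g.:
response = "df[df['skills'].str.contains('Flutter', case=False, na=False)]"

df = pd.read_csv("shortlisted.csv")
print(eval(response))  # filters the shortlist the same way write_response() does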