Dev Paragiri committed on
Commit 39ea97d · 1 Parent(s): 913f46e

hushh jobs v1

Files changed (11)
  1. .gitattributes +0 -35
  2. .gitignore +133 -0
  3. README.md +4 -11
  4. candidate.py +24 -0
  5. embeddings.py +11 -0
  6. llm_config.py +20 -0
  7. main.py +124 -0
  8. rank.py +53 -0
  9. requirements.txt +123 -0
  10. shortlisted.csv +2 -0
  11. template.py +25 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,133 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # Ignore VSCode settings
+ .vscode/
README.md CHANGED
@@ -1,12 +1,5 @@
- ---
- title: Hushh Jobs V1
- emoji: 🚀
- colorFrom: red
- colorTo: blue
- sdk: streamlit
- sdk_version: 1.31.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # hushh-jobs
+
+ The Resume Shortlisting Tool is a project designed to streamline the end-to-end hiring process.
+
+ It takes multiple resumes and a job description as input and shortlists resumes based on the job description. It returns a downloadable CSV file with structured details of the shortlisted candidates.
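
A minimal sketch of how the pieces added in this commit fit together outside Streamlit (assuming `HF_TOKEN` and `OPENAI_API_KEY` are set in the environment; the sample PDF path and job description are hypothetical):

```python
from rank import extract_and_rank
from llm_config import instantiate_llm

# Rank one (hypothetical) resume against a job description.
with open("sample_resume.pdf", "rb") as f:
    ranked, _, texts = extract_and_rank([f], "Senior iOS engineer, 3+ years of Swift")

# Extract structured details from the top-ranked resume.
model, prompt = instantiate_llm()
top_resume_text = texts[ranked[0][0]]
result = model(prompt.format_prompt(
    query=f"Return only a json based on this candidate's resume information: {top_resume_text}"
).to_string())
print(result)  # structured candidate JSON, later written to shortlisted.csv
```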
candidate.py ADDED
@@ -0,0 +1,24 @@
+ from typing import List
+ from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+ class Candidate(BaseModel):
+     name: str = Field(description="First name and last name of the candidate.")
+     email: str = Field(description="Email address of the candidate.")
+     phone: str = Field(description="Contact number with country code of the candidate.")
+     location: str = Field(description="City and state where the candidate resides.")
+     degree: List[str] = Field(description="List of the candidate's college degrees.")
+     college: List[str] = Field(description="List of all the colleges the candidate went to.")
+     skills: List[str] = Field(description="List of technical skills of the candidate.")
+     companies: List[str] = Field(
+         description="List only the names of the companies the candidate has worked at."
+     )
+     roles: List[str] = Field(
+         description="List all the job roles of the candidate at previous companies."
+     )
+     degree_year: int = Field(
+         description="The year in which the candidate completed their degree."
+     )
+     experience: float = Field(
+         description="Number of years of professional experience of the candidate."
+     )
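
The `Candidate` schema drives both the format instructions injected into the prompt and the parsing of the model's reply. A small sketch of that round trip (the JSON completion below is invented for illustration):

```python
from langchain.output_parsers import PydanticOutputParser
from candidate import Candidate

parser = PydanticOutputParser(pydantic_object=Candidate)
print(parser.get_format_instructions())  # JSON-schema instructions sent to the LLM

# Parse a hypothetical model completion back into a typed object.
raw = '''{"name": "Jane Doe", "email": "jane@example.com", "phone": "+1 555 0100",
"location": "Austin, TX", "degree": ["B.S."], "college": ["UT Austin"],
"skills": ["Python"], "companies": ["Acme"], "roles": ["Engineer"],
"degree_year": 2019, "experience": 4.5}'''
candidate = parser.parse(raw)
print(candidate.name, candidate.experience)
```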
embeddings.py ADDED
@@ -0,0 +1,11 @@
+ import requests
+ import os
+ # Requires HF_TOKEN to be set in the environment.
+ model_id = "sentence-transformers/all-MiniLM-L6-v2"
+ hf_token = os.environ.get("HF_TOKEN")
+ api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
+ headers = {"Authorization": f"Bearer {hf_token}"}
+
+ def text_embedding(texts):
+     response = requests.post(api_url, headers=headers, json={"inputs": texts, "options": {"wait_for_model": True}})
+     return response.json()
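
For reference, the feature-extraction endpoint returns one embedding per input; for all-MiniLM-L6-v2 that should be a 384-dimensional vector. A hedged usage sketch (assumes `HF_TOKEN` is set and the API call succeeds):

```python
from embeddings import text_embedding

# A single string yields a single embedding vector.
vec = text_embedding("Senior iOS engineer with 3 years of Swift experience")
print(len(vec))  # expected: 384

# A list of strings yields a list of vectors.
vecs = text_embedding(["resume one", "resume two"])
print(len(vecs), len(vecs[0]))  # expected: 2 384
```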
llm_config.py ADDED
@@ -0,0 +1,20 @@
+ from langchain.output_parsers import PydanticOutputParser
+ from langchain.prompts import PromptTemplate
+ from langchain.llms import OpenAI
+ from candidate import Candidate
+ import os
+
+
+ def instantiate_llm():
+     model_name = "gpt-3.5-turbo-instruct"
+     temperature = 0.0
+     model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=600)
+     parser = PydanticOutputParser(pydantic_object=Candidate)
+
+     prompt = PromptTemplate(
+         template="Answer the user query.\n{format_instructions}\n{query}\n",
+         input_variables=["query"],
+         partial_variables={"format_instructions": parser.get_format_instructions()},
+     )
+
+     return model, prompt
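
A quick way to see what this configuration produces (assumes `OPENAI_API_KEY` is set): the `Candidate` JSON schema is baked in via the partial `{format_instructions}` variable, so only `{query}` remains to fill at call time.

```python
from llm_config import instantiate_llm

model, prompt = instantiate_llm()
# Inspect the final prompt text before it is sent to the model.
print(prompt.format_prompt(query="<resume text goes here>").to_string())
```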
main.py ADDED
@@ -0,0 +1,124 @@
+ import os
+ import csv
+ import json
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ from llm_config import instantiate_llm
+ from langchain.callbacks import get_openai_callback
+ from langchain.llms import OpenAI
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain.output_parsers import PydanticOutputParser
+ from template import prompt_template
+ import pandas as pd
+ from candidate import Candidate
+ import logging
+ from rank import extract_and_rank
+
+
+ def extract_resume(resume):
+     reader = PdfReader(resume)
+     return "".join(page.extract_text() for page in reader.pages)
+
+
+ def main():
+     st.set_page_config(layout="wide", page_title="Hushh Jobs")
+     st.header("Hushh Jobs")
+     model, prompt = instantiate_llm()
+     col1, col2, col3 = st.columns(3)
+
+     with col1:
+         resumes = st.file_uploader(
+             "Upload resumes here!", accept_multiple_files=True, type="pdf"
+         )
+
+     with col2:
+         no_of_resumes = st.number_input("Enter the number of resumes you want to shortlist", step=1)
+
+     with col3:
+         job_description = st.text_area("Enter the job description here!", height=250)
+         rank_btn = st.button("Rank")
+
+     if resumes and rank_btn:
+         if len(job_description) < 25:
+             st.warning(
+                 "Invalid or empty job description! Please make sure your job description has at least 25 characters!"
+             )
+         else:
+             dict_object = {}
+             rows = []
+             ranked_resumes, embeddings_bank, text_bank = extract_and_rank(
+                 resumes, job_description
+             )
+
+             no_of_resumes = int(no_of_resumes)
+             for selected_resume in ranked_resumes[:no_of_resumes]:
+                 resume_text = text_bank[selected_resume[0]]
+
+                 doc_query = f"Return only a json based on this candidate's resume information: {resume_text}"
+                 input = prompt.format_prompt(query=doc_query)
+
+                 # PydanticOutputParser structures the LLM response into the Candidate schema.
+                 parser = PydanticOutputParser(pydantic_object=Candidate)
+
+                 with get_openai_callback() as cb:
+                     try:
+                         result = model(input.to_string())
+                         st.success(result)
+                         class_object = parser.parse(result)  # parse the raw completion into a Candidate object
+                         dict_object = class_object.__dict__
+                         # dict_object = json.loads(result)
+                         rows.append(dict_object)
+                     except Exception as error:
+                         print(error)
+             field_names = [
+                 "name",
+                 "email",
+                 "phone",
+                 "location",
+                 "degree",
+                 "college",
+                 "skills",
+                 "companies",
+                 "roles",
+                 "degree_year",
+                 "experience",
+             ]
+             user_csv = "shortlisted.csv"
+             write_csv(user_csv=user_csv, field_names=field_names, rows=rows)
+             df = pd.read_csv(user_csv)
+             st.dataframe(df)
+
+
+ # Write the shortlisted candidate rows to a CSV file.
+ def write_csv(user_csv, field_names, rows):
+     with open(user_csv, "w") as csvfile:
+         writer = csv.DictWriter(csvfile, fieldnames=field_names)
+         writer.writeheader()
+         for row in rows:
+             writer.writerow(row)
+
+
+ def write_response(user_csv, response: str):
+     """
+     Render an agent-generated pandas query result in the Streamlit app.
+
+     Args:
+         user_csv: Path to the shortlisted-candidates CSV file.
+         response: A pandas expression string that references `df`.
+
+     Returns:
+         None.
+     """
+     df = pd.read_csv(user_csv)
+     # eval executes the model-generated pandas query against `df`.
+     data = eval(response)
+     st.dataframe(data=data, use_container_width=True)
+
+
+ if __name__ == "__main__":
+     main()
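
Note that `write_response` evaluates a model-generated pandas expression with a bare `eval`, which will execute arbitrary Python. A common hardening step, a sketch rather than a full sandbox, is to strip builtins and expose only `df` and `pd`:

```python
import pandas as pd

def run_generated_query(df: pd.DataFrame, response: str):
    # Restrict the namespace so the expression can only reference df and pd.
    # This is a mitigation, not a sandbox: eval of untrusted input stays risky.
    return eval(response, {"__builtins__": {}}, {"df": df, "pd": pd})

df = pd.read_csv("shortlisted.csv")
print(run_generated_query(df, "df[df['location'].str.contains('Gurgaon', case=False, na=False)]"))
```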
rank.py ADDED
@@ -0,0 +1,53 @@
+ from PyPDF2 import PdfReader
+ import streamlit as st
+ from embeddings import text_embedding
+ import scipy.spatial  # plain `import scipy` does not expose scipy.spatial.distance
+
+
+ def extract_and_rank(resumes, job_description):
+     out_embed_dict = {}
+     out_text_dict = {}
+     for resume in resumes:
+         reader = PdfReader(resume)
+         raw_text = "".join(page.extract_text() for page in reader.pages)
+         embedding = text_embedding(raw_text)
+         out_embed_dict[resume.name] = embedding
+         out_text_dict[resume.name] = raw_text
+     ranked_output = rankings(out_dict=out_embed_dict, query=job_description)
+     return ranked_output, out_embed_dict, out_text_dict
+
+
+ def get_sim(query_embedding, average_vec):
+     try:
+         sim = [1 - scipy.spatial.distance.cosine(query_embedding, average_vec)]
+         return sim
+     except Exception:
+         return [0]
+
+
+ def rankings(out_dict, query):
+     query_embedding = text_embedding(query)
+     rank = []
+     for k, v in out_dict.items():
+         rank.append((k, get_sim(query_embedding, v)))
+     rank = sorted(rank, key=lambda t: t[1], reverse=True)
+     return rank
+
+
+ # def data_clean(text):
+ #     pattern = r'[^a-zA-Z0-9\s]'
+ #     text = re.sub(pattern, '', ' '.join(text))
+ #     tokens = [token.strip() for token in text.split()]
+ #     filtered = [token for token in tokens if token.lower() not in stopword_list]
+ #     filtered = ' '.join(filtered)
+ #     return filtered
+
+
+ # def embeddings(word):
+ #     if word in wv.key_to_index:
+ #         return wv.get_vector(word)
+ #     else:
+ #         return np.zeros(300)
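
The ranking score is cosine similarity, computed as one minus SciPy's cosine distance, so parallel vectors score 1.0 and orthogonal vectors score 0.0. A toy check with made-up vectors (real ones come from `text_embedding`):

```python
import scipy.spatial

# 1 - cosine distance, the similarity used by rank.get_sim
print(1 - scipy.spatial.distance.cosine([1.0, 0.0], [2.0, 0.0]))  # 1.0, parallel
print(1 - scipy.spatial.distance.cosine([1.0, 0.0], [0.0, 3.0]))  # 0.0, orthogonal

# The (name, [score]) tuples sort correctly because lists compare elementwise.
rank = [("a.pdf", [0.31]), ("b.pdf", [0.87])]
print(sorted(rank, key=lambda t: t[1], reverse=True))  # b.pdf first
```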
requirements.txt ADDED
@@ -0,0 +1,123 @@
+ aiohttp==3.9.1
+ aiosignal==1.3.1
+ altair==5.2.0
+ annotated-types==0.6.0
+ anyio==4.2.0
+ arrow==1.3.0
+ attrs==23.1.0
+ av==10.0.0
+ black==23.12.1
+ blinker==1.7.0
+ cachetools==5.3.2
+ certifi==2023.11.17
+ charset-normalizer==3.3.2
+ click==8.1.7
+ coloredlogs==15.0.1
+ ctranslate2==3.18.0
+ dataclasses-json==0.6.3
+ distro==1.8.0
+ faster-whisper==0.7.1
+ filelock==3.12.2
+ Flask==2.3.3
+ Flask-Cors==4.0.0
+ flatbuffers==23.5.26
+ frozenlist==1.4.1
+ fsspec==2023.6.0
+ gensim==4.3.2
+ gitdb==4.0.11
+ GitPython==3.1.40
+ google-api-core==2.14.0
+ google-auth==2.23.4
+ google-cloud==0.34.0
+ google-cloud-core==2.3.3
+ google-cloud-speech==2.22.0
+ google-cloud-storage==2.13.0
+ google-crc32c==1.5.0
+ google-resumable-media==2.6.0
+ googleapis-common-protos==1.61.0
+ greenlet==3.0.1
+ grpcio==1.59.3
+ grpcio-status==1.59.3
+ gunicorn==21.2.0
+ h11==0.14.0
+ httpcore==1.0.2
+ httpx==0.26.0
+ huggingface-hub==0.16.4
+ humanfriendly==10.0
+ idna==3.6
+ importlib-metadata==6.11.0
+ itsdangerous==2.1.2
+ Jinja2==3.1.2
+ jinja2-time==0.2.0
+ joblib==1.3.2
+ jsonpatch==1.33
+ jsonpointer==2.4
+ jsonschema==4.20.0
+ jsonschema-specifications==2023.11.2
+ langchain==0.0.352
+ langchain-community==0.0.5
+ langchain-core==0.1.2
+ langsmith==0.0.72
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.3
+ marshmallow==3.20.1
+ mdurl==0.1.2
+ mpmath==1.3.0
+ msgpack==1.0.7
+ multidict==6.0.4
+ mypy-extensions==1.0.0
+ nltk==3.8.1
+ numpy==1.26.2
+ onnxruntime==1.15.1
+ openai==1.6.0
+ packaging==23.2
+ pandas==2.1.4
+ pathspec==0.12.1
+ Pillow==10.1.0
+ platformdirs==4.1.0
+ proto-plus==1.22.3
+ protobuf==4.25.1
+ pyarrow==14.0.2
+ pyasn1==0.5.0
+ pyasn1-modules==0.3.0
+ pydantic==2.5.2
+ pydantic_core==2.14.5
+ pydeck==0.8.1b0
+ Pygments==2.17.2
+ pynvim==0.4.3
+ PyPDF2==3.0.1
+ python-dateutil==2.8.2
+ python-dotenv==1.0.0
+ pytz==2023.3.post1
+ PyYAML==6.0.1
+ referencing==0.32.0
+ regex==2023.10.3
+ requests==2.31.0
+ rich==13.7.0
+ rpds-py==0.15.2
+ rsa==4.9
+ scipy==1.11.4
+ six==1.16.0
+ smart-open==6.4.0
+ smmap==5.0.1
+ sniffio==1.3.0
+ SQLAlchemy==2.0.23
+ streamlit==1.29.0
+ sympy==1.12
+ tenacity==8.2.3
+ tokenizers==0.13.3
+ toml==0.10.2
+ toolz==0.12.0
+ tornado==6.4
+ tqdm==4.66.1
+ types-python-dateutil==2.8.19.14
+ typing-inspect==0.9.0
+ typing_extensions==4.9.0
+ tzdata==2023.3
+ tzlocal==5.2
+ urllib3==2.1.0
+ validators==0.22.0
+ watchdog==3.0.0
+ Werkzeug==2.3.7
+ yarl==1.9.4
+ zipp==3.17.0
shortlisted.csv ADDED
@@ -0,0 +1,2 @@
+ name,email,phone,location,degree,college,skills,companies,roles,degree_year,experience
+ Ayushi Bhatnagar,[email protected],9873047199,"Ghaziabad, U.P.",['B.Tech'],['Krishna Engineering College(AKTU)'],"['Swift', 'Dart', 'Core Java', 'SwiftUI', 'UIkit', 'Flutter', 'MSSQL', 'Xcode', 'Android Studio', 'Postman', 'Gitlab', 'Github Desktop', 'Zeplin', 'Figma', 'MS Word', 'MS Excel', 'Google Sheets']","['Augurs Technologies', 'Innefu Labs', 'Devarty Technologies']","['Executive (iOS)', 'Software Engineer(iOS)']",2020,2.0
template.py ADDED
@@ -0,0 +1,25 @@
+ prompt_template = """
+
+
+ Question: {question}\
+ The response must ONLY contain the code snippet and NOTHING else.
+ The response must be one single line which contains only the query and must not be assigned to a variable.
+
+ Make sure you follow the instructions/thought process below.
+ Return a pandas DF query based on the question and CSV file schema below.
+
+ Instructions:
+ Make sure that the pandas query always accounts for search results which are very similar to the one asked in the question.
+
+ Example 1:
+ Question: Candidates who have worked at a bank
+ df[df['companies'].str.contains('bank', case=False, na=False)]
+
+ Example 2:
+ Question: Candidates from Gurgaon
+ df[df['location'].str.contains('Gurgaon', case=False, na=False)]
+
+ CSV file schema:
+ You have access to a resume candidates CSV file which has the name, email, location, degree, college, skills, companies, roles, degree_year, and experience as columns.
+
+ """