Prernas19 committed
Commit dede9ea · verified · 1 Parent(s): b51d76e

Create app.py

Files changed (1)
app.py +181 -0
app.py ADDED
@@ -0,0 +1,181 @@
+ import os
+ import re
+ import fitz  # Importing PyMuPDF for PDF text extraction
+ import nltk
+ from gensim.models.doc2vec import Doc2Vec, TaggedDocument
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import pandas as pd
+ import gradio as gr
+
+ # Download NLTK data files
+ nltk.download('punkt')
+ nltk.download('stopwords')
+
+ # Function to preprocess text
+ def preprocess_text(text):
+     text = re.sub(r'\W+', ' ', text.lower())  # Remove non-alphanumeric characters and lowercase
+     return text
+
+ # Function to extract keywords using TF-IDF
+ def extract_keywords_tfidf(text, max_features=50):
+     vectorizer = TfidfVectorizer(stop_words='english', max_features=max_features)
+     tfidf_matrix = vectorizer.fit_transform([text])
+     feature_names = vectorizer.get_feature_names_out()
+     tfidf_scores = tfidf_matrix.toarray().flatten()
+     keyword_scores = sorted(zip(tfidf_scores, feature_names), reverse=True)
+     return [keyword for score, keyword in keyword_scores]
+
+ # Function to extract text from a PDF
+ def extract_text_from_pdf(pdf_path):
+     document = fitz.open(pdf_path)
+     text = ""
+     for page_num in range(len(document)):
+         page = document.load_page(page_num)
+         text += page.get_text()
+     return text
+
+ # Function to give feedback on a resume
+ def give_feedback(resume_text, job_description):
+     feedback = []
+
+     # Check formatting (example: consistency in bullet points)
+     if '•' in resume_text and '-' in resume_text:
+         feedback.append("Consider using a consistent bullet point style throughout your resume.")
+
+     # Check for standard resume sections
+     if not any(re.findall(r'\bexperience\b|\beducation\b|\bskills\b', resume_text.lower())):
+         feedback.append("Make sure your resume includes sections like Experience, Education, and Skills.")
+
+     # Extract keywords and check relevance
+     jd_keywords = extract_keywords_tfidf(preprocess_text(job_description))
+     resume_keywords = extract_keywords_tfidf(preprocess_text(resume_text))
+
+     common_keywords = set(jd_keywords).intersection(set(resume_keywords))
+     if len(common_keywords) < 8:
+         feedback.append(f"Your resume could better match the job description. Consider adding keywords such as: {', '.join(jd_keywords[:5])}.")
+
+     # Check for action verbs
+     action_verbs = ["managed", "led", "developed", "designed", "implemented", "created"]
+     if not any(verb in resume_text.lower() for verb in action_verbs):
+         feedback.append("Consider using strong action verbs to describe your achievements and responsibilities.")
+
+     # Check for a summary or objective section
+     if not re.search(r'\bsummary\b|\bobjective\b', resume_text, re.IGNORECASE):
+         feedback.append("Consider adding a professional summary or objective statement to provide a quick overview of your qualifications.")
+
+     # Check for quantifiable achievements
+     if not re.findall(r'\d+', resume_text):
+         feedback.append("Include quantifiable achievements in your experience section (e.g., increased sales by 20%).")
+
+     # Provide positive feedback if none of the above conditions are met
+     if not feedback:
+         feedback.append("Your resume is well-aligned with the job description. Keep it updated with relevant keywords and achievements.")
+
+     return feedback
+
+ # Function to calculate TF-IDF cosine similarity score
+ def tfidf_cosine_similarity(resume, jd):
+     documents = [resume, jd]
+     vectorizer = TfidfVectorizer()
+     tfidf_matrix = vectorizer.fit_transform(documents)
+
+     cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
+     return cosine_sim[0][0]
+
+ # Function to calculate Doc2Vec cosine similarity score
+ def doc2vec_cosine_similarity(resume, jd, model):
+     resume_vector = model.infer_vector(resume.split())
+     jd_vector = model.infer_vector(jd.split())
+
+     cosine_sim = cosine_similarity([resume_vector], [jd_vector])
+     return cosine_sim[0][0]
+
+ # Function to extract years of experience from a resume
+ def extract_years_of_experience(text):
+     years = re.findall(r'(\d+)\s+year[s]*', text, re.IGNORECASE)
+     if years:
+         return sum(map(int, years))
+     return 0
+
+ # Function to extract information from the uploaded resume files
+ def extract_info_from_resumes(resume_files, job_description):
+     data = []
+
+     # Train Doc2Vec model on resumes and job description
+     documents = []
+     for file in resume_files:
+         # gr.Files may pass plain file paths or file objects, so resolve the path either way
+         path = file if isinstance(file, str) else file.name
+         text = extract_text_from_pdf(path)
+         documents.append(preprocess_text(text))
+
+     documents.append(preprocess_text(job_description))
+     tagged_docs = [TaggedDocument(doc.split(), [i]) for i, doc in enumerate(documents)]
+     model = Doc2Vec(tagged_docs, vector_size=50, window=2, min_count=1, workers=4)
+
+     for file in resume_files:
+         path = file if isinstance(file, str) else file.name
+         text = extract_text_from_pdf(path)
+
+         preprocessed_text = preprocess_text(text)
+         resume_keywords = extract_keywords_tfidf(preprocessed_text)
+         years_of_experience = extract_years_of_experience(text)
+
+         # Append years of experience to the resume keywords
+         if years_of_experience > 0:
+             resume_keywords.append(f"{years_of_experience} years experience")
+
+         name = os.path.splitext(os.path.basename(path))[0]
+
+         feedback = give_feedback(text, job_description)
+
+         # Calculate scores
+         jd_keywords = extract_keywords_tfidf(preprocess_text(job_description))
+         common_keywords = set(jd_keywords).intersection(set(resume_keywords))
+         keyword_match_score = len(common_keywords)  # Count of common keywords as a whole number
+         tfidf_score = tfidf_cosine_similarity(text, job_description)
+         doc2vec_score = doc2vec_cosine_similarity(preprocessed_text, preprocess_text(job_description), model)
+
+         data.append({
+             'Name': name,
+             'Keyword_Match_Score': keyword_match_score,  # Whole number
+             'TFIDF_Score': tfidf_score,
+             'Doc2Vec_Score': doc2vec_score,
+             'Years_of_Experience': years_of_experience,
+             'Feedback': '; '.join(feedback),  # Combine feedback into a single string
+         })
+
+     return data
+
+ # Function to save data to an Excel file
+ def save_to_excel(data, output_file):
+     df = pd.DataFrame(data)
+     try:
+         df.to_excel(output_file, index=False)
+         return output_file
+     except Exception as e:
+         return f"Error saving file: {e}"
+
+ # Gradio interface function
+ def gradio_interface(resume_files, job_description):
+     if resume_files:
+         output_file = 'Resume_Analysis.xlsx'  # written to the app's working directory
+         resumes = extract_info_from_resumes(resume_files, job_description)
+         result = save_to_excel(resumes, output_file)
+     else:
+         result = "No resumes to process."
+
+     return result
+
+
+ # Gradio UI setup
+ iface = gr.Interface(
+     fn=gradio_interface,
+     inputs=[
+         gr.Files(label="Upload multiple Resumes", type="filepath"),  # Accept multiple file uploads
+         gr.Textbox(label="Job Description", lines=5, placeholder="Enter the job description here...")
+     ],
+     outputs=gr.File(label="Download Results"),  # Provide the output file
+     description="Upload multiple resume PDFs and provide a job description to analyze the resumes and get an Excel file with the results."
+ )
+
+ # Launch the Gradio interface
+ iface.launch()