# Streamlit resume scanner: third-party imports and global NLP resources.
import streamlit as st
import plotly.express as px
import pandas as pd
import numpy as np
import pickle as pkl
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
# Large English pipeline with word vectors; must be installed separately
# (python -m spacy download en_core_web_lg).
nlp = spacy.load('en_core_web_lg')
import re
import docx2txt
from spacy.matcher import PhraseMatcher
# from transformers import BertForSequenceClassification
# from transformers import BertTokenizer
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# tokenizer = AutoTokenizer.from_pretrained("liberatoratif/BERT-resume-job-recommender")
# model = AutoModelForSequenceClassification.from_pretrained("liberatoratif/BERT-resume-job-recommender")
# Phrase matcher over the shared vocab; skill patterns are added later,
# inside the scan handler, from the LinkedIn skills file.
matcher = PhraseMatcher(nlp.vocab)
import torch
# Page configuration — must be the first Streamlit call in the script.
st.set_page_config(
page_title="Resume Scanner",
page_icon="๐",  # NOTE(review): looks like a mojibake'd emoji — confirm intended icon
layout="wide",
initial_sidebar_state="expanded",
)
# output_dir = "model_save"
# Pickled label encoder mapping class indices <-> job-domain names.
enc_dir = "target_encodings.pkl"
# Newline-separated skill phrases used to build PhraseMatcher patterns.
matcher_dir = "linkedin_skill.txt"
# @st.cache
def bert():
    """Load the fine-tuned BERT sequence classifier from the Hugging Face Hub.

    Returns:
        The model used to map resume skill text to a job-domain label.
    """
    # model_loaded_temp = BertForSequenceClassification.from_pretrained(output_dir)
    # NOTE(review): consider @st.cache_resource so the model is not
    # re-loaded on every Streamlit rerun (st.cache is deprecated).
    model_loaded_temp = AutoModelForSequenceClassification.from_pretrained(
        "liberatoratif/BERT-resume-job-recommender"
    )
    return model_loaded_temp
# @st.cache
def bert_token():
    """Load the tokenizer matching the fine-tuned BERT classifier.

    Returns:
        The Hugging Face tokenizer for the resume-domain model.
    """
    # tokenizer_loaded_temp = BertTokenizer.from_pretrained(output_dir)
    # NOTE(review): consider @st.cache_resource to avoid repeated loads.
    tokenizer_loaded_temp = AutoTokenizer.from_pretrained(
        "liberatoratif/BERT-resume-job-recommender"
    )
    return tokenizer_loaded_temp
# @st.cache
def label_enc():
    """Load the pickled label encoder from ``enc_dir``.

    Returns:
        The unpickled encoder; its ``inverse_transform`` maps class
        indices back to human-readable domain names.
    """
    # Context manager fixes the original's leaked file handle:
    # pkl.load(open(enc_dir, 'rb')) never closed the file.
    with open(enc_dir, 'rb') as fh:
        return pkl.load(fh)
# @st.cache
def ph_match():
    """Read the LinkedIn skills file.

    Returns:
        str: raw contents of ``matcher_dir`` (one skill phrase per line).
    """
    with open(matcher_dir, 'r', encoding='utf-8') as file:
        return file.read()
# Eagerly load all heavyweight resources at import time.
label_encoder = label_enc()
model_loaded = bert()
tokenizer_loaded = bert_token()
# Raw skills text; re-bound to a list of lines inside the scan handler.
txt = ph_match()
# NOTE(review): the original HTML/CSS payloads of these st.markdown calls
# were lost when this file was mangled (the page-title markdown contained an
# unterminated string literal).  Reconstructed with a plain HTML heading that
# carries the same visible text — restore the project's styling if available.
st.markdown(
    """
    """,
    unsafe_allow_html=True,
)
st.markdown(
    "<h1 style='text-align: center;'>RESUME/CV SCANNER</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "Know which domain fit's your resume :)",
    unsafe_allow_html=True,
)
# English stop words; a set gives O(1) membership tests in complete_pack
# (the original list gave identical results, only slower).
stops = set(STOP_WORDS)


def extract_text_from_docx(docx_path):
    """Extract plain text from a .docx file.

    Args:
        docx_path: path or file-like object accepted by docx2txt
            (Streamlit's UploadedFile works).

    Returns:
        The extracted text with tabs collapsed to spaces, or None when
        the document yields no text.
    """
    # Local renamed from `txt` — the original shadowed the module-level
    # skills text of the same name.
    text = docx2txt.process(docx_path)
    if text:
        return text.replace('\t', ' ')
    return None
def cleanResume(resumeText):
    """Normalise resume text for matching/classification.

    Removes @mentions, URLs and punctuation, strips non-ASCII characters,
    and collapses all whitespace to single spaces.

    Args:
        resumeText: raw resume text.

    Returns:
        str: cleaned, single-line, ASCII-only text.
    """
    # Raw strings: the original non-raw pattern relied on Python passing
    # unknown escapes (\w, \/) through unchanged, which is deprecated and
    # raises SyntaxWarning on modern interpreters.  Behavior is identical.
    resumeText = ' '.join(
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)", " ", resumeText).split()
    )
    resumeText = re.sub(r'[^\x00-\x7F]+', r' ', resumeText)
    # No newlines can survive the joins above; kept for parity with the
    # original implementation.
    resumeText = ''.join(resumeText.splitlines())
    return resumeText
def complete_pack(x):
    """Tokenise *x* with spaCy and drop English stop words.

    Args:
        x: cleaned resume/skills text.

    Returns:
        list[str]: lower-cased tokens that are not stop words.
    """
    doc = nlp(x)
    return [tok.text.lower() for tok in doc if tok.text.lower() not in stops]
# Sidebar: resume upload widget and the scan trigger.
# NOTE(review): indentation was lost in this file; the nesting below is a
# reconstruction — confirm against the original layout.
with st.sidebar:
    # The original's `global resume_text, upload, ...` statements were
    # dropped: at module level they are no-ops.
    upload = st.file_uploader("DRAG AND DROP YOUR RESUME NOW")
    st.markdown("Only .docx type files accepted",
                unsafe_allow_html=True)
    if upload:
        try:
            resume_text = extract_text_from_docx(upload)
            # Collapse paragraph breaks so phrase matching sees one stream.
            resume_text = resume_text.replace('\n\n', ' ')
            re_temp = cleanResume(resume_text)
            resume_text_spacy = nlp(re_temp)
        except Exception:
            st.error('WRONG FILE FORMAT : Only .docx(WORD DOC) type of files are accepted')
    scan = st.button('SCAN ๐')
# Main scan handler: extract contact details, match skills against the
# LinkedIn skill list, classify the resume's domain with BERT, and render
# the results.  NOTE(review): nesting reconstructed after indentation loss.
if scan:
    try:
        # Contact details pulled straight from the raw resume text.
        emails = re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", resume_text)
        phone = re.findall(r'[\+\(]?[1-9][0-9 .\-\(\)]{8,}[0-9]', resume_text)
        links = re.findall(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?ยซยปโโโโ]))", resume_text)

        # Build one PhraseMatcher pattern per skill line.
        txt = txt.split('\n')
        ev = [nlp.make_doc(i) for i in txt]
        # spaCy v3 API: patterns are passed as a list.  The original call
        # matcher.add("SKILLS", None, *ev) used the removed v2 signature
        # and raises under spaCy 3.x.
        matcher.add("SKILLS", ev)
        get_skills = matcher(resume_text_spacy)
        demo = []
        for match_id, start, end in get_skills:
            span = resume_text_spacy[start:end]
            demo.append(span.text)

        # Clean and de-stop-word the matched skill spans, then classify.
        my_skills_clean = cleanResume(' '.join(demo))
        skills = ' '.join(complete_pack(my_skills_clean))

        model_loaded.eval()
        # Tokenize the input text and add the batch dimension.
        input_ids = tokenizer_loaded.encode(skills, add_special_tokens=True)
        input_ids = torch.tensor(input_ids).unsqueeze(0)

        # Forward pass without gradient tracking.
        with torch.no_grad():
            result = model_loaded(input_ids, token_type_ids=None,
                                  attention_mask=None, return_dict=True)
        logits = result.logits.detach().cpu().numpy()

        # Map each class logit to its human-readable domain name.
        probs = logits[0]
        pred_idx = np.argmax(probs)
        d = {label_encoder.inverse_transform([ind])[0]: score
             for ind, score in enumerate(probs)}
        domain = label_encoder.inverse_transform([pred_idx])[0]
        data = pd.DataFrame({'Domains': list(d.keys()), 'Probs': list(d.values())})
        st.markdown(f"**Your skills are matching to :** {domain}", unsafe_allow_html=True)

        datacpy = data.copy()
        # NOTE(review): raw logits * 10 are not calibrated percentages; a
        # softmax would be needed for true probabilities — confirm intent.
        datacpy['Probs'] = datacpy['Probs'] * 10
        datacpy.rename(columns={'Probs': 'Percentage Prediction of your Domain'}, inplace=True)
        # NOTE(review): this heading's original HTML wrapper was lost when
        # the file was mangled; plain-text heading reconstructed.
        st.markdown("PERCENT OF YOUR DOMAIN MATCH", unsafe_allow_html=True)
        st.dataframe(datacpy.sort_values('Percentage Prediction of your Domain', ascending=False))
        domains = px.bar(data, x='Domains', y='Probs', width=800, height=400)
        st.plotly_chart(domains)

        # Contact-detail report cards (de-duplicated before display).
        if len(list(set(emails))) > 0:
            st.markdown("EMAIL โ๏ธ ", unsafe_allow_html=True)
            st.success(list(set(emails)))
        else:
            st.markdown("EMAIL โ ", unsafe_allow_html=True)
            st.error('Email-Id is not present try including it in your Resume')
        if len(list(set(phone))) > 0:
            st.markdown("MOBILE NO โ๏ธ ", unsafe_allow_html=True)
            st.success(list(set(phone)))
        else:
            st.markdown("MOBILE NO โ ", unsafe_allow_html=True)
            st.error('Mobile number is not present try including it in your Resume')
        if len(list(set(links))) > 0:
            st.markdown("LINKS โ๏ธ ", unsafe_allow_html=True)
            st.success(list(set(links)))
        else:
            st.markdown("LINKS โ", unsafe_allow_html=True)
            st.error("Link's are not present try including your Github or LinkedIn Profile in your Resume")
    except Exception as e:
        # Surface the error in the UI rather than crashing the app.
        st.write(e)
        st.error("๐ฒ Try uploading your file again")