Spaces:
Sleeping
Sleeping
Jeet Paul
committed on
Commit
·
e249a3c
1
Parent(s):
00e257b
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
from tika import parser
|
3 |
import pandas as pd
|
4 |
from sklearn.preprocessing import LabelEncoder
|
5 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
@@ -7,6 +6,8 @@ from sklearn.multiclass import OneVsRestClassifier
|
|
7 |
from sklearn.neighbors import KNeighborsClassifier
|
8 |
import re
|
9 |
import pickle
|
|
|
|
|
10 |
|
11 |
def cleanResume(resumeText):
|
12 |
# Your existing cleanResume function remains unchanged
|
@@ -35,9 +36,8 @@ model = OneVsRestClassifier(KNeighborsClassifier())
|
|
35 |
model.fit(WordFeatures, target)
|
36 |
|
37 |
def pdf_to_text(file):
|
38 |
-
# Use
|
39 |
-
|
40 |
-
text = file_data['content']
|
41 |
return text
|
42 |
|
43 |
def predict_category(resumes_data, selected_category):
|
@@ -64,7 +64,7 @@ def main():
|
|
64 |
st.title("Resume Ranking App")
|
65 |
st.text("Upload resumes and select a category to rank them.")
|
66 |
|
67 |
-
|
68 |
|
69 |
resumes_data = []
|
70 |
selected_category = ""
|
@@ -73,10 +73,8 @@ def main():
|
|
73 |
files = st.file_uploader("Upload resumes", type=["pdf"], accept_multiple_files=True)
|
74 |
if files:
|
75 |
for file in files:
|
76 |
-
|
77 |
-
text = cleanResume(pdf_to_text(file_data))
|
78 |
resumes_data.append({'ResumeText': text, 'FileName': file.name})
|
79 |
-
|
80 |
selected_category = st.selectbox("Select a category to rank by", label.classes_)
|
81 |
|
82 |
if st.button("Rank Resumes"):
|
|
|
1 |
import streamlit as st
|
|
|
2 |
import pandas as pd
|
3 |
from sklearn.preprocessing import LabelEncoder
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
6 |
from sklearn.neighbors import KNeighborsClassifier
|
7 |
import re
|
8 |
import pickle
|
9 |
+
import pdfminer
|
10 |
+
from pdfminer.high_level import extract_text
|
11 |
|
12 |
def cleanResume(resumeText):
|
13 |
# Your existing cleanResume function remains unchanged
|
|
|
36 |
model.fit(WordFeatures, target)
|
37 |
|
38 |
def pdf_to_text(file):
|
39 |
+
# Use pdfminer.six to extract text from the PDF file
|
40 |
+
text = extract_text(file)
|
|
|
41 |
return text
|
42 |
|
43 |
def predict_category(resumes_data, selected_category):
|
|
|
64 |
st.title("Resume Ranking App")
|
65 |
st.text("Upload resumes and select a category to rank them.")
|
66 |
|
67 |
+
|
68 |
|
69 |
resumes_data = []
|
70 |
selected_category = ""
|
|
|
73 |
files = st.file_uploader("Upload resumes", type=["pdf"], accept_multiple_files=True)
|
74 |
if files:
|
75 |
for file in files:
|
76 |
+
text = cleanResume(pdf_to_text(file))
|
|
|
77 |
resumes_data.append({'ResumeText': text, 'FileName': file.name})
|
|
|
78 |
selected_category = st.selectbox("Select a category to rank by", label.classes_)
|
79 |
|
80 |
if st.button("Rank Resumes"):
|