Souha Ben Hassine committed on
Commit
54a099a
·
1 Parent(s): e1e02fe
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -12,8 +12,7 @@ import nltk
12
  from nltk.corpus import stopwords
13
  from nltk.stem import WordNetLemmatizer
14
  nltk.download(['stopwords','wordnet'])
15
-
16
-
17
  # Load the CSV file into a DataFrame
18
  dataset_path = "Resume.csv"
19
  data = pd.read_csv(dataset_path)
@@ -30,7 +29,6 @@ ruler = nlp.add_pipe("entity_ruler")
30
  # Load skill patterns from disk into the entity ruler
31
  ruler.from_disk(skill_pattern_path)
32
 
33
-
34
  def get_unique_skills(text):
35
  doc = nlp(text)
36
  skills = set()
@@ -54,6 +52,12 @@ def preprocess_resume(resume_str):
54
  review = " ".join(review)
55
  return review
56
 
 
 
 
 
 
 
57
  Job_cat = data["Category"].unique()
58
  Job_cat = np.append(Job_cat, "ALL")
59
  Job_Category = "INFORMATION-TECHNOLOGY"
 
12
  from nltk.corpus import stopwords
13
  from nltk.stem import WordNetLemmatizer
14
  nltk.download(['stopwords','wordnet'])
15
+ nltk.download('omw-1.4')
 
16
  # Load the CSV file into a DataFrame
17
  dataset_path = "Resume.csv"
18
  data = pd.read_csv(dataset_path)
 
29
  # Load skill patterns from disk into the entity ruler
30
  ruler.from_disk(skill_pattern_path)
31
 
 
32
  def get_unique_skills(text):
33
  doc = nlp(text)
34
  skills = set()
 
52
  review = " ".join(review)
53
  return review
54
 
55
+ # Apply the preprocess_resume function to each resume string and store the result in a new column
56
+ data["Clean_Resume"] = data["Resume_str"].apply(preprocess_resume)
57
+
58
+ # Extract skills from each preprocessed resume and store them in a new column
59
+ data["skills"] = data["Clean_Resume"].str.lower().apply(get_unique_skills)
60
+
61
  Job_cat = data["Category"].unique()
62
  Job_cat = np.append(Job_cat, "ALL")
63
  Job_Category = "INFORMATION-TECHNOLOGY"