Commit
b74ac0a
1 Parent(s): 89d1633

Update preprocessor.py (#5)

Browse files

- Update preprocessor.py (6a9cf4ef901e44d35d48a8d3dae20e5bc512dfd7)


Co-authored-by: Marcelo Moreno <[email protected]>

Files changed (1)
  1. preprocessor.py +0 -1
preprocessor.py CHANGED
@@ -62,7 +62,6 @@ def preprocess_whatsapp_messages(file_path, file_type):
62
  # Additional preprocessing steps:
63
  # Remove URLs and convert text to lowercase
64
  df['text'] = df['text'].apply(lambda x: re.sub(r'https?:\/\/\S+', '', x)) # Remove URLs
65
- df['text'] = df['text'].apply(lambda x: x.lower()) # Convert text to lowercase
66
 
67
  # Remove emojis, images, stickers, documents while preserving colons after sender names
68
  df['text'] = df['text'].apply(lambda x: re.sub(r'(?<!\w)(:\s|\s:\s|\s:)', '', x)) # Remove colons that are not part of sender's name
 
62
  # Additional preprocessing steps:
63
  # Remove URLs and convert text to lowercase
64
  df['text'] = df['text'].apply(lambda x: re.sub(r'https?:\/\/\S+', '', x)) # Remove URLs
 
65
 
66
  # Remove emojis, images, stickers, documents while preserving colons after sender names
67
  df['text'] = df['text'].apply(lambda x: re.sub(r'(?<!\w)(:\s|\s:\s|\s:)', '', x)) # Remove colons that are not part of sender's name