Spaces:
Runtime error
Runtime error
Commit
•
b74ac0a
1
Parent(s):
89d1633
Update preprocessor.py (#5)
Browse files
- Update preprocessor.py (6a9cf4ef901e44d35d48a8d3dae20e5bc512dfd7)
Co-authored-by: Marcelo Moreno <[email protected]>
- preprocessor.py +0 -1
preprocessor.py
CHANGED
@@ -62,7 +62,6 @@ def preprocess_whatsapp_messages(file_path, file_type):
|
|
62 |
# Additional preprocessing steps:
|
63 |
# Remove URLs and convert text to lowercase
|
64 |
df['text'] = df['text'].apply(lambda x: re.sub(r'https?:\/\/\S+', '', x)) # Remove URLs
|
65 |
-
df['text'] = df['text'].apply(lambda x: x.lower()) # Convert text to lowercase
|
66 |
|
67 |
# Remove emojis, images, stickers, documents while preserving colons after sender names
|
68 |
df['text'] = df['text'].apply(lambda x: re.sub(r'(?<!\w)(:\s|\s:\s|\s:)', '', x)) # Remove colons that are not part of sender's name
|
|
|
62 |
# Additional preprocessing steps:
|
63 |
# Remove URLs and convert text to lowercase
|
64 |
df['text'] = df['text'].apply(lambda x: re.sub(r'https?:\/\/\S+', '', x)) # Remove URLs
|
|
|
65 |
|
66 |
# Remove emojis, images, stickers, documents while preserving colons after sender names
|
67 |
df['text'] = df['text'].apply(lambda x: re.sub(r'(?<!\w)(:\s|\s:\s|\s:)', '', x)) # Remove colons that are not part of sender's name
|