File size: 432 Bytes
467b421 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
import re
def preprocess_text(text):
    """Strip special characters and digits from *text* and tidy whitespace.

    Every run of characters that is not a letter (punctuation, symbols,
    digits, underscores and whitespace) is replaced by a single space, and
    leading/trailing spaces are then removed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, with words separated by single spaces. Returns
        an empty string when *text* contains no letters.
    """
    # `\W` alone keeps digits and "_" because both count as word characters,
    # so they are listed explicitly. `\W` also matches whitespace, so the
    # `+` collapses mixed runs such as "! 12 " into one space in one pass.
    text = re.sub(r'[\W\d_]+', ' ', text)
    return text.strip()
# Read the raw input lines. The encoding is passed explicitly so decoding
# does not depend on the platform's locale default.
with open("data.txt", "r", encoding="utf-8") as file:
    data = file.readlines()

# Clean each line independently; a line that cleans down to nothing is kept
# as an empty entry so the output has one line per input line.
cleaned_data = [preprocess_text(line) for line in data]

# Save the cleaned data, one entry per line, in a single batched write.
with open("cleaned_data.txt", "w", encoding="utf-8") as file:
    file.writelines(entry + "\n" for entry in cleaned_data)