import re
def preprocess_text(text):
    """Strip special characters and digits from *text*.

    Every run of non-word characters (punctuation, symbols, whitespace),
    decimal digits and underscores is replaced by a single space, and
    leading/trailing spaces are then removed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string, with the remaining words separated by single
        spaces. Empty when nothing survives the cleaning.
    """
    # ``\W`` alone keeps digits and underscores (both count as word
    # characters), so they are listed explicitly. Matching whole runs with
    # ``+`` collapses consecutive separators into one space in the same pass.
    return re.sub(r"[\W\d_]+", " ", text).strip()
# Load the raw rows. The encoding is explicit so the result does not depend on
# the platform's locale default.
with open("data.csv", "r", encoding="utf-8") as source:
    data = source.readlines()

# Each physical line is cleaned independently as plain text.
cleaned_data = [preprocess_text(line) for line in data]

# Save the cleaned data, one entry per line.
with open("cleaned_data.txt", "w", encoding="utf-8") as sink:
    sink.writelines(entry + "\n" for entry in cleaned_data)