# File size: 432 Bytes
# 467b421
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import re

def preprocess_text(text: str, *, remove_digits: bool = False) -> str:
    """Replace non-word characters with spaces and collapse whitespace.

    Every run of characters matched by ``\\W`` (anything that is not a
    letter, digit, or underscore -- whitespace included) becomes a single
    space, and leading/trailing spaces are stripped.

    Note that digits and underscores are word characters, so they are kept
    by default. Pass ``remove_digits=True`` to treat digits as separators
    too.

    Args:
        text: The raw string to clean.
        remove_digits: When true, digits are replaced with spaces in
            addition to non-word characters.

    Returns:
        The cleaned string with tokens separated by single spaces.
    """
    # All whitespace is itself matched by \W, so one pass with `+` both
    # replaces the unwanted characters and collapses the resulting runs.
    pattern = r'[\W\d]+' if remove_digits else r'\W+'
    return re.sub(pattern, ' ', text).strip()

# Read the raw input lines. The encoding is passed explicitly so decoding
# does not depend on the platform's locale default.
with open("data.txt", "r", encoding="utf-8") as file:
    data = file.readlines()

# Clean each line independently; the result has one entry per input line.
cleaned_data = [preprocess_text(line) for line in data]

# Save the cleaned data, one entry per line. Lines that cleaned down to an
# empty string are still written, as blank lines.
with open("cleaned_data.txt", "w", encoding="utf-8") as file:
    file.writelines(entry + "\n" for entry in cleaned_data)