import pandas as pd


def load_stopwords(file_path):
    # Load the file so that each line is treated as one stopword
    with open(file_path, 'r', encoding='utf-8') as f:
        stopwords = f.read().splitlines()
    return set(stopwords)


stop_words = load_stopwords('stopwords.txt')

df = pd.read_csv('veriler_cleaned.csv')


def remove_stopwords_without_nltk(text):
    # Split on whitespace and keep only tokens not in the stopword set;
    # non-string values (e.g. NaN) are replaced with an empty string
    if isinstance(text, str):
        words = text.split()
        filtered_words = [word for word in words if word.lower() not in stop_words]
        return ' '.join(filtered_words)
    else:
        return ""


df['stopwords_text'] = df['cleaned_text'].apply(remove_stopwords_without_nltk)

print(df[['cleaned_text', 'stopwords_text']].head())

df.to_csv('temizlenmis_veri.csv', index=False)

"""
Alternative version using NLTK tokenization, which combines NLTK's built-in
Turkish stopword list with the custom list from 'stopwords.txt':

import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

nltk.download('stopwords')
nltk.download('punkt')

stop_words = set(stopwords.words('turkish'))


def load_custom_stopwords(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        custom_stopwords = f.read().splitlines()
    return set(custom_stopwords)


custom_stopwords = load_custom_stopwords('stopwords.txt')
stop_words.update(custom_stopwords)

df = pd.read_csv('veriler_cleaned.csv')


def remove_stopwords(text):
    if isinstance(text, str):
        words = word_tokenize(text)
        # Drop the stopwords
        filtered_words = [word for word in words if word.lower() not in stop_words]
        return ' '.join(filtered_words)
    else:
        return ""


df['stopwords_text'] = df['cleaned_text'].apply(remove_stopwords)

print(df[['cleaned_text', 'stopwords_text']].head())

df.to_csv('temizlenmis_veri.csv', index=False)
"""
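
# --- Illustrative usage check (a minimal sketch; the sample sentence and the
# assumed contents of 'stopwords.txt' are hypothetical, not from the data) ---
# Assuming the stopword file contains common Turkish function words such as
# "ve", "bir" and "bu", the filter would behave like this:
sample = "bu ürün çok iyi ve fiyatı bir harika"
print(remove_stopwords_without_nltk(sample))
# Expected output under that assumption: "ürün çok iyi fiyatı harika"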