# makaleChatbotu / stop_words.py
import pandas as pd

def load_stopwords(file_path):
    """Load a stopword list from a text file, one word per line."""
    with open(file_path, 'r', encoding='utf-8') as f:
        stopwords = f.read().splitlines()  # each line is one stopword
    return set(stopwords)
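# Expected stopwords.txt layout (illustrative sample; "ve", "bir", "ama" are
# common Turkish stopwords standing in for the real file's contents):
#   ve
#   bir
#   ama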
stop_words = load_stopwords('stopwords.txt')
df = pd.read_csv('veriler_cleaned.csv')
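# Assumption: veriler_cleaned.csv carries a 'cleaned_text' column produced by
# an earlier cleaning step.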

def remove_stopwords_without_nltk(text):
    """Drop stopwords from `text` using plain whitespace splitting."""
    if isinstance(text, str):
        words = text.split()
        filtered_words = [word for word in words if word.lower() not in stop_words]
        return ' '.join(filtered_words)
    else:
        return ""  # non-string cells (e.g. NaN from pandas) become empty strings
df['stopwords_text'] = df['cleaned_text'].apply(remove_stopwords_without_nltk)
print(df[['cleaned_text', 'stopwords_text']].head())
df.to_csv('temizlenmis_veri.csv', index=False)
"""
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
stop_words = set(stopwords.words('turkish'))
def load_custom_stopwords(file_path):
with open(file_path, 'r', encoding='utf-8') as f:
custom_stopwords = f.read().splitlines()
return set(custom_stopwords)
custom_stopwords = load_custom_stopwords('stopwords.txt')
stop_words.update(custom_stopwords)
df = pd.read_csv('veriler_cleaned.csv')
def remove_stopwords(text):
if isinstance(text, str):
words = word_tokenize(text)
filtered_words = [word for word in words if word.lower() not in stop_words] # Stopwords'leri çıkar
return ' '.join(filtered_words)
else:
return ""
df['stopwords_text'] = df['cleaned_text'].apply(remove_stopwords)
print(df[['cleaned_text', 'stopwords_text']].head())
df.to_csv('temizlenmis_veri.csv', index=False)
"""