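# Spaces: Runtime error
# (The line above appears to be page-status text captured together with the
# source; it is not part of the program.)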
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
import torch | |
import numpy as np | |
import re | |
from turkish.deasciifier import Deasciifier | |
# Model and tokenizer initialization.
# Both are loaded from the same checkpoint id; the model is then moved to the
# GPU when CUDA is available and stays on the CPU otherwise.
_MODEL_ID = "TURKCELL/bert-offensive-lang-detection-tr"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)
model.to(device)
def deasciifier(text):
    """Return ``text`` as converted by ``Deasciifier.convert_to_turkish``.

    A fresh ``Deasciifier`` is built around ``text`` on every call.
    NOTE(review): presumably this restores Turkish-specific letters in text
    that was typed with plain ASCII characters -- confirm against the
    ``turkish`` package documentation.
    """
    return Deasciifier(text).convert_to_turkish()
# Circumflexed vowels (both cases) mapped to their bare counterparts.
_CIRCUMFLEX_TABLE = str.maketrans("âîûôÂÎÛÔ", "aiuoAIUO")


def remove_circumflex(text):
    """Return ``text`` with the circumflex stripped from a, i, u and o.

    Only the eight characters â î û ô Â Î Û Ô are affected; every other
    character is passed through unchanged. Case is preserved, so ``Î``
    becomes the plain ASCII ``I``.
    """
    return text.translate(_CIRCUMFLEX_TABLE)
# Upper-case letters whose Turkish lower-case form is set explicitly.
# The important pairs are I -> ı (dotless) and İ -> i (dotted).
_TURKISH_LOWER_TABLE = str.maketrans("IİÇŞĞÜÖ", "ıiçşğüö")


def turkish_lower(text):
    """Lower-case ``text`` using Turkish rules for the letters I and İ.

    Characters in the table above are replaced first; each resulting
    character is then lower-cased on its own with ``str.lower``.
    Lower-casing is done character by character rather than on the whole
    string, so no context-dependent casing rules are applied.
    """
    return "".join(map(str.lower, text.translate(_TURKISH_LOWER_TABLE)))
# Removal patterns, applied in exactly this order; each match is replaced by a
# single space.
_REMOVAL_PATTERNS = (
    # User mentions ("@" followed by any run of non-space characters).
    re.compile(r"@\S*"),
    # Hashtags.
    re.compile(r'#\S+'),
    # URLs.
    re.compile(r"http\S+|www\S+|https\S+", flags=re.MULTILINE),
    # Punctuation and text emoticons.
    # NOTE: every emoticon alternative starts with ':' or ';', which the first
    # alternative `[^\w\s]` already matches, so the emoticon group never takes
    # part in a match (":D" ends up as " D"). Kept as-is so the cleaned text
    # is unchanged.
    re.compile(r'[^\w\s]|(:\)|:\(|:D|:P|:o|:O|;\))'),
    # Emoji.
    # NOTE: the last range (U+24C2 - U+1F251) is very wide and also covers
    # many non-emoji code points; kept as-is so the cleaned text is unchanged.
    re.compile("["
               u"\U0001F600-\U0001F64F"  # emoticons
               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
               u"\U0001F680-\U0001F6FF"  # transport & map symbols
               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
               u"\U00002702-\U000027B0"
               u"\U000024C2-\U0001F251"
               "]+", flags=re.UNICODE),
)
_WHITESPACE_RUN = re.compile(r'\s+')


def clean_text(text):
    """Normalize a raw sentence before it is handed to the tokenizer.

    Steps, in order:
      1. strip circumflexes (``remove_circumflex``),
      2. lower-case with Turkish rules (``turkish_lower``),
      3. run the deasciifier (``deasciifier``),
      4. replace mentions, hashtags, URLs, punctuation and emoji with spaces,
      5. collapse whitespace runs to one space and trim both ends.

    Returns the cleaned string (possibly empty).
    """
    normalized = deasciifier(turkish_lower(remove_circumflex(text)))
    for pattern in _REMOVAL_PATTERNS:
        normalized = pattern.sub(' ', normalized)
    return _WHITESPACE_RUN.sub(' ', normalized).strip()
def is_offensive(sentence):
    """Classify ``sentence`` and return its label as a string.

    The sentence is normalized with ``clean_text``, tokenized (truncated to
    at most 256 tokens), moved to the same device as the model, and scored.
    The index of the largest logit selects the label.

    Args:
        sentence: Raw input text.

    Returns:
        ``'non-offensive'`` when class 0 has the highest logit,
        ``'offensive'`` when class 1 does.
    """
    labels = {0: 'non-offensive', 1: 'offensive'}

    cleaned = clean_text(sentence)
    encoded = tokenizer(cleaned, padding=True, truncation=True, max_length=256, return_tensors='pt')
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: disable autograd so no graph is built or kept alive for
    # the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Convert the numpy integer to a plain int before using it as a dict key.
    predicted = int(np.argmax(logits.cpu().numpy(), axis=1)[0])
    return labels[predicted]
# Gradio UI: a two-line text box feeds `is_offensive`, and the returned label
# is shown as plain text. The app is launched as soon as this module runs.
_sentence_box = gr.Textbox(lines=2, placeholder="Enter sentence here...")
iface = gr.Interface(
    fn=is_offensive,
    inputs=_sentence_box,
    outputs="text",
    title="Offensive Language Detection",
    description="Offensive language detection for Turkish",
)
iface.launch()