"""Gradio demo: offensive-language detection for Turkish text.

Loads the ``TURKCELL/bert-offensive-lang-detection-tr`` sequence classifier,
normalizes the user's sentence with ``clean_text`` and reports whether the
model labels it ``offensive`` or ``non-offensive``.
"""
import re

import gradio as gr
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from turkish.deasciifier import Deasciifier

# Model and tokenizer initialization.
tokenizer = AutoTokenizer.from_pretrained("TURKCELL/bert-offensive-lang-detection-tr")
model = AutoModelForSequenceClassification.from_pretrained("TURKCELL/bert-offensive-lang-detection-tr")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Inference only: make evaluation mode explicit (disables dropout etc.).
model.eval()

# Circumflexed vowels and their plain replacements.
_CIRCUMFLEX_TABLE = str.maketrans({
    'â': 'a', 'î': 'i', 'û': 'u', 'ô': 'o',
    'Â': 'A', 'Î': 'I', 'Û': 'U', 'Ô': 'O',
})

# Upper-case letters whose lower-case form needs Turkish-specific handling
# (notably dotless 'I' -> 'ı' and dotted 'İ' -> 'i').
_TURKISH_LOWER_MAP = {
    'I': 'ı', 'İ': 'i', 'Ç': 'ç', 'Ş': 'ş', 'Ğ': 'ğ', 'Ü': 'ü', 'Ö': 'ö',
}

# Patterns used by clean_text, compiled once instead of on every request.
_MENTION_RE = re.compile(r"@\S*")
_HASHTAG_RE = re.compile(r'#\S+')
_URL_RE = re.compile(r"http\S+|www\S+|https\S+", flags=re.MULTILINE)
# NOTE(review): the emoticon alternatives on the right never get a chance to
# match -- `[^\w\s]` is tried first and consumes the leading ':' / ';', and
# the text is already lower-cased when this runs (so ':D' arrives as ':d',
# leaving a stray 'd'). Kept byte-for-byte because the classifier presumably
# expects input produced by exactly this preprocessing -- confirm before
# changing.
_PUNCT_RE = re.compile(r'[^\w\s]|(:\)|:\(|:D|:P|:o|:O|;\))')
_EMOJI_RE = re.compile("["
                       u"\U0001F600-\U0001F64F"  # emoticons
                       u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                       u"\U0001F680-\U0001F6FF"  # transport & map symbols
                       u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                       u"\U00002702-\U000027B0"
                       u"\U000024C2-\U0001F251"
                       "]+", flags=re.UNICODE)
_WHITESPACE_RE = re.compile(r'\s+')

# Class index produced by the model -> human-readable label.
_LABELS = {0: 'non-offensive', 1: 'offensive'}


def deasciifier(text: str) -> str:
    """Return *text* converted by ``Deasciifier.convert_to_turkish()``."""
    converter = Deasciifier(text)
    return converter.convert_to_turkish()


def remove_circumflex(text: str) -> str:
    """Replace circumflexed vowels (â, î, û, ô and upper-case forms) with plain ones."""
    return text.translate(_CIRCUMFLEX_TABLE)


def turkish_lower(text: str) -> str:
    """Lower-case *text*, mapping Turkish upper-case letters explicitly.

    Characters are lowered one at a time (as in the original implementation)
    so the result does not depend on context-sensitive rules of ``str.lower``
    applied to the whole string.
    """
    return ''.join(_TURKISH_LOWER_MAP.get(c, c).lower() for c in text)


def clean_text(text: str) -> str:
    """Normalize raw user text before it is tokenized.

    Steps, in order: strip circumflexes, Turkish-aware lower-casing,
    deasciification, removal of @mentions, #hashtags, URLs, punctuation and
    emojis, then whitespace collapsing.

    Args:
        text: Raw input sentence.

    Returns:
        The cleaned sentence with single spaces and no leading/trailing
        whitespace.
    """
    # Remove circumflexed letters.
    text = remove_circumflex(text)
    # Convert to lower case.
    text = turkish_lower(text)
    # Deasciify.
    text = deasciifier(text)
    # Remove user names.
    text = _MENTION_RE.sub(" ", text)
    # Remove hashtags.
    text = _HASHTAG_RE.sub(' ', text)
    # Remove URLs.
    text = _URL_RE.sub(' ', text)
    # Remove punctuation and text-based emoticons.
    text = _PUNCT_RE.sub(' ', text)
    # Remove emojis.
    text = _EMOJI_RE.sub(r' ', text)
    # Collapse runs of whitespace into a single space.
    text = _WHITESPACE_RE.sub(' ', text).strip()
    return text


def is_offensive(sentence: str) -> str:
    """Classify *sentence* as ``'offensive'`` or ``'non-offensive'``.

    Args:
        sentence: Raw text entered by the user.

    Returns:
        The label for the class with the highest logit.
    """
    normalized = clean_text(sentence)
    encoded = tokenizer(normalized, padding=True, truncation=True, max_length=256, return_tensors='pt')
    encoded = {k: v.to(device) for k, v in encoded.items()}
    # No gradients are needed for prediction; skipping autograd bookkeeping
    # saves memory and time on every request.
    with torch.no_grad():
        output = model(**encoded)
    y_pred = np.argmax(output.logits.cpu().numpy(), axis=1)
    return _LABELS[int(y_pred[0])]


iface = gr.Interface(
    fn=is_offensive,
    inputs=gr.Textbox(lines=2, placeholder="Enter sentence here..."),
    outputs="text",
    title="Offensive Language Detection",
    description="Offensive language detection for Turkish",
)

iface.launch()