Spaces:
Running
Running
File size: 10,514 Bytes
ed4c418 936bb70 e7ac5fa dc61da1 67cde5c 936bb70 198afc2 c9670f4 198afc2 e7ac5fa 198afc2 e7ac5fa 198afc2 e7ac5fa 198afc2 dc61da1 67cde5c dc61da1 936bb70 dc61da1 936bb70 dc61da1 31613b6 dc61da1 31613b6 dc61da1 198afc2 dc61da1 198afc2 dc61da1 198afc2 dc61da1 198afc2 dc61da1 198afc2 dc61da1 198afc2 dc61da1 198afc2 dc61da1 936bb70 dc61da1 936bb70 9d76542 936bb70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import shap
from transformers import (AutoTokenizer,
AutoModelForSequenceClassification,
TextClassificationPipeline)
import requests
import re
from collections import Counter
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from keybert import KeyBERT
import torch
from deep_translator import DeeplTranslator
import torch
import torch.nn.functional as F
# NOTE(review): hard-coded DeepL API key committed to source — move it to an
# environment variable / secrets store and rotate the key.
api_key_deepl = "69f73328-5f95-4eda-813a-16af8c688404:fx"

# Select the compute device first so the model can be moved onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# English fake-news classifier (fine-tuned RoBERTa); the tokenizer of the
# base checkpoint is reused. Moving the model to `device` is required:
# the inference helpers below send their input tensors to `device`, and a
# CPU-resident model with CUDA inputs would crash.
model = AutoModelForSequenceClassification.from_pretrained("OsBaran/Roberta-Classification-Model").to(device)
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
def predict_with_roberta(model, tokenizer, input_text):
    """Classify *input_text* with a RoBERTa sequence classifier.

    Tokenizes the text (truncated to 512 tokens), runs a no-grad forward
    pass, and returns the argmax class index (0 = "Yanlış", 1 = "Doğru").
    Relies on the module-level ``device`` for tensor placement.
    """
    encoded = tokenizer(
        input_text, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
    # Highest-scoring class index as a plain Python int.
    return torch.argmax(logits, dim=-1).item()
def explain_roberta_prediction(model, tokenizer, input_text):
    """Classify *input_text* and build a short Turkish explanation string.

    Returns the predicted label with its softmax probability, followed by
    the first 10 tokens of the input. NOTE: those tokens are positional,
    not a real attribution method — see the SHAP helper for actual
    explanations.
    """
    encoded = tokenizer(
        input_text, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
    # Class probabilities as a NumPy vector (moved to CPU first).
    probabilities = torch.softmax(logits, dim=-1).cpu().numpy()[0]
    predicted_class = torch.argmax(logits, dim=-1).item()
    result = "Doğru" if predicted_class == 1 else "Yanlış"
    explanation = f"Modelin tahmini: {result} (Olasılık: {probabilities[predicted_class]:.2f})\n"
    # Round-trip decode/tokenize, then show the first 10 tokens.
    tokens = tokenizer.tokenize(tokenizer.decode(encoded['input_ids'][0]))
    explanation += "Modelin kararı aşağıdaki anahtar kelimelere dayanıyor:\n" + ', '.join(tokens[:10])
    return explanation
# Full-score classification pipeline used by the SHAP explainer below.
# NOTE(review): `return_all_scores=True` is deprecated in newer transformers
# releases in favour of `top_k=None` — confirm the installed version.
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, device=device)
def score_and_visualize(text):
    """Print the pipeline scores for *text* and render a SHAP text plot."""
    scores = pipe([text])
    print(scores[0])
    # Build a SHAP explainer around the classification pipeline and
    # visualize per-token contributions for this single input.
    shap_explainer = shap.Explainer(pipe)
    shap_values = shap_explainer([text])
    shap.plots.text(shap_values)
# NOTE(review): hard-coded NewsAPI key committed to source — move it to an
# environment variable and rotate the key.
api_key = '764e3b45715b449a8aedb8cd8018dfed'
def fetch_news_from_api(api_key, query, page_size=100):
    """Fetch articles matching *query* from the NewsAPI "everything" endpoint.

    Returns the list of article dicts, or [] on any HTTP or network failure.
    """
    url = f'https://newsapi.org/v2/everything?q={query}&pageSize={page_size}&apiKey={api_key}'
    try:
        # A timeout keeps the web app from hanging forever on a stalled request.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return []
    if response.status_code == 200:
        return response.json().get('articles', [])
    # Report the status code rather than the Response repr.
    print(f"Error: {response.status_code}")
    return []
def extract_keywords(text, top_n=5):
    """Return the *top_n* most frequent non-stop-word tokens of *text*."""
    # Lowercase and strip punctuation so counting is case/punctuation-insensitive.
    cleaned = re.sub(r'[^\w\s]', '', text.lower())
    # Count whitespace-split tokens, skipping English stop words.
    counts = Counter(
        token for token in cleaned.split() if token not in ENGLISH_STOP_WORDS
    )
    return [word for word, _count in counts.most_common(top_n)]
# KeyBERT keyword extractor backed by an SBERT sentence embedder.
kw_model = KeyBERT('all-mpnet-base-v2')  # Load the model using SBERT
def extract_keywords_keybert(text, num_keywords=2):
    """Extract up to *num_keywords* key phrases (1-2 grams) with KeyBERT."""
    pairs = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_keywords,
    )
    # KeyBERT returns (phrase, score) pairs; keep only the phrases.
    return [phrase for phrase, _score in pairs]
def filter_trusted_sources(articles, trusted_sources):
    """Keep only articles whose source name contains a trusted-source entry.

    Matching is case-insensitive and substring-based, so "bbc news" in
    *trusted_sources* matches an article sourced from "BBC News".
    """
    def _is_trusted(article):
        source = article['source']['name'].lower()
        return any(trusted in source for trusted in trusted_sources)

    return [article for article in articles if _is_trusted(article)]
def fetch_news_content(link):
    """Scrape a news page: return (title, body_text), or ("", "") on failure.

    The title is the text of the first <h1>; the body is the text of all
    <p> elements joined with spaces.
    """
    try:
        # Timeout prevents a stalled site from blocking the whole request flow.
        response = requests.get(link, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching content: {exc}")
        return "", ""
    if response.status_code != 200:
        print(f"Error fetching content: {response.status_code}")
        return "", ""
    soup = BeautifulSoup(response.content, 'html.parser')
    heading = soup.find('h1')  # look up <h1> once instead of twice
    title = heading.get_text() if heading else "Başlık bulunamadı"
    content = ' '.join(p.get_text() for p in soup.find_all('p'))
    return title, content
def compare_with_thrusted(input_text, bbc_articles):
    """TF-IDF cosine similarity of *input_text* against each article body.

    *bbc_articles* is a sequence of (title, content) pairs; returns a 1-D
    array with one similarity score per article.
    """
    corpus = [input_text] + [art[1] for art in bbc_articles]
    # Row 0 is the input text; remaining rows are the article bodies.
    tfidf_matrix = TfidfVectorizer().fit_transform(corpus).toarray()
    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
from sentence_transformers import SentenceTransformer, util
def sbert_similarity(input_text, bbc_articles):
    """SBERT-embedding similarity between *input_text* and scraped articles.

    *bbc_articles* is a sequence of (title, content) pairs. Prints the best
    similarity score and returns (max_score, most_similar_article).

    Bug fix: the original computed both values but returned None, so the
    result was always discarded by callers.
    """
    # NOTE(review): the SBERT model is re-loaded on every call; consider
    # caching it at module level for performance.
    sbert = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    input_embedding = sbert.encode(input_text, convert_to_tensor=True)
    news_embeddings = sbert.encode([news[1] for news in bbc_articles], convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(input_embedding, news_embeddings)
    max_score = cosine_scores.max()
    most_similar_news = bbc_articles[cosine_scores.argmax().item()]
    print(f"En benzer haber skoru: {max_score:.2f}")
    return max_score, most_similar_news
def translate_text(text, source_lang='tr', target_lang='en'):
    """Translate *text* with the DeepL API (default: Turkish -> English)."""
    translator = DeeplTranslator(
        api_key=api_key_deepl, source=source_lang, target=target_lang
    )
    return translator.translate(text)
# Load the Turkish model.
# Earlier experiment kept for reference:
# model_tr_name = "dbmdz/bert-base-turkish-cased"  # Put your Turkish model here
# model_tr = AutoModelForSequenceClassification.from_pretrained(model_tr_name)
# tokenizer_tr = AutoTokenizer.from_pretrained(model_tr_name)
# classifier_tr = pipeline("sentiment-analysis", model=model_tr, tokenizer=tokenizer_tr)
# Turkish tokenizer (BERTurk vocabulary) + fine-tuned binary classifier.
# NOTE(review): model_tr is never moved to `device` here; on a CUDA machine,
# inference that sends inputs to `device` needs model_tr.to(device) first.
tokenizer_tr = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
model_tr = AutoModelForSequenceClassification.from_pretrained("OsBaran/Bert-Classification-Model-Tr-3", num_labels=2)
def trModelPredictAlgo(input_news):
    """Classify a Turkish news text with the Turkish BERT model.

    Returns a string combining the predicted label ("Doğru"/"Yanlış") and
    the prediction probability.

    Bug fix: the original tokenized and classified with the *English*
    `tokenizer`/`model` globals, so the Turkish model was never used;
    it now uses `tokenizer_tr`/`model_tr`. A ", " separator was also added
    to the result string, which previously ran two sentences together.
    """
    inputs = tokenizer_tr(input_news, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    # Make sure the model is on the same device as the inputs (no-op on CPU).
    model_tr.to(device)
    with torch.no_grad():
        outputs = model_tr(**inputs)
    logits = outputs.logits
    # Softmax converts logits to class probabilities.
    probabilities = F.softmax(logits, dim=-1)
    predicted_class = torch.argmax(probabilities, dim=-1)
    predicted_probability = probabilities[0, predicted_class].item()
    sonuc = "Doğru" if predicted_class.item() == 1 else "Yanlış"
    print(f"Predicted class: {predicted_class.item()}")
    print(f"Prediction probability: {predicted_probability * 100:.2f}%")
    return f"Dogruluk tahmini: {sonuc}, Tahmin olasılığı: {predicted_probability * 100:.2f}%"
def enModelPredictAlgo(input_news):
    """Fact-check an English news text.

    Strategy:
      1. Extract keywords (KeyBERT) and query NewsAPI for related articles.
      2. Keep only articles from trusted outlets and scrape their text.
      3. If a scraped article is sufficiently similar to the input
         (TF-IDF cosine > 0.8), report the claim as confirmed.
      4. Otherwise fall back to the RoBERTa classifier and return its
         explanation string.

    Bug fix: the high-similarity branch used to print its verdict but fall
    through without a return (the API then showed null); it now returns
    the verdict string.
    """
    keywords = extract_keywords_keybert(input_news)
    search_query = ' '.join(keywords)
    news_articles = fetch_news_from_api(api_key, search_query)
    trusted_sources = [
        "bbc news",
        "cnn",
        "reuters.com",
        "theguardian.com",
        "time",
        # Other trusted sources...
    ]
    trusted_articles = filter_trusted_sources(news_articles, trusted_sources)
    trusted_articles_urls = [article["url"] for article in trusted_articles]
    if trusted_articles:
        print(f"\nGüvenilir kaynaklardan bulunan haberler:\n")
        print(trusted_articles_urls)
        bbc_articles = [fetch_news_content(link) for link in trusted_articles_urls]
        similarities = compare_with_thrusted(input_news, bbc_articles)
        sbert_similarity(input_news, bbc_articles)
        print(similarities)
        max_similarity = max(similarities)
        threshold = 0.8
        if max_similarity > threshold:
            verdict = f"Sonuç: Doğru (Benzerlik: {max_similarity:.2f})"
            print(verdict)
            return verdict
        # No close match among trusted articles: fall back to the model.
        # (The original also ran predict_with_roberta here and discarded
        # the result; that redundant forward pass was removed.)
        explanation = explain_roberta_prediction(model, tokenizer, input_news)
        print(explanation)
        return explanation
    # Nothing from trusted sources at all: classify and explain.
    print("Güvenilir kaynaklardan hiç haber bulunamadı.")
    prediction = predict_with_roberta(model, tokenizer, input_news)
    explanation = explain_roberta_prediction(model, tokenizer, input_news)
    result = "Doğru" if prediction == 1 else "Yanlış"
    print(f"Haberin durumu: {result}")
    print("Haberin açıklaması:")
    print(explanation)
    return explanation
# Gradio ile API oluştur
def predict(input_news, language):
if language == "en":
result = enModelPredictAlgo(input_news=input_news)
return {"Sonuç": result}
elif language == "tr":
input_news_en= translate_text(input_news)
result1 = enModelPredictAlgo(input_news_en)
result2= trModelPredictAlgo(input_news=input_news)
return {"İngilizce Model Sonucu": result1, "Türkçe Model Sonucu": result2}
else:
result = {"error": "Unsupported language"}
# return result
# Arayüz
# Build and launch the Gradio UI: free-text input plus a language
# dropdown ("en"/"tr"); the JSON output comes from `predict`.
gr.Interface(fn=predict,
             inputs=[gr.Textbox(label="Text Input"), gr.Dropdown(["en", "tr"], label="Language")],
             outputs="json").launch()
|