Spaces:

omar0scarf
/

arabic-analyzer

Sleeping

App Files Files Community

arabic-analyzer / app.py

omar0scarf

إزالة خيار share=True لأنه غير مدعوم في Hugging Face Spaces

97c4c9c about 1 month ago

raw

history blame contribute delete

6.33 kB

	import gradio as gr
	import torch
	from transformers import (
	AutoTokenizer,
	AutoModelForCausalLM,
	AutoModelForSequenceClassification,
	pipeline
	)
	from datasets import load_dataset
	import numpy as np
	import re
	import os

	# Read the Hugging Face access token from the environment; refuse to start
	# without it because the base model is loaded with this token below.
	HF_TOKEN = os.getenv('HF_TOKEN')
	if not HF_TOKEN:
	    raise ValueError("يرجى تعيين متغير البيئة HF_TOKEN")

	# Initialise the models.
	print("جاري تهيئة النماذج...")
	base_model_name = "aubmindlab/aragpt2-base"
	# Use the sentiment fine-tuned CAMeLBERT checkpoint. The plain
	# "bert-base-arabic-camelbert-msa" checkpoint is a base language model with no
	# trained classification head, so a 'sentiment-analysis' pipeline built on it
	# would not emit the 'positive' / 'negative' labels analyze_sentiment checks.
	sentiment_model_name = "CAMeL-Lab/bert-base-arabic-camelbert-msa-sentiment"

	# Load the tokenizer and causal language model used for text generation.
	tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=HF_TOKEN)
	model = AutoModelForCausalLM.from_pretrained(base_model_name, use_auth_token=HF_TOKEN)

	# Both pipelines run on the first GPU when CUDA is available, else on CPU (-1).
	device_index = 0 if torch.cuda.is_available() else -1

	# Text-generation pipeline shared by the summary and reply helpers.
	text_generator = pipeline(
	    'text-generation',
	    model=model,
	    tokenizer=tokenizer,
	    device=device_index,
	)

	# Sentiment classification pipeline.
	sentiment_analyzer = pipeline(
	    'sentiment-analysis',
	    model=sentiment_model_name,
	    tokenizer=AutoTokenizer.from_pretrained(sentiment_model_name),
	    device=device_index,
	)

	def clean_arabic_text(text):
	    """Return a normalised copy of *text* suitable for the analysis helpers.

	    Steps, in order: collapse whitespace, strip Arabic diacritics, replace
	    every non-whitespace character outside the Arabic Unicode block with a
	    space, unify alef and yaa variants, and shorten letter elongation.

	    Only runs of three or more identical characters are collapsed (to a
	    single character). Legitimate doubled letters, as in "الله", are kept;
	    collapsing every repeat would corrupt such words.
	    """
	    # Collapse newlines and runs of whitespace into single spaces.
	    text = ' '.join(text.split())
	    # Strip diacritics (tashkeel).
	    text = re.sub(r'[\u064B-\u065F\u0670]', '', text)
	    # Replace unwanted symbols (anything non-Arabic that is not whitespace).
	    text = re.sub(r'[^\u0600-\u06FF\s]', ' ', text)
	    # Unify alef and yaa variants.
	    text = re.sub('[إأآا]', 'ا', text)
	    text = re.sub('[ىي]', 'ي', text)
	    # Remove elongation: three or more repeats of a character become one.
	    text = re.sub(r'(.)\1{2,}', r'\1', text)
	    # The symbol replacement above can leave runs of spaces; collapse them
	    # again and trim both ends.
	    return ' '.join(text.split())

	def analyze_sentiment(text):
	    """Classify the sentiment of *text*.

	    Returns a ``(label, score)`` tuple. ``label`` is the Arabic display name:
	    "إيجابي" for a 'positive' result, "سلبي" for 'negative', and "محايد" for
	    any other label. ``score`` is the score reported by ``sentiment_analyzer``.

	    This is best-effort: if the analyzer raises, the neutral label is
	    returned with a score of 0.5 instead of propagating the error.
	    """
	    display_names = {'positive': "إيجابي", 'negative': "سلبي"}
	    try:
	        result = sentiment_analyzer(text)[0]
	        # Compare case-insensitively so a differently-cased label still maps.
	        label = str(result['label']).lower()
	        return display_names.get(label, "محايد"), result['score']
	    except Exception:
	        # Catch Exception rather than using a bare except, so that
	        # KeyboardInterrupt / SystemExit are not swallowed.
	        return "محايد", 0.5

	def summarize_text(text, max_length=100):
	    """Ask the text generator for a summary of *text*.

	    The text is embedded in an Arabic "summarise the following text" prompt
	    and sampled once from ``text_generator``. ``max_length`` is forwarded to
	    the generator unchanged.

	    Returns the ``generated_text`` of the first returned sequence, or a fixed
	    Arabic failure message if generation raises.
	    """
	    # NOTE(review): the returned generated_text presumably still contains the
	    # prompt itself - confirm whether it should be stripped before display.
	    try:
	        outputs = text_generator(
	            f"لخص النص التالي: {text}",
	            max_length=max_length,
	            num_return_sequences=1,
	            no_repeat_ngram_size=2,
	            do_sample=True,
	            top_k=50,
	            top_p=0.95,
	            temperature=0.7,
	        )
	        return outputs[0]['generated_text']
	    except Exception:
	        # Best-effort: report failure to the user instead of crashing, but
	        # do not swallow KeyboardInterrupt / SystemExit as a bare except did.
	        return "لم نتمكن من تلخيص النص"

	def suggest_response(text):
	    """Ask the text generator for a suggested reply to *text*.

	    The text is embedded in an Arabic "suggest a suitable reply to the
	    following text" prompt and sampled once from ``text_generator`` with a
	    ``max_length`` of 150.

	    Returns the ``generated_text`` of the first returned sequence, or a fixed
	    Arabic failure message if generation raises.
	    """
	    # NOTE(review): the returned generated_text presumably still contains the
	    # prompt itself - confirm whether it should be stripped before display.
	    try:
	        outputs = text_generator(
	            f"اقترح رداً مناسباً على النص التالي: {text}",
	            max_length=150,
	            num_return_sequences=1,
	            no_repeat_ngram_size=2,
	            do_sample=True,
	            top_k=50,
	            top_p=0.95,
	            temperature=0.7,
	        )
	        return outputs[0]['generated_text']
	    except Exception:
	        # Best-effort: report failure to the user instead of crashing, but
	        # do not swallow KeyboardInterrupt / SystemExit as a bare except did.
	        return "لم نتمكن من توليد رد مناسب"

	def detect_topics(text):
	    """Return the list of topic names whose keywords occur in *text*.

	    Matching is by substring, so a keyword also matches inside a longer word
	    (for example with an attached definite article). Topics are reported in
	    the order they are declared; when nothing matches, ``["عام"]`` (general)
	    is returned.

	    Alef variants (أ / إ / آ) and alef maqsura (ى) are folded to ا and ي on
	    both the text and the keywords before comparing. Without this, keywords
	    such as "أسهم", "إنترنت" or "موسيقى" could never match text in which
	    those letters had already been normalised.
	    """
	    topics = {
	        "سياسة": ["حكومة", "وزير", "برلمان", "رئيس", "انتخابات"],
	        "اقتصاد": ["اقتصاد", "سوق", "بورصة", "أسهم", "استثمار"],
	        "رياضة": ["كرة", "مباراة", "فريق", "لاعب", "بطولة"],
	        "تكنولوجيا": ["تقنية", "إنترنت", "تطبيق", "برمجة", "ذكاء اصطناعي"],
	        "ثقافة": ["فن", "أدب", "مسرح", "سينما", "موسيقى"],
	    }

	    # Single-pass character folding applied to both sides of the comparison.
	    fold = str.maketrans({'إ': 'ا', 'أ': 'ا', 'آ': 'ا', 'ى': 'ي'})
	    haystack = text.lower().translate(fold)

	    detected = [
	        topic
	        for topic, keywords in topics.items()
	        if any(keyword.translate(fold) in haystack for keyword in keywords)
	    ]
	    return detected or ["عام"]

	def analyze_text(text, include_summary=True, include_response=True):
	    """Build the full analysis report for *text*.

	    The text is cleaned, its sentiment and topics are determined, and an
	    Arabic report is assembled. When ``include_summary`` is true a generated
	    summary is appended; when ``include_response`` is true a suggested reply
	    is appended.

	    Returns the report string. Empty, whitespace-only or ``None`` input
	    yields a prompt asking for text, and any exception raised during the
	    analysis is reported as an Arabic error message rather than propagated.
	    """
	    # Guard against None as well as blank input: calling .strip() on None
	    # would raise before the try block below could handle it.
	    if not text or not text.strip():
	        return "الرجاء إدخال نص للتحليل"

	    try:
	        # Clean the text.
	        cleaned_text = clean_arabic_text(text)

	        # Sentiment analysis.
	        sentiment, confidence = analyze_sentiment(cleaned_text)

	        # Topic detection.
	        topics = detect_topics(cleaned_text)

	        # Build the report; the original (uncleaned) text is what is shown.
	        report = f"""🔍 تحليل النص:

	📝 النص الأصلي:
	{text}

	📊 التحليل الأساسي:
	• المشاعر: {sentiment} (الثقة: {confidence:.1%})
	• المواضيع: {', '.join(topics)}
	"""

	        # Append the summary when requested.
	        if include_summary:
	            summary = summarize_text(cleaned_text)
	            report += f"\n✨ ملخص النص:\n{summary}"

	        # Append the suggested reply when requested.
	        if include_response:
	            response = suggest_response(cleaned_text)
	            report += f"\n💡 الرد المقترح:\n{response}"

	        return report

	    except Exception as e:
	        # Top-level boundary for the UI callback: surface the error text to
	        # the user instead of letting the request fail.
	        return f"⚠️ حدث خطأ أثناء التحليل: {e}"

	# Build the user interface: one text box and two option checkboxes feed
	# analyze_text, whose report is shown in a single output text box.
	_text_input = gr.Textbox(
	    label="أدخل النص هنا",
	    placeholder="اكتب نصاً عربياً هنا للتحليل...",
	    lines=5,
	)
	_summary_toggle = gr.Checkbox(label="تضمين ملخص للنص", value=True)
	_response_toggle = gr.Checkbox(label="تضمين رد مقترح", value=True)
	_report_output = gr.Textbox(label="نتائج التحليل", lines=12)

	# Description text shown under the title.
	_description = """نموذج متقدم لتحليل النصوص العربية وتوليد الردود
	✨ المميزات:
	• تحليل المشاعر في النص
	• تحديد المواضيع الرئيسية
	• تلخيص النص
	• اقتراح ردود مناسبة
	• معالجة متقدمة للغة العربية
	"""

	demo = gr.Interface(
	    fn=analyze_text,
	    inputs=[_text_input, _summary_toggle, _response_toggle],
	    outputs=_report_output,
	    title="🤖 المحلل الذكي للنصوص العربية",
	    description=_description,
	    theme="default",
	)

	# Launch the interface only when run as a script.
	if __name__ == "__main__":
	    print("جاري تشغيل النموذج...")
	    demo.launch()