Spaces:
Configuration error
Configuration error
omarelsayeed
committed on
Commit
•
8fdd1a4
1
Parent(s):
c5c6917
first commit
Browse files- app.py +51 -0
- aravec_model/New Text Document.txt +0 -0
- aravec_model/vocab/New Text Document.txt +0 -0
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import spacy
|
3 |
+
import gradio as gr
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
# Load the verse table; the path is resolved relative to the working directory.
dataset = pd.read_excel('Dataset-Verse-by-Verse.xlsx')
# Expose the Arabic verse column under the shorter name 'text'.
dataset.rename(columns={'ArabicText': 'text'}, inplace=True)
# Load the spaCy pipeline named 'aravec_model'.
nlp = spacy.load('aravec_model')
# Parse every verse once at start-up so a query only has to parse its own
# text. nlp.pipe processes the texts in batches instead of one call per verse.
# NOTE(review): verses are parsed exactly as stored, while queries are
# normalized by clean_text first -- confirm the spreadsheet text is already
# normalized the same way.
all_docs = list(nlp.pipe(dataset['text']))
|
11 |
+
|
12 |
+
# Tilde, tatweel (U+0640) and the tashkeel marks U+064B..U+0652.
_TASHKEEL_RE = re.compile(r'[~\u0640\u064B-\u0652]')
# Alef with madda / hamza above / hamza below.
_ALEF_VARIANTS_RE = re.compile(r'[\u0622\u0623\u0625]')
# Any character repeated three or more times in a row (also matches two).
_ELONGATION_RE = re.compile(r'(.)\1+')
# Anything that is neither a word character nor whitespace.
_NON_WORD_RE = re.compile(r'[^\w\s]')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text):
    """Normalize Arabic text before it is handed to the spaCy pipeline.

    Steps, in order:
      1. drop tashkeel (diacritics), tatweel and '~';
      2. fold the alef variants (madda, hamza above/below) onto bare alef;
      3. shrink any run of a repeated character to exactly two;
      4. drop every character that is not a word character or whitespace;
      5. collapse whitespace runs to one space and trim both ends.

    Punctuation is removed *before* whitespace is collapsed so that deleting
    a free-standing mark (e.g. a Quranic pause sign between two words) cannot
    leave a double space or a leading/trailing space behind.

    Args:
        text: The raw input string.

    Returns:
        The normalized string.
    """
    text = _TASHKEEL_RE.sub('', text)
    text = _ALEF_VARIANTS_RE.sub('\u0627', text)
    text = _ELONGATION_RE.sub(r'\1\1', text)
    text = _NON_WORD_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()
|
29 |
+
|
30 |
+
def get_similar_sentences(text):
    """Return the ten pre-parsed verses most similar to *text*.

    The query is normalized with clean_text, parsed with the module-level
    ``nlp`` pipeline and compared against every entry of ``all_docs`` via
    ``Doc.similarity``.

    Args:
        text: The query string.

    Returns:
        A dict mapping ' [ <SurahNameArabic> ] <verse text>' captions to
        their similarity score, inserted from most to least similar.
        Captions that are identical share one entry, so the dict may hold
        fewer than ten items.
    """
    query_doc = nlp(clean_text(text))

    # (verse doc, score, row position) for every verse in the corpus.
    scored = [
        (verse_doc, query_doc.similarity(verse_doc), row)
        for row, verse_doc in enumerate(all_docs)
    ]
    ranked = sorted(scored, key=lambda entry: entry[1], reverse=True)

    results = {}
    for verse_doc, score, row in ranked[:10]:
        # Prefix each verse with the Arabic name of its surah.
        caption = ' [ ' + dataset['SurahNameArabic'][row] + ' ] ' + verse_doc.text
        results[caption] = score
    return results
|
40 |
+
|
41 |
+
# Single-line text box for the query verse. The top-level component classes
# replace the legacy gr.inputs / gr.outputs namespaces, which newer Gradio
# releases no longer provide.
text_input = gr.Textbox(lines=1, label="Enter a Quran Verse")

# Renders the {caption: score} mapping returned by get_similar_sentences.
label = gr.Label()

# Sample queries offered as clickable examples in the UI.
examples = [
    'الحمدلله رب العالمين',
    'مثلهم كمثل الذي استوقد نارًا فلما أضاءت ما حوله ذهب الله بنورهم وتركهم في ظلماتٍ لا يبصرون',
    'إن الذين كفروا سواء عليهم أأنذرتهم أم لم تنذرهم لا يؤمنون',
    'ونادى أصحاب الجنة أصحاب النار أن قد وجدنا ما وعدنا ربنا حقا فهل وجدتم ما وعد ربكم حقا ۖ قالوا نعم ۚ فأذن مؤذن بينهم أن لعنة الله على الظالمين',
]

intf = gr.Interface(
    fn=get_similar_sentences,
    inputs=text_input,
    outputs=label,
    examples=examples,
)
# share=True additionally requests a public share link from Gradio.
intf.launch(share=True)
|
aravec_model/New Text Document.txt
ADDED
File without changes
|
aravec_model/vocab/New Text Document.txt
ADDED
File without changes
|