import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt
import re
from collections import defaultdict
from huggingface_hub import hf_hub_download
import json
# Regex capturing (non-greedily) the text between a pair of double quotes.
pattern = r'"(.*?)"'
# Hugging Face access token, read from the HF_token environment variable
# (None when the variable is unset).
Secret_token = os.getenv('HF_token')
# Basic edge table. NOTE(review): edge_info is not referenced anywhere else
# in the visible part of this file.
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
edge_info = dataset.to_pandas()
# Narrator biographies. The count and generation columns are read as strings
# and cast to int further down, after placeholder values have been patched.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'), 'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files = 'Teacher_Bios.csv', token = Secret_token,features=features )
narrator_bios = narrator_bios['train'].to_pandas()
# Patch the row labelled 49845: set its rank, then write a temporary 0 into
# 'Number of Narrations' before the integer cast (presumably the raw value is
# not an integer string -- TODO confirm) and store the final count afterwards.
# The order of these four statements matters.
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
# 8125 narrators have no Generation (stored as None in the dataset); they are
# mapped to the sentinel -1 so the column can be cast to int.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
# Hadith texts (matn), one row per 'bookid_hadithid'.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Drop the rows labelled 97550 and 307206.
# NOTE(review): the reason is not visible here -- presumably malformed rows.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# 'KeyAbsent' marks a hadith without a taraf; map it to -1 before the int cast.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
# Edge-level isnad table. The 'Hadiths' column is a string structured like
# {"BookNum_HadithNum", ...} for each edge.
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
# Strip the first and last character, split on commas, take the first quoted
# substring of every piece and split it on "_".
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
# 'Hadiths Cleaned' is a list of lists; each sub-list is [Book ID, Hadith ID].
# Largest taraf ID; used as the upper bound of the taraf sliders in the UI.
taraf_max = np.max(matn_info['taraf_ID'].unique())
# Parse the 'Tarafs' string (first/last character stripped, comma separated)
# into an integer array per edge.
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))
# Colormap used by value_to_hex to colour nodes and edges.
cmap = plt.colormaps['cool']
# Book metadata.
# NOTE(review): this repo id is spelled 'FDSRashid/Hadith_info' (capital H)
# while every other call uses 'FDSRashid/hadith_info' -- confirm it resolves.
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()
# Split 'bookid_hadithid' ("<book>_<hadith>") into two integer columns.
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
# Attach the book columns. pd.merge defaults to an inner join, so matns whose
# Book_ID is absent from Books.csv are dropped here.
matn_info = pd.merge(matn_info, books, on='Book_ID')
# Preprocess narrator_bios into a dictionary keyed by 'Rawi ID'
# ({rawi_id: {column: value}}) for fast per-node lookups in get_node_info.
narrator_info = narrator_bios.set_index('Rawi ID').to_dict(orient='index')
# Download the precomputed lookup file from the dataset repository;
# hf_hub_download returns the local path of the downloaded file.
file_path = hf_hub_download(
    repo_id="FDSRashid/hadith_info", # read in fast lookup data structure
    filename="hadith_lookup.json",
    repo_type="dataset",
    token=Secret_token,
)
# Read the lookup table. The encoding is given explicitly so that decoding
# does not depend on the locale of the host machine.
with open(file_path, 'r', encoding='utf-8') as f:
    hadith_lookup_dict = json.load(f)
# Wrapped in a defaultdict so that an unknown key yields an empty list.
# The values are used elsewhere in this file as positional row indices into
# isnad_info (via .iloc), keyed by 'bookid_hadithid' strings.
HADITH_LOOKUP = defaultdict(list, hadith_lookup_dict)
def value_to_hex(value):
    """
    Map a value through the module-level colormap and return it as a hex colour.
    Parameters:
    value: The value handed to ``cmap``.
    Returns:
    str: An upper-case '#RRGGBB' string; the alpha channel is discarded.
    """
    # NOTE(review): callers pass integer counts; matplotlib colormaps appear to
    # treat integers as lookup-table indices rather than 0-1 fractions -- confirm
    # that this is the intended scaling.
    red, green, blue = (int(channel * 255) for channel in cmap(value)[:3])
    return f"#{red:02X}{green:02X}{blue:02X}"
def get_node_info(node):
    """
    Fetch the biography fields of a narrator from ``narrator_info``.
    Parameters:
    node: Narrator ID; anything ``int()`` accepts (graph nodes are strings).
    Returns:
    tuple: (record dict, number of narrations, generation, rank, famous name).
    Unknown narrators give an empty record and the defaults 1, -1, 'فلان', 'فلان'.
    """
    record = narrator_info.get(int(node), {})
    return (
        record,
        record.get('Number of Narrations', 1),
        record.get('Generation', -1),
        record.get('Narrator Rank', 'فلان'),
        record.get('Famous Name', 'فلان'),
    )
def lookup_hadith(taraf_hadith, hadith_lookup):
    """
    Returns a list of unique elements from the hadith_lookup for the given taraf_hadith.
    Keys that are absent from hadith_lookup contribute nothing. The mapping is
    only read: a missing key is never inserted into it, even when it is a
    defaultdict, so the shared lookup table does not grow with every miss.
    Parameters:
    taraf_hadith (str or list of str): A string or list of strings to look up.
    hadith_lookup (dict or defaultdict): A mapping from key to a list of elements.
    Returns:
    list: A list of unique elements from the lookup results (order unspecified).
    """
    # Ensure taraf_hadith is always a list
    if isinstance(taraf_hadith, str):
        taraf_hadith = [taraf_hadith]
    # .get() instead of indexing: indexing a defaultdict would add every
    # unknown key to it, and indexing a plain dict would raise KeyError.
    unique_elements = {
        elem
        for key in taraf_hadith
        for elem in hadith_lookup.get(key, ())
    }
    # Convert the set to a list for consistency
    return list(unique_elements)
def visualize_isnad(taraf_num, yaxis):
    """
    Draw the complete isnad graph of one taraf and list its hadiths.
    Parameters:
    taraf_num (int): Taraf ID to visualize.
    yaxis (str): Prefix of the '<yaxis> Count' column of isnad_info that is
    used to colour and label the edges.
    Returns:
    tuple: (HTML string holding the pyvis network inside an iframe,
    DataFrame with one row per hadith and end transmitter).
    """
    count_col = f'{yaxis} Count'
    taraf = matn_info[matn_info['taraf_ID'] == taraf_num]

    # Edges whose taraf list contains taraf_num. Copy before assigning so the
    # filtered slice of isnad_info is never written through.
    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
    isnad_hadith = isnad_info[in_taraf].copy()
    # Node ids are normalised to strings such as '99999'.
    isnad_hadith[['Source', 'Destination']] = (
        isnad_hadith[['Source', 'Destination']].astype(int).astype(str)
    )

    # For every hadith of the taraf, find the end transmitters of its own
    # chain (nodes without outgoing edges).
    lst_hadith = []
    end_node_data = {}  # end transmitter ID -> hadith indices (as strings)
    hadith_columns = ['bookid_hadithid', 'matn', 'Book_Name', 'Author', 'Hadith Number']
    hadith_rows = taraf[hadith_columns].itertuples(index=False, name=None)
    for i, (hadith_id, matn, book_name, author, hadith_number) in enumerate(hadith_rows):
        chain = isnad_info.iloc[lookup_hadith(hadith_id, HADITH_LOOKUP)][['Source', 'Destination']]
        chain_graph = nx.from_pandas_edgelist(
            chain, source='Source', target='Destination', create_using=nx.DiGraph()
        )
        for n in (int(n) for n, d in chain_graph.out_degree() if d == 0):
            # Dictionary lookup with defaults, so a narrator that is missing
            # from the biographies no longer produces an empty Series.
            _, _, gen_node, _, name_node = get_node_info(n)
            gen_node = gen_node if pd.notna(gen_node) else -1
            name_node = name_node if pd.notna(name_node) else 'فلان'
            lst_hadith.append([matn, gen_node, name_node, book_name, author, hadith_number, n, i])
            end_node_data.setdefault(n, []).append(str(i))

    df = pd.DataFrame(
        lst_hadith,
        columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author',
                 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'],
    )

    # Lay the whole taraf out with graphviz 'dot'.
    G = nx.from_pandas_edgelist(
        isnad_hadith, source='Source', target='Destination', create_using=nx.DiGraph()
    )
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    x_stretch = 4
    y_stretch = 4
    net = Network(directed=True, select_menu=True, cdn_resources='remote')

    for node, pos in isnad_pos.items():
        node_info, student_narrations, student_gen, student_rank, node_name = get_node_info(node)
        label = f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen}'
        size = 50
        font_color = 'red'
        if node == '99999':
            # Node 99999 is drawn larger, in black and without a rank line.
            label = f'{node_name} \n ID: {node} - Gen {student_gen}'
            size = 70
            font_color = 'black'
        elif int(node) in end_node_data:
            # End transmitters also list the hadith indices that stop at them.
            hadith_numbers = " ".join(end_node_data[int(node)])
            label += f' \n Hadith {hadith_numbers}'
        net.add_node(
            node,
            font={'size': 30, 'color': font_color},
            color=value_to_hex(student_narrations),
            label=label,
            x=pos[0] * x_stretch,
            y=-pos[1] * y_stretch,  # flip the vertical axis of the layout
            size=size,
        )

    edge_rows = isnad_hadith[['Source', 'Destination', count_col]].itertuples(index=False, name=None)
    for source, target, count in edge_rows:
        net.add_edge(source, target, color=value_to_hex(int(count)), label=f"{count}")

    net.toggle_physics(False)
    # Single quotes are replaced because the page is embedded in a
    # single-quoted srcdoc attribute below.
    html = net.generate_html().replace("'", '"')
    iframe = (
        '<iframe style="width: 100%; height: 600px; margin: 0 auto" name="result" '
        'sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups '
        'allow-top-navigation-by-user-activation allow-downloads" '
        f"allowfullscreen frameborder=\"0\" srcdoc='{html}'></iframe>"
    )

    df = df.rename(columns={
        'Generation': 'Gen.',
        'Book Hadith Number': 'Hdth Num',
        'End Transmitter ID': 'End Narrator ID',
        'Hadith Number': 'Index',
        'Book_Name': 'Book',
        'Name': 'Final Narrator',
    })
    return iframe, df.drop('Hdth Num', axis=1)
def _parse_hadith_selection(hadith_str):
    """Parse a selection such as "1, 2, 4-7" into a list of inclusive (start, end) pairs."""
    ranges = []
    for token in hadith_str.split(','):
        token = token.strip()
        if '-' in token:
            if token.count('-') > 1:
                raise gr.Error('Please use only one Dash mark!')
            # Exactly one dash, so the split always gives two parts.
            bounds = [bound.strip() for bound in token.split('-')]
            # isdecimal() rather than isnumeric(): the latter accepts
            # characters such as superscripts that int() rejects.
            if not all(bound.isdecimal() for bound in bounds):
                raise gr.Error('Invalid Begining')
            ranges.append((int(bounds[0]), int(bounds[1])))
        elif token.isdecimal():
            ranges.append((int(token), int(token)))
        else:
            raise gr.Error("Invalid Data format!")
    return ranges


def visualize_subTaraf(taraf_num, hadith_str, yaxis):
    """
    Draw the isnad graph of a chosen subset of the hadiths of one taraf.
    Parameters:
    taraf_num (int): Taraf ID.
    hadith_str (str): Comma separated, 0-based hadith indices and inclusive
    ranges within the taraf, e.g. "1, 2, 4-7".
    yaxis (str): Prefix of the '<yaxis> Count' column of isnad_info that is
    used to colour and label the edges.
    Returns:
    tuple: (HTML string holding the pyvis network inside an iframe,
    DataFrame describing the selected hadiths).
    Raises:
    gr.Error: If the selection is malformed, selects nothing, or refers to an
    index that does not exist in the taraf.
    """
    # A reversed range such as "7-4" selects nothing, as before.
    ranges = [(start, end) for start, end in _parse_hadith_selection(hadith_str) if start <= end]
    if not ranges:
        raise gr.Error("Invalid Data format!")

    taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
    num_hadith = taraf.shape[0]
    # Indices are 0-based, so num_hadith itself is already out of range.
    # The check runs before the ranges are expanded, which keeps the
    # expansion bounded by the size of the taraf.
    if max(end for _, end in ranges) >= num_hadith:
        raise gr.Error(f'Hadith index outside of range. Total Number of Hadith in this Taraf: {num_hadith}')
    hadiths = sorted({index for start, end in ranges for index in range(start, end + 1)})
    sub_taraf = taraf.iloc[hadiths]

    count_col = f'{yaxis} Count'
    # Copy before assigning so isnad_info itself is never written through.
    isnad_hadith = isnad_info.iloc[
        lookup_hadith(sub_taraf['bookid_hadithid'].to_list(), HADITH_LOOKUP)
    ].copy()
    # Node ids are normalised to strings such as '99999'.
    isnad_hadith[['Source', 'Destination']] = (
        isnad_hadith[['Source', 'Destination']].astype(int).astype(str)
    )

    G = nx.from_pandas_edgelist(
        isnad_hadith, source='Source', target='Destination', create_using=nx.DiGraph()
    )
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    x_stretch = 4
    y_stretch = 4
    net = Network(directed=True, select_menu=True, cdn_resources='remote')

    for node, pos in isnad_pos.items():
        node_info, student_narrations, student_gen, student_rank, node_name = get_node_info(node)
        x = pos[0] * x_stretch
        y = -1 * pos[1] * y_stretch  # flip the vertical axis of the layout
        if node == '99999':
            # Node 99999 is drawn larger, in black and without a rank line.
            net.add_node(node, font={'size': 50, 'color': 'black'}, color='#000000',
                         label=f'{node_name} \n ID: {node} - Gen {student_gen}',
                         x=x, y=y, size=70)
        else:
            net.add_node(node, font={'size': 30, 'color': 'red'},
                         color=value_to_hex(student_narrations),
                         label=f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen}',
                         x=x, y=y, size=50)

    edge_rows = isnad_hadith[['Source', 'Destination', count_col]].itertuples(index=False, name=None)
    for source, target, count in edge_rows:
        net.add_edge(source, target, color=value_to_hex(int(count)), label=f"{count}")

    net.toggle_physics(False)
    # Single quotes are replaced because the page is embedded in a
    # single-quoted srcdoc attribute below.
    html = net.generate_html().replace("'", '"')
    iframe = (
        '<iframe style="width: 100%; height: 600px; margin: 0 auto" name="result" '
        'sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups '
        'allow-top-navigation-by-user-activation allow-downloads" '
        f"allowfullscreen frameborder=\"0\" srcdoc='{html}'></iframe>"
    )
    return iframe, sub_taraf[['matn', 'Book_Name', 'Author', 'Book_ID', 'Hadith Number']]
def taraf_booknum(taraf_num):
    """
    List the hadiths of a taraf together with their position inside it.
    Parameters:
    taraf_num (int): Taraf ID.
    Returns:
    DataFrame: matn, book and hadith numbers, book name and author of every
    hadith of the taraf, plus a 0-based 'Index' column.
    """
    # Copy first: adding a column to a filtered slice of matn_info triggers
    # pandas' SettingWithCopyWarning.
    taraf = matn_info[matn_info['taraf_ID'] == taraf_num].copy()
    taraf['Index'] = np.arange(len(taraf))
    return taraf[['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author', 'Index']]
def visualize_hadith_isnad(df, yaxis):
    """
    Draw the combined isnad graph of hand-picked hadiths.
    Parameters:
    df (DataFrame): Selection with the columns 'Book_ID' and 'Hadith Number'.
    Rows whose cells are blank or not numeric are ignored.
    yaxis (str): Prefix of the '<yaxis> Count' column of isnad_info that is
    used to colour and label the edges.
    Returns:
    tuple: (HTML string holding the pyvis network inside an iframe,
    DataFrame describing the matched hadiths).
    """
    # The input grid may deliver floats (1.0), strings or empty cells. Coerce
    # to integers so the ids look like '1_5' rather than '1.0_5.0'; the
    # caller's frame is left unmodified.
    selection = (
        df[['Book_ID', 'Hadith Number']]
        .apply(pd.to_numeric, errors='coerce')
        .dropna()
        .astype(int)
    )
    taraf_hadith = (
        selection['Book_ID'].astype(str) + '_' + selection['Hadith Number'].astype(str)
    ).to_list()
    hadith = matn_info[matn_info['bookid_hadithid'].isin(taraf_hadith)]

    count_col = f'{yaxis} Count'
    # Copy before assigning so isnad_info itself is never written through.
    isnad_hadith = isnad_info.iloc[lookup_hadith(taraf_hadith, HADITH_LOOKUP)].copy()
    # Node ids are normalised to strings such as '99999'.
    isnad_hadith[['Source', 'Destination']] = (
        isnad_hadith[['Source', 'Destination']].astype(int).astype(str)
    )

    G = nx.from_pandas_edgelist(
        isnad_hadith, source='Source', target='Destination', create_using=nx.DiGraph()
    )
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    x_stretch = 4
    y_stretch = 4
    net = Network(directed=True, select_menu=True, cdn_resources='remote')

    for node, pos in isnad_pos.items():
        node_info, student_narrations, student_gen, student_rank, node_name = get_node_info(node)
        x = pos[0] * x_stretch
        y = -1 * pos[1] * y_stretch  # flip the vertical axis of the layout
        if node == '99999':
            # Node 99999 is drawn larger, in black and without a rank line.
            net.add_node(node, font={'size': 50, 'color': 'black'}, color='#000000',
                         label=f'{node_name} \n ID: {node} - Gen {student_gen}',
                         x=x, y=y, size=70)
        else:
            net.add_node(node, font={'size': 30, 'color': 'red'},
                         color=value_to_hex(student_narrations),
                         label=f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen}',
                         x=x, y=y, size=50)

    edge_rows = isnad_hadith[['Source', 'Destination', count_col]].itertuples(index=False, name=None)
    for source, target, count in edge_rows:
        net.add_edge(source, target, color=value_to_hex(int(count)), label=f"{count}")

    net.toggle_physics(False)
    # Single quotes are replaced because the page is embedded in a
    # single-quoted srcdoc attribute below.
    html = net.generate_html().replace("'", '"')
    iframe = (
        '<iframe style="width: 100%; height: 600px; margin: 0 auto" name="result" '
        'sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups '
        'allow-top-navigation-by-user-activation allow-downloads" '
        f"allowfullscreen frameborder=\"0\" srcdoc='{html}'></iframe>"
    )
    return iframe, hadith[['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author', 'taraf_ID']]
def visualize_narrator_taraf(taraf_num, narrator, yaxis):
    """
    Draw the isnad graph of those hadiths of a taraf that pass through a narrator.
    Parameters:
    taraf_num (int): Taraf ID.
    narrator (str): Narrator ID as shown in the node labels ('ID: ...').
    yaxis (str): Prefix of the '<yaxis> Count' column of isnad_info that is
    used to colour and label the edges.
    Returns:
    tuple: (HTML string holding the pyvis network inside an iframe,
    DataFrame describing the hadiths whose chain contains the narrator).
    Raises:
    gr.Error: If the narrator does not occur in the isnad of the taraf.
    """
    narrator = str(narrator).strip()
    count_col = f'{yaxis} Count'
    node_cols = ['Source', 'Destination']

    taraf = matn_info[matn_info['taraf_ID'] == taraf_num].copy()
    taraf['Index'] = np.arange(len(taraf))

    # Graph of the whole taraf; only used to validate the narrator. Node ids
    # are normalised to strings in every graph of this function so that
    # membership tests against the (string) narrator input agree.
    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
    taraf_edges = isnad_info.loc[in_taraf, node_cols].astype(int).astype(str)
    G = nx.from_pandas_edgelist(
        taraf_edges, source='Source', target='Destination', create_using=nx.DiGraph()
    )
    if narrator not in G.nodes():
        raise gr.Error('Narrator not in Isnad of Taraf!')

    matns_with_narrator = []
    end_node = {}  # end transmitter ID (str) -> taraf indices (as strings)
    for idx, hadith_id in enumerate(taraf['bookid_hadithid']):
        chain = isnad_info.iloc[lookup_hadith(hadith_id, HADITH_LOOKUP)][node_cols]
        chain = chain.astype(int).astype(str)
        G1 = nx.from_pandas_edgelist(
            chain, source='Source', target='Destination', create_using=nx.DiGraph()
        )
        if narrator in G1.nodes:
            matns_with_narrator.append(hadith_id)
            # Record where each chain that contains the narrator ends.
            for node, degree in G1.out_degree():
                if degree == 0:
                    end_node.setdefault(node, []).append(str(idx))

    # Rebuild the graph from the chains that contain the narrator. The count
    # column is kept because the edge colours and labels need it.
    edge_rows = lookup_hadith(matns_with_narrator, HADITH_LOOKUP)
    isnad_hadith = isnad_info.iloc[edge_rows][node_cols + [count_col]].copy()
    isnad_hadith[node_cols] = isnad_hadith[node_cols].astype(int).astype(str)
    # One row per edge (first occurrence wins).
    isnad_hadith = isnad_hadith.drop_duplicates(subset=node_cols)
    G = nx.from_pandas_edgelist(
        isnad_hadith, source='Source', target='Destination', create_using=nx.DiGraph()
    )
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

    narrator_matn_info = taraf[taraf['bookid_hadithid'].isin(matns_with_narrator)].copy()
    narrator_matn_info['Subset Index'] = np.arange(len(narrator_matn_info))

    # Visualization with pyvis
    x_stretch = 4
    y_stretch = 4
    net = Network(directed=True, select_menu=True, cdn_resources='remote')
    for node, pos in isnad_pos.items():
        node_info, student_narrations, student_gen, student_rank, node_name = get_node_info(node)
        label = f'{node_name} \n ID: {node} - Gen {student_gen}'
        # Node 99999 is drawn larger and in black.
        size = 70 if node == '99999' else 50
        font_color = 'black' if node == '99999' else 'red'
        hadiths = f" \n Hadiths {', '.join(end_node[node])}" if node in end_node else ''
        net.add_node(
            node,
            font={'size': 30, 'color': font_color},
            color=value_to_hex(student_narrations),
            label=f"{label} {hadiths}",
            x=pos[0] * x_stretch,
            y=-pos[1] * y_stretch,  # flip the vertical axis of the layout
            size=size,
        )

    for source, target, count in isnad_hadith.itertuples(index=False, name=None):
        net.add_edge(source, target, color=value_to_hex(int(count)), label=f"{count}")

    net.toggle_physics(False)
    # Single quotes are replaced because the page is embedded in a
    # single-quoted srcdoc attribute below.
    html = net.generate_html().replace("'", '"')
    iframe = (
        '<iframe style="width: 100%; height: 600px; margin: 0 auto" name="result" '
        'sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups '
        'allow-top-navigation-by-user-activation allow-downloads" '
        f"allowfullscreen frameborder=\"0\" srcdoc='{html}'></iframe>"
    )
    return iframe, narrator_matn_info[
        ['matn', 'Book_Name', 'Author', 'Book_ID', 'Hadith Number', 'Index', 'Subset Index']
    ]
# Gradio front end: one tab per visualizer. Each button forwards the widgets
# of its own tab to the matching function defined above.
with gr.Blocks() as demo:
    # Tab 1: full isnad graph of a taraf.
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn = gr.Button('Submit')
        #
        btn.click(
            fn=visualize_isnad,
            inputs=[taraf_number, Yaxis],
            outputs=[
                gr.HTML(),
                gr.DataFrame(wrap=True, column_widths=[43, 8, 11, 11, 10, 8, 9]),
            ],
        )

    # Tab 2: table of the hadiths of a taraf with their index.
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn_num = gr.Button('Retrieve')
        btn_num.click(
            fn=taraf_booknum,
            inputs=[taraf_num],
            outputs=[gr.DataFrame(wrap=True)],
        )

    # Tab 3: graph of a chosen subset of the hadiths of a taraf.
    with gr.Tab('Sub Taraf Visualizer'):
        taraf_num = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        Yaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        hadith_str = gr.Textbox(
            label='Hadith Selection',
            info='Choose which range of Hadith you would like visualized from the Taraf (eg "1, 2, 4-7")',
        )
        btn_sub = gr.Button('Visualize')
        btn_sub.click(
            fn=visualize_subTaraf,
            inputs=[taraf_num, hadith_str, Yaxis],
            outputs=[gr.HTML(), gr.DataFrame(wrap=True)],
        )

    # Tab 4: graph of hadiths picked by book ID and hadith number.
    with gr.Tab('Select Hadith Isnad Visualizer'):
        yyaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        hadith_selection = gr.Dataframe(
            headers=["Book_ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"),
        )
        btn_hadith = gr.Button('Visualize')
        btn_hadith.click(
            fn=visualize_hadith_isnad,
            inputs=[hadith_selection, yyaxis],
            outputs=[gr.HTML(), gr.DataFrame(wrap=True)],
        )

    # Tab 5: graph of the hadiths of a taraf that pass through one narrator.
    with gr.Tab('Taraf Narrator Isnad Visualizer'):
        Yaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        narr = gr.Textbox(
            label='Narrator',
            info='Choose a Narrator (Refer to full isnad from previous tab)',
        )
        btn_narr = gr.Button('Visualize')
        btn_narr.click(
            fn=visualize_narrator_taraf,
            inputs=[taraf_number, narr, Yaxis],
            outputs=[gr.HTML(), gr.DataFrame(wrap=True)],
        )

# Start the app.
demo.launch()