Spaces:

sartifyllc
/

Swahili-Text-Embeddings-Leaderboard

Running

App Files Files Community

Swahili-Text-Embeddings-Leaderboard / app.py

Mollel

Update app.py

4a04f21 verified 7 months ago

raw

history blame

4.32 kB

	import streamlit as st
	import pandas as pd
	import io
	import re


	# Constants
	# Project repository URL (not referenced in the visible part of this file).
	GITHUB_URL = "https://github.com/Sartify/STEL"
	# Column names treated as model metadata rather than benchmark scores:
	# display_leaderboard() always shows whichever of these exist in the table
	# and offers every other column in the benchmark multiselect.
	POSSIBLE_NON_BENCHMARK_COLS = ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka", "Dimension", "Average"]

	def extract_table_from_markdown(markdown_text, table_start):
	    """Extract the first markdown table that begins with ``table_start``.

	    Scans ``markdown_text`` line by line. Capturing starts at the first line
	    that starts with ``table_start`` and stops at the first blank
	    (whitespace-only) line after that, or at the end of the text.

	    Args:
	        markdown_text: Full markdown document as a string.
	        table_start: Prefix identifying the table's header row.

	    Returns:
	        The captured lines joined with ``'\\n'``, or an empty string when no
	        line starts with ``table_start``.
	    """
	    table_lines = []
	    capturing = False
	    # splitlines() (rather than split('\n')) also consumes '\r\n', so files
	    # with Windows line endings do not leave a stray '\r' on every row.
	    for line in markdown_text.splitlines():
	        if not capturing and line.startswith(table_start):
	            capturing = True
	        if capturing:
	            if not line.strip():
	                # A blank line terminates the table.
	                break
	            table_lines.append(line)
	    return '\n'.join(table_lines)

	def _split_markdown_row(line):
	    """Split one markdown table row into stripped cells, keeping empty ones."""
	    inner = line.strip()
	    # Remove only the outer border pipes; interior empty cells must survive
	    # so that every value stays under its own header.
	    if inner.startswith('|'):
	        inner = inner[1:]
	    if inner.endswith('|'):
	        inner = inner[:-1]
	    return [cell.strip() for cell in inner.split('|')]


	def markdown_table_to_df(table_content):
	    """Convert a markdown table to a pandas DataFrame.

	    The first non-blank line is the header row, the second is assumed to be
	    the ``|---|---|`` separator and is skipped, and every following non-blank
	    line is a data row. Cells are split on the plain ``|`` character.

	    Args:
	        table_content: The table's lines joined by newlines.

	    Returns:
	        A DataFrame with one column per header cell. Columns whose values all
	        parse as numbers are converted to float; the others stay as strings
	        (an empty cell is kept as ``''``). Blank input yields an empty
	        DataFrame.
	    """
	    lines = [line for line in table_content.splitlines() if line.strip()]
	    if not lines:
	        return pd.DataFrame()

	    headers = _split_markdown_row(lines[0])

	    # lines[1] is the header separator row, so data starts at index 2.
	    data = [_split_markdown_row(line) for line in lines[2:]]

	    df = pd.DataFrame(data, columns=headers)

	    # Every cell starts out as text; attempt the float conversion on each
	    # column regardless of which string dtype pandas inferred for it.
	    for col in df.columns:
	        try:
	            df[col] = df[col].astype(float)
	        except (ValueError, TypeError):
	            pass  # Not a purely numeric column: keep it as strings.

	    return df

	def setup_page():
	    """Configure the Streamlit page and render its title and banner image."""
	    page_options = {
	        "page_title": "Swahili Text Embeddings Leaderboard",
	        "page_icon": "⚡",
	        "layout": "wide",
	    }
	    st.set_page_config(**page_options)
	    st.title("⚡ Swahili Text Embeddings Leaderboard (STEL)")
	    # NOTE(review): the "username/repo" path segment looks like an unfilled
	    # placeholder - confirm the real location of STEL.jpg.
	    banner_url = "https://raw.githubusercontent.com/username/repo/main/files/STEL.jpg"
	    st.image(banner_url, width=300)

	# def display_leaderboard(df):
	# """Display the leaderboard."""
	# st.header("📊 Leaderboard")

	# # Determine which non-benchmark columns are present
	# present_non_benchmark_cols = [col for col in POSSIBLE_NON_BENCHMARK_COLS if col in df.columns]

	# # Add filters
	# columns_to_filter = [col for col in df.columns if col not in present_non_benchmark_cols]
	# selected_columns = st.multiselect("Select benchmarks to display:", columns_to_filter, default=columns_to_filter)

	# # Filter dataframe
	# df_display = df[present_non_benchmark_cols + selected_columns]

	# # Display dataframe
	# st.dataframe(df_display.style.format("{:.4f}", subset=selected_columns))

	# # Download buttons
	# csv = df_display.to_csv(index=False)
	# st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")

	def display_leaderboard(df):
	    """Render the leaderboard section: benchmark picker, table, CSV download.

	    Columns of ``df`` that are listed in ``POSSIBLE_NON_BENCHMARK_COLS`` are
	    always shown; all remaining columns are offered in a multiselect (all
	    selected by default). The displayed table formats float64 columns to four
	    decimal places, and the same filtered view is offered as a CSV download.

	    Args:
	        df: Leaderboard DataFrame.
	    """
	    st.header("📊 Leaderboard")

	    # Split the columns into always-visible metadata and selectable benchmarks.
	    meta_cols = []
	    for name in POSSIBLE_NON_BENCHMARK_COLS:
	        if name in df.columns:
	            meta_cols.append(name)
	    benchmark_cols = [name for name in df.columns if name not in meta_cols]

	    chosen = st.multiselect(
	        "Select benchmarks to display:",
	        benchmark_cols,
	        default=benchmark_cols,
	    )

	    # Metadata first, then the benchmarks the user picked.
	    view = df[meta_cols + chosen]

	    # Only float64 columns receive the 4-decimal format; text is left alone.
	    float_cols = [name for name in view.columns if view[name].dtype == 'float64']
	    styled = view.style.format("{:.4f}", subset=float_cols)
	    st.dataframe(styled)

	    # Offer exactly what is on screen as a CSV file.
	    st.download_button(
	        label="Download as CSV",
	        data=view.to_csv(index=False),
	        file_name="leaderboard.csv",
	        mime="text/csv",
	    )


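	# NOTE: the rest of the code is elided here ("remains the same"); the definitions of display_evaluation(), display_contribution() and display_sponsorship(), which main() calls, are not shown in this file and must be present for the app to run.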

	def main():
	    """Build the page: read README.md, parse its leaderboard table, render it.

	    Reads ``README.md`` from the current working directory, extracts the
	    table whose header row starts with ``| Model Name``, converts it to a
	    DataFrame and renders the leaderboard followed by the remaining page
	    sections and a closing message.

	    Raises:
	        OSError: If ``README.md`` cannot be opened.
	    """
	    setup_page()

	    # Read README content. The encoding is explicit so the result does not
	    # depend on the platform's default locale.
	    with open("README.md", "r", encoding="utf-8") as f:
	        readme_content = f.read()

	    # Extract and process leaderboard table. The marker is a plain pipe:
	    # "\|" would be a literal backslash followed by a pipe, which never
	    # matches a markdown row that starts with "| Model Name".
	    leaderboard_table = extract_table_from_markdown(readme_content, "| Model Name")
	    df_leaderboard = markdown_table_to_df(leaderboard_table)

	    display_leaderboard(df_leaderboard)
	    # NOTE(review): the three section helpers below are not defined in the
	    # visible part of this file - confirm they exist before deploying.
	    display_evaluation()
	    display_contribution()
	    display_sponsorship()

	    st.markdown("---")
	    st.markdown("Thank you for being part of this effort to advance Swahili language technologies!")

	# Script entry point: build the page when this file is executed as __main__.
	if __name__ == "__main__":
	    main()