Spaces:
Runtime error
Runtime error
File size: 4,324 Bytes
bda7c4e 6b2b26c 00b7e99 bda7c4e 00b7e99 bda7c4e 00b7e99 6b2b26c bda7c4e 4a04f21 6b2b26c bda7c4e 6b2b26c 1c32a9e bda7c4e 1c32a9e bda7c4e 1c32a9e bda7c4e 4a04f21 bda7c4e 4a04f21 00b7e99 6b2b26c bda7c4e 6b2b26c bda7c4e 6b2b26c 00b7e99 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
import pandas as pd
import io
import re
# Constants
# GitHub URL of the Sartify/STEL repository (not referenced by the code visible in this part of the file).
GITHUB_URL = "https://github.com/Sartify/STEL"
# Column names treated as descriptive metadata rather than benchmarks.
# display_leaderboard() always shows whichever of these exist in the table
# and offers every other column as a selectable benchmark.
POSSIBLE_NON_BENCHMARK_COLS = ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka", "Dimension", "Average"]
def extract_table_from_markdown(markdown_text, table_start):
    """Return the first block of lines that begins with *table_start*.

    Capture starts at the first line that starts with ``table_start`` and
    stops at the next blank (whitespace-only) line or at the end of the text.

    Args:
        markdown_text: The markdown document as a single string.
        table_start: Prefix identifying the first line of the table,
            e.g. ``"| Model Name"``.

    Returns:
        The captured lines joined with a newline, or an empty string when no
        line starts with ``table_start``.
    """
    table_lines = []
    capturing = False
    # splitlines() treats LF, CRLF and CR alike, so captured lines never
    # carry a stray carriage return when the text uses Windows line endings.
    for line in markdown_text.splitlines():
        if not capturing:
            if not line.startswith(table_start):
                continue
            capturing = True
        if not line.strip():
            # A blank line terminates the table.
            break
        table_lines.append(line)
    return '\n'.join(table_lines)
def _split_markdown_row(line):
    """Split one markdown table line into stripped cells, keeping empty cells."""
    text = line.strip()
    # Remove only the outer border pipes so interior empty cells keep their
    # position (escaped pipes inside cells are not handled).
    if text.startswith('|'):
        text = text[1:]
    if text.endswith('|'):
        text = text[:-1]
    return [cell.strip() for cell in text.split('|')]


def markdown_table_to_df(table_content):
    """Convert a markdown table to a pandas DataFrame.

    The first line supplies the column names, the second line (the header
    separator) is skipped, and every following non-empty line becomes a row.
    Empty cells are preserved as empty strings so values stay aligned with
    their columns. Columns whose values all parse as numbers are converted
    to float; all other columns are left as strings.

    Args:
        table_content: Markdown table text, one table line per text line.

    Returns:
        A DataFrame with one row per table row. An empty DataFrame is
        returned when ``table_content`` is empty or whitespace-only.

    Raises:
        ValueError: Raised by pandas (in current versions) when a row has
            more cells than the header.
    """
    if not table_content.strip():
        return pd.DataFrame()

    lines = table_content.split('\n')
    headers = _split_markdown_row(lines[0])

    data = []
    for line in lines[2:]:  # lines[1] is the header separator
        row = _split_markdown_row(line)
        if not any(row):
            # Blank line or a row made only of empty cells.
            continue
        # Pad rows that omit trailing cells so they match the header width.
        row.extend([''] * (len(headers) - len(row)))
        data.append(row)

    df = pd.DataFrame(data, columns=headers)

    # Every parsed cell is a string, so try the float conversion on each
    # column rather than gating on dtype == object (newer pandas versions
    # may infer a dedicated string dtype instead).
    for col in df.columns:
        try:
            df[col] = df[col].astype(float)
        except (ValueError, TypeError):
            pass  # Keep as string if conversion fails
    return df
def setup_page():
    """Configure the Streamlit page and render its banner.

    Applies the page title, icon and wide layout, then draws the main title
    followed by an image loaded from a remote URL.
    """
    page_options = {
        "page_title": "Swahili Text Embeddings Leaderboard",
        "page_icon": "⚡",
        "layout": "wide",
    }
    st.set_page_config(**page_options)
    st.title("⚡ Swahili Text Embeddings Leaderboard (STEL)")
    # NOTE(review): "username/repo" looks like an unfilled placeholder in
    # this URL — confirm the intended repository path.
    banner_url = "https://raw.githubusercontent.com/username/repo/main/files/STEL.jpg"
    st.image(banner_url, width=300)
# def display_leaderboard(df):
# """Display the leaderboard."""
# st.header("📊 Leaderboard")
# # Determine which non-benchmark columns are present
# present_non_benchmark_cols = [col for col in POSSIBLE_NON_BENCHMARK_COLS if col in df.columns]
# # Add filters
# columns_to_filter = [col for col in df.columns if col not in present_non_benchmark_cols]
# selected_columns = st.multiselect("Select benchmarks to display:", columns_to_filter, default=columns_to_filter)
# # Filter dataframe
# df_display = df[present_non_benchmark_cols + selected_columns]
# # Display dataframe
# st.dataframe(df_display.style.format("{:.4f}", subset=selected_columns))
# # Download buttons
# csv = df_display.to_csv(index=False)
# st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
def display_leaderboard(df):
    """Render the leaderboard table with a benchmark selector and CSV export.

    Columns named in POSSIBLE_NON_BENCHMARK_COLS are always shown when they
    exist in ``df``; every other column is offered in a multiselect with all
    entries selected by default. float64 columns are formatted to four
    decimal places, and the displayed table can be downloaded as CSV.

    Args:
        df: DataFrame holding the leaderboard data.
    """
    st.header("📊 Leaderboard")

    # Metadata columns follow the order of the constant, not of the table.
    fixed_cols = [name for name in POSSIBLE_NON_BENCHMARK_COLS if name in df.columns]
    benchmark_cols = [name for name in df.columns if name not in fixed_cols]

    chosen = st.multiselect(
        "Select benchmarks to display:",
        benchmark_cols,
        default=benchmark_cols,
    )

    visible = df[fixed_cols + chosen]

    # Only float64 columns receive the numeric format; text columns are
    # left untouched.
    float_cols = []
    for name in visible.columns:
        if visible[name].dtype == 'float64':
            float_cols.append(name)
    styled = visible.style.format("{:.4f}", subset=float_cols)
    st.dataframe(styled)

    csv_text = visible.to_csv(index=False)
    st.download_button(
        label="Download as CSV",
        data=csv_text,
        file_name="leaderboard.csv",
        mime="text/csv",
    )
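# NOTE: main() calls display_evaluation(), display_contribution() and display_sponsorship(), which are not defined in this part of the file; they must be defined before main() runs, otherwise it raises NameError.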
def main():
    """Build the page: banner, leaderboard parsed from README.md, then the info sections.

    Reads ``README.md`` from the current working directory, extracts the
    table whose first line starts with ``"| Model Name"``, converts it to a
    DataFrame and renders it, followed by the remaining page sections and a
    closing message.

    Raises:
        FileNotFoundError: If ``README.md`` does not exist in the working
            directory.
    """
    setup_page()

    # Decode explicitly as UTF-8 so reading does not depend on the platform's
    # default encoding.
    with open("README.md", "r", encoding="utf-8") as f:
        readme_content = f.read()

    # Extract and process the leaderboard table.
    leaderboard_table = extract_table_from_markdown(readme_content, "| Model Name")
    df_leaderboard = markdown_table_to_df(leaderboard_table)
    display_leaderboard(df_leaderboard)

    # NOTE(review): these three section renderers are not defined in the
    # visible part of this file — confirm they exist elsewhere in the module.
    display_evaluation()
    display_contribution()
    display_sponsorship()

    st.markdown("---")
    st.markdown("Thank you for being part of this effort to advance Swahili language technologies!")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()