"""Streamlit app that renders the Swahili Text Embeddings Leaderboard (STEL).

The leaderboard is parsed from the markdown table found in ``README.md``.
"""

import io
import re

import pandas as pd
import streamlit as st

# Constants
GITHUB_URL = "https://github.com/Sartify/STEL"
# NOTE(review): the original code pointed at the placeholder repository
# "username/repo"; the owner/repo below is taken from GITHUB_URL. The branch
# and file path are unchanged from the original -- confirm they exist.
LOGO_URL = "https://raw.githubusercontent.com/Sartify/STEL/main/files/STEL.jpg"
POSSIBLE_NON_BENCHMARK_COLS = [
    "Model Name",
    "Publisher",
    "Open?",
    "Basemodel",
    "Matryoshka",
    "Dimension",
    "Average",
]


def extract_table_from_markdown(markdown_text: str, table_start: str) -> str:
    """Return the markdown table that begins with ``table_start``.

    Capturing starts at the first line that starts with ``table_start`` and
    stops at the first blank (whitespace-only) line after it, or at the end
    of the text.

    Args:
        markdown_text: Full markdown document.
        table_start: Prefix identifying the table's header line.

    Returns:
        The captured lines joined with newlines, or an empty string when no
        line starts with ``table_start``.
    """
    table_lines = []
    capturing = False
    for line in markdown_text.split("\n"):
        if not capturing and line.startswith(table_start):
            capturing = True
        if capturing:
            if not line.strip():
                break
            table_lines.append(line)
    return "\n".join(table_lines)


def _split_table_row(line: str) -> list:
    """Split one markdown table row into stripped cells, keeping empty ones."""
    text = line.strip()
    # Drop only the outer border pipes so that interior empty cells survive
    # and every value stays under its own header.
    if text.startswith("|"):
        text = text[1:]
    if text.endswith("|"):
        text = text[:-1]
    return [cell.strip() for cell in text.split("|")]


def markdown_table_to_df(table_content: str) -> pd.DataFrame:
    """Convert a markdown table into a pandas DataFrame.

    The first line supplies the column headers, the second line (the
    ``---`` separator) is skipped, and every following line is a data row.
    Empty cells are preserved as empty strings so that values are never
    shifted into the wrong column; rows shorter than the header are padded
    with empty strings. Columns whose every value parses as a float are
    converted to float; all other columns stay as strings.

    Args:
        table_content: The markdown table text.

    Returns:
        The parsed table, or an empty DataFrame for empty input.

    Raises:
        ValueError: If a data row has more cells than the header row.
    """
    if not table_content.strip():
        return pd.DataFrame()

    lines = table_content.split("\n")
    headers = _split_table_row(lines[0])

    rows = []
    for line in lines[2:]:  # lines[1] is the header separator
        cells = _split_table_row(line)
        if not any(cells):
            continue  # blank or all-empty row
        if len(cells) > len(headers):
            raise ValueError(
                f"Table row has {len(cells)} cells but the header has "
                f"{len(headers)} columns: {line!r}"
            )
        cells.extend([""] * (len(headers) - len(cells)))
        rows.append(cells)

    df = pd.DataFrame(rows, columns=headers)

    # Convert numeric columns to float; anything unparsable stays a string.
    for col in df.columns:
        try:
            df[col] = df[col].astype(float)
        except (ValueError, TypeError):
            continue
    return df


def setup_page() -> None:
    """Configure the Streamlit page and render the title and logo."""
    st.set_page_config(
        page_title="Swahili Text Embeddings Leaderboard",
        page_icon="⚡",
        layout="wide",
    )
    st.title("⚡ Swahili Text Embeddings Leaderboard (STEL)")
    st.image(LOGO_URL, width=300)


def display_leaderboard(df: pd.DataFrame) -> None:
    """Render the leaderboard table with a benchmark filter and CSV download.

    Columns listed in ``POSSIBLE_NON_BENCHMARK_COLS`` are always shown (when
    present); every other column is treated as a benchmark the user can
    toggle through a multiselect.

    Args:
        df: Leaderboard data.
    """
    st.header("📊 Leaderboard")

    # Determine which non-benchmark columns are present
    fixed_cols = [col for col in POSSIBLE_NON_BENCHMARK_COLS if col in df.columns]

    # Everything else is a selectable benchmark column
    benchmark_cols = [col for col in df.columns if col not in fixed_cols]
    selected_columns = st.multiselect(
        "Select benchmarks to display:",
        benchmark_cols,
        default=benchmark_cols,
    )

    df_display = df[fixed_cols + selected_columns]

    # Only float columns get the numeric format; string columns (model
    # names, publishers, ...) would make "{:.4f}" fail.
    float_cols = [
        col for col in df_display.columns if df_display[col].dtype == "float64"
    ]
    st.dataframe(df_display.style.format("{:.4f}", subset=float_cols))

    csv = df_display.to_csv(index=False)
    st.download_button(
        label="Download as CSV",
        data=csv,
        file_name="leaderboard.csv",
        mime="text/csv",
    )


# NOTE(review): display_evaluation, display_contribution and
# display_sponsorship are called by main() but are not defined in this file
# as reviewed (they were elided with "rest of the code remains the same").
# They must be restored here, otherwise main() raises NameError after the
# leaderboard has been rendered.


def main() -> None:
    """Build the page: read README.md, parse the table, render all sections."""
    setup_page()

    # Read README content; explicit UTF-8 so non-ASCII text decodes the same
    # way regardless of the platform's default encoding.
    with open("README.md", "r", encoding="utf-8") as f:
        readme_content = f.read()

    # Extract and process leaderboard table
    leaderboard_table = extract_table_from_markdown(readme_content, "| Model Name")
    df_leaderboard = markdown_table_to_df(leaderboard_table)

    display_leaderboard(df_leaderboard)
    display_evaluation()
    display_contribution()
    display_sponsorship()

    st.markdown("---")
    st.markdown(
        "Thank you for being part of this effort to advance Swahili language technologies!"
    )


if __name__ == "__main__":
    main()