Mollel committed on
Commit
bda7c4e
1 Parent(s): 6b2b26c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -320
app.py CHANGED
@@ -1,334 +1,145 @@
 
 
1
  import io
2
  import re
3
- from collections.abc import Iterable
4
-
5
- import pandas as pd
6
- import streamlit as st
7
- from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype
8
 
9
- GITHUB_URL = "https://github.com/msamwelmollel/Swahili_LLM_Leaderboard"
10
- NON_BENCHMARK_COLS = ["Open?", "Publisher"]
11
-
12
-
13
- def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataFrame:
14
- """Extracts a table from a markdown text and formats it as a pandas DataFrame.
15
- Args:
16
- text (str): Markdown text containing a table.
17
- Returns:
18
- pd.DataFrame: Table as pandas DataFrame.
19
- """
20
- df = (
21
- pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
22
- .dropna(axis=1, how="all") # drop empty columns
23
- .iloc[1:] # drop first row which is the "----" separator of the original markdown table
24
- .sort_index(ascending=True)
25
- .apply(lambda x: x.str.strip() if x.dtype == "object" else x)
26
- .replace("", float("NaN"))
27
- .astype(float, errors="ignore")
28
- )
29
-
30
- # remove whitespace from column names and index
31
  df.columns = df.columns.str.strip()
32
- df.index = df.index.str.strip()
33
- df.index.name = df.index.name.strip()
34
-
35
- return df
36
-
37
-
38
- def extract_markdown_table_from_multiline(multiline: str, table_headline: str, next_headline_start: str = "#") -> str:
39
- """Extracts the markdown table from a multiline string.
40
- Args:
41
- multiline (str): content of README.md file.
42
- table_headline (str): Headline of the table in the README.md file.
43
- next_headline_start (str, optional): Start of the next headline. Defaults to "#".
44
- Returns:
45
- str: Markdown table.
46
- Raises:
47
- ValueError: If the table could not be found.
48
- """
49
- # extract everything between the table headline and the next headline
50
- table = []
51
- start = False
52
- for line in multiline.split("\n"):
53
- if line.startswith(table_headline):
54
- start = True
55
- elif line.startswith(next_headline_start):
56
- start = False
57
- elif start:
58
- table.append(line + "\n")
59
-
60
- if len(table) == 0:
61
- raise ValueError(f"Could not find table with headline '{table_headline}'")
62
-
63
- return "".join(table)
64
-
65
-
66
- def remove_markdown_links(text: str) -> str:
67
- """Modifies a markdown text to remove all markdown links.
68
- Example: [DISPLAY](LINK) to DISPLAY
69
- First find all markdown links with regex.
70
- Then replace them with: $1
71
- Args:
72
- text (str): Markdown text containing markdown links
73
- Returns:
74
- str: Markdown text without markdown links.
75
- """
76
-
77
- # find all markdown links
78
- markdown_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", text)
79
-
80
- # remove link keep display text
81
- for display, link in markdown_links:
82
- text = text.replace(f"[{display}]({link})", display)
83
-
84
- return text
85
-
86
-
87
- def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[str] | None = None) -> pd.DataFrame:
88
- """
89
- Filter dataframe by the rows and columns to display.
90
- This does not select based on the values in the dataframe, but rather on the index and columns.
91
- Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
92
- Args:
93
- df (pd.DataFrame): Original dataframe
94
- ignore_columns (list[str], optional): Columns to ignore. Defaults to None.
95
- Returns:
96
- pd.DataFrame: Filtered dataframe
97
- """
98
- df = df.copy()
99
-
100
- if ignore_columns is None:
101
- ignore_columns = []
102
-
103
- modification_container = st.container()
104
-
105
- with modification_container:
106
- to_filter_index = st.multiselect("Filter by model:", sorted(df.index))
107
- if to_filter_index:
108
- df = pd.DataFrame(df.loc[to_filter_index])
109
-
110
- to_filter_columns = st.multiselect(
111
- "Filter by benchmark:", sorted([c for c in df.columns if c not in ignore_columns])
112
- )
113
- if to_filter_columns:
114
- df = pd.DataFrame(df[ignore_columns + to_filter_columns])
115
-
116
  return df
117
 
 
 
 
 
 
118
 
119
- def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
120
- """
121
- Filter dataframe by the values in the dataframe.
122
- Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
123
- Args:
124
- df (pd.DataFrame): Original dataframe
125
- Returns:
126
- pd.DataFrame: Filtered dataframe
127
- """
128
- df = df.copy()
129
-
130
- modification_container = st.container()
131
-
132
- with modification_container:
133
- to_filter_columns = st.multiselect("Filter results on:", df.columns)
134
- left, right = st.columns((1, 20))
135
-
136
- for column in to_filter_columns:
137
- if is_bool_dtype(df[column]):
138
- user_bool_input = right.checkbox(f"{column}", value=True)
139
- df = df[df[column] == user_bool_input]
140
-
141
- elif is_numeric_dtype(df[column]):
142
- _min = float(df[column].min())
143
- _max = float(df[column].max())
144
-
145
- if (_min != _max) and pd.notna(_min) and pd.notna(_max):
146
- step = 0.01
147
- user_num_input = right.slider(
148
- f"Values for {column}:",
149
- min_value=round(_min - step, 2),
150
- max_value=round(_max + step, 2),
151
- value=(_min, _max),
152
- step=step,
153
- )
154
- df = df[df[column].between(*user_num_input)]
155
-
156
- elif is_datetime64_any_dtype(df[column]):
157
- user_date_input = right.date_input(
158
- f"Values for {column}:",
159
- value=(
160
- df[column].min(),
161
- df[column].max(),
162
- ),
163
- )
164
- if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
165
- user_date_input_datetime = tuple(map(pd.to_datetime, user_date_input))
166
- start_date, end_date = user_date_input_datetime
167
- df = df.loc[df[column].between(start_date, end_date)]
168
-
169
- else:
170
- selected_values = right.multiselect(
171
- f"Values for {column}:",
172
- sorted(df[column].unique()),
173
- )
174
-
175
- if selected_values:
176
- df = df[df[column].isin(selected_values)]
177
-
178
- return df
179
-
180
-
181
- def setup_basic():
182
- title = "🏆 LLM-Leaderboard"
183
-
184
- st.set_page_config(
185
- page_title=title,
186
- page_icon="🏆",
187
- layout="wide",
188
- )
189
- st.title(title)
190
-
191
- st.markdown(
192
- "A joint community effort to create one central leaderboard for LLMs."
193
- f" Visit [swahili-llm-leaderboard]({GITHUB_URL}) to contribute. \n"
194
- 'We refer to a model being "open" if it can be locally deployed and used for commercial purposes.'
195
- )
196
-
197
-
198
- def setup_leaderboard(readme: str):
199
- leaderboard_table = extract_markdown_table_from_multiline(readme, table_headline="## Leaderboard")
200
- leaderboard_table = remove_markdown_links(leaderboard_table)
201
- df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
202
- df_leaderboard["Open?"] = df_leaderboard["Open?"].map({"yes": 1, "no": 0}).astype(bool)
203
-
204
- st.markdown("## Leaderboard")
205
- modify = st.checkbox("Add filters")
206
- clear_empty_entries = st.checkbox("Clear empty entries", value=True)
207
-
208
- if modify:
209
- df_leaderboard = filter_dataframe_by_row_and_columns(df_leaderboard, ignore_columns=NON_BENCHMARK_COLS)
210
- df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
211
-
212
- if clear_empty_entries:
213
- df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
214
- benchmark_columns = [c for c in df_leaderboard.columns if df_leaderboard[c].dtype == float]
215
- rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
216
- df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
217
-
218
- st.dataframe(df_leaderboard)
219
-
220
- st.download_button(
221
- "Download current selection as .html",
222
- df_leaderboard.to_html().encode("utf-8"),
223
- "leaderboard.html",
224
- "text/html",
225
- key="download-html",
226
- )
227
-
228
- st.download_button(
229
- "Download current selection as .csv",
230
- df_leaderboard.to_csv().encode("utf-8"),
231
- "leaderboard.csv",
232
- "text/csv",
233
- key="download-csv",
234
- )
235
-
236
-
237
- def setup_benchmarks(readme: str):
238
- benchmarks_table = extract_markdown_table_from_multiline(readme, table_headline="## Benchmarks")
239
- df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
240
-
241
- st.markdown("## Covered Benchmarks")
242
-
243
- selected_benchmark = st.selectbox("Select a benchmark to learn more:", df_benchmarks.index.unique())
244
- df_selected = df_benchmarks.loc[selected_benchmark]
245
- text = [
246
- f"Name: {selected_benchmark}",
247
- ]
248
- for key in df_selected.keys():
249
- text.append(f"{key}: {df_selected[key]} ")
250
- st.markdown(" \n".join(text))
251
-
252
-
253
- def setup_sources():
254
- st.markdown("## Sources")
255
- st.markdown(
256
- "The results of this leaderboard are collected from the individual papers and published results of the model "
257
- "authors. If you are interested in the sources of each individual reported model value, please visit the "
258
- f"[llm-leaderboard]({GITHUB_URL}) repository."
259
- )
260
- st.markdown(
261
- """
262
- Special thanks to the following pages:
263
- - [MosaicML - Model benchmarks](https://www.mosaicml.com/blog/mpt-7b)
264
- - [lmsys.org - Chatbot Arena benchmarks](https://lmsys.org/blog/2023-05-03-arena/)
265
- - [Papers With Code](https://paperswithcode.com/)
266
- - [Stanford HELM](https://crfm.stanford.edu/helm/latest/)
267
- - [HF Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
268
- """
269
- )
270
-
271
- def setup_Contribution():
272
- st.markdown("## How to Contribute")
273
-
274
- markdown_content = """
275
- - Model name (don't forget the links):
276
- - Filling in missing entries
277
- - Adding a new model as a new row to the leaderboard. Please keep the descending order.
278
- - Adding a new benchmark as a new column in the leaderboard and adding the benchmark to the benchmarks table. Please keep the descending order.
279
- - Code work:
280
- - Improving the existing code
281
- - Requesting and implementing new features
282
- """
283
- st.markdown(markdown_content)
284
 
285
- def setup_Sponsorship():
286
- st.markdown("## Sponsorship")
287
- st.markdown(
288
- # "The results of this leaderboard are collected from the individual papers and published results of the model "
289
- # "authors. If you are interested in the sources of each individual reported model value, please visit the "
290
- # f"[llm-leaderboard]({GITHUB_URL}) repository."
291
- "The benchmark is English-based, and we need support translating it into Swahili."
292
- "We welcome sponsorships to help advance this endeavor."
293
- "Your sponsorship would facilitate this essential translation effort, bridging language barriers and making the benchmark "
294
- "accessible to a broader audience. We're grateful for the dedication shown by our collaborators and aim to extend this impact "
295
- "further with the support of sponsors committed to advancing language technologies."
296
- "Any support please reach me: msamwelmollel@gmail.com"
297
- )
298
-
299
-
300
- def setup_disclaimer():
301
- st.markdown("## Disclaimer")
302
- st.markdown(
303
- "Above information may be wrong. If you want to use a published model for commercial use, please contact a "
304
- "lawyer."
305
- )
306
-
307
-
308
- def setup_footer():
309
- st.markdown(
310
- """
311
- ---
312
- Made with ❤️ by the awesome open-source community from all over 🌍.
313
- """
314
- )
315
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
  def main():
318
- setup_basic()
319
-
 
320
  with open("README.md", "r") as f:
321
- readme = f.read()
322
-
323
-
324
- setup_leaderboard(readme)
325
- # setup_benchmarks(readme)
326
- # setup_sources()
327
- # setup_disclaimer()
328
- # setup_footer()
329
- setup_Contribution()
330
- setup_Sponsorship()
331
-
 
 
332
 
333
  if __name__ == "__main__":
334
  main()
 
1
+ import streamlit as st
2
+ import pandas as pd
3
  import io
4
  import re
 
 
 
 
 
5
 
6
+ # Constants
7
+ GITHUB_URL = "https://github.com/Sartify/STEL"
8
+ NON_BENCHMARK_COLS = ["Open?", "Publisher", "Basemodel", "Matryoshka", "Dimension"]
9
+
10
+ def extract_table_from_markdown(markdown_text, table_start):
11
+ """Extract table content from markdown text."""
12
+ lines = markdown_text.split('\n')
13
+ table_content = []
14
+ capture = False
15
+ for line in lines:
16
+ if line.startswith(table_start):
17
+ capture = True
18
+ continue
19
+ if capture and line.strip() == '':
20
+ break
21
+ if capture:
22
+ table_content.append(line)
23
+ return '\n'.join(table_content)
24
+
25
+ def markdown_table_to_df(table_content):
26
+ """Convert markdown table to pandas DataFrame."""
27
+ df = pd.read_csv(io.StringIO(table_content), sep='|', skipinitialspace=True)
28
  df.columns = df.columns.str.strip()
29
+ df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
30
+ df = df.dropna(axis=1, how='all')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  return df
32
 
33
+ def setup_page():
34
+ """Set up the Streamlit page."""
35
+ st.set_page_config(page_title="Swahili Text Embeddings Leaderboard", page_icon="⚡", layout="wide")
36
+ st.title("⚡ Swahili Text Embeddings Leaderboard (STEL)")
37
+ st.image("STEL.jpg", width=300)
38
 
39
+ def display_leaderboard(df):
40
+ """Display the leaderboard."""
41
+ st.header("📊 Leaderboard")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # Add filters
44
+ columns_to_filter = [col for col in df.columns if col not in NON_BENCHMARK_COLS]
45
+ selected_columns = st.multiselect("Select benchmarks to display:", columns_to_filter, default=columns_to_filter)
46
+
47
+ # Filter dataframe
48
+ df_display = df[NON_BENCHMARK_COLS + selected_columns]
49
+
50
+ # Display dataframe
51
+ st.dataframe(df_display)
52
+
53
+ # Download buttons
54
+ csv = df_display.to_csv(index=False)
55
+ st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
56
+
57
+ def display_evaluation():
58
+ """Display the evaluation section."""
59
+ st.header("🧪 Evaluation")
60
+ st.markdown("""
61
+ To evaluate a model on the Swahili Embeddings Text Benchmark, you can use the following Python script:
62
+ ```python
63
+ pip install mteb
64
+ pip install sentence-transformers
65
+ import mteb
66
+ from sentence_transformers import SentenceTransformer
67
+
68
+ models = ["sartifyllc/MultiLinguSwahili-bert-base-sw-cased-nli-matryoshka"]
69
+
70
+ for model_name in models:
71
+ truncate_dim = 768
72
+ language = "swa"
73
+
74
+ device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
75
+ model = SentenceTransformer(model_name, device=device, trust_remote_code=True)
76
+
77
+ tasks = [
78
+ mteb.get_task("AfriSentiClassification", languages=["swa"]),
79
+ mteb.get_task("AfriSentiLangClassification", languages=["swa"]),
80
+ mteb.get_task("MasakhaNEWSClassification", languages=["swa"]),
81
+ mteb.get_task("MassiveIntentClassification", languages=["swa"]),
82
+ mteb.get_task("MassiveScenarioClassification", languages=["swa"]),
83
+ mteb.get_task("SwahiliNewsClassification", languages=["swa"]),
84
+ ]
85
+
86
+ evaluation = mteb.MTEB(tasks=tasks)
87
+ results = evaluation.run(model, output_folder=f"{model_name}")
88
+
89
+ tasks = mteb.get_tasks(task_types=["PairClassification", "Reranking", "BitextMining", "Clustering", "Retrieval"], languages=["swa"])
90
+
91
+ evaluation = mteb.MTEB(tasks=tasks)
92
+ results = evaluation.run(model, output_folder=f"{model_name}")
93
+ ```
94
+ """)
95
+
96
+ def display_contribution():
97
+ """Display the contribution section."""
98
+ st.header("🤝 How to Contribute")
99
+ st.markdown("""
100
+ We welcome and appreciate all contributions! You can help by:
101
+
102
+ ### Table Work
103
+
104
+ - Filling in missing entries.
105
+ - New models are added as new rows to the leaderboard (maintaining descending order).
106
+ - Add new benchmarks as new columns in the leaderboard and include them in the benchmarks table (maintaining descending order).
107
+
108
+ ### Code Work
109
+
110
+ - Improving the existing code.
111
+ - Requesting and implementing new features.
112
+ """)
113
+
114
+ def display_sponsorship():
115
+ """Display the sponsorship section."""
116
+ st.header("🤝 Sponsorship")
117
+ st.markdown("""
118
+ This benchmark is Swahili-based, and we need support translating and curating more tasks into Swahili.
119
+ Sponsorships are welcome to help advance this endeavour. Your sponsorship will facilitate essential
120
+ translation efforts, bridge language barriers, and make the benchmark accessible to a broader audience.
121
+ We are grateful for the dedication shown by our collaborators and aim to extend this impact further
122
+ with the support of sponsors committed to advancing language technologies.
123
+ """)
124
 
125
  def main():
126
+ setup_page()
127
+
128
+ # Read README content
129
  with open("README.md", "r") as f:
130
+ readme_content = f.read()
131
+
132
+ # Extract and process leaderboard table
133
+ leaderboard_table = extract_table_from_markdown(readme_content, "| Model Name")
134
+ df_leaderboard = markdown_table_to_df(leaderboard_table)
135
+
136
+ display_leaderboard(df_leaderboard)
137
+ display_evaluation()
138
+ display_contribution()
139
+ display_sponsorship()
140
+
141
+ st.markdown("---")
142
+ st.markdown("Thank you for being part of this effort to advance Swahili language technologies!")
143
 
144
  if __name__ == "__main__":
145
  main()