AdithyaSK committed on
Commit
a6ebd86
•
1 Parent(s): a95af80

added sort by language feature - Adithya S K

Browse files
Files changed (1) hide show
  1. app.py +95 -34
app.py CHANGED
@@ -8,16 +8,42 @@ import plotly.graph_objs as go
8
  from huggingface_hub import HfApi
9
  from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
10
  from dotenv import load_dotenv
 
 
11
 
12
  load_dotenv()
13
 
14
  SERVER_URL = os.getenv("SERVER_URL")
15
 
 
16
  def get_data():
17
  response = requests.get(SERVER_URL)
18
  data = response.json()
19
  return data
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def main():
22
 
23
  st.set_page_config(page_title="Indic LLM Leaderboard", layout="wide")
@@ -65,10 +91,6 @@ def main():
65
  MMLU = item["result"]["MMLU"]["acc_norm"]
66
  except KeyError:
67
  MMLU = None
68
- try:
69
- Winograde = item["result"]["Winograde"]["acc_norm"]
70
- except KeyError:
71
- Winograde = None
72
  try:
73
  Translation = item["result"]["Translation"]["acc_norm"]
74
  except KeyError:
@@ -80,7 +102,7 @@ def main():
80
 
81
  all_models.append(model_name)
82
  table_data.append({
83
- "Model Name": model_name,
84
  "Language": language,
85
  "Avergae": ALL,
86
  "ARC-Easy": ARC_Easy,
@@ -88,60 +110,99 @@ def main():
88
  "Hellaswag": Hellaswag,
89
  "Boolq": Boolq,
90
  "MMLU": MMLU,
91
- "Winograde": Winograde,
92
  "Translation": Translation,
93
  "Generation": Generation
94
  })
95
 
96
  df = pd.DataFrame(table_data)
97
 
98
- title = st.text_input('Model Name', placeholder=" πŸ” Search for your model (separate multiple queries with `;`) and press ENTER...")
99
 
 
 
100
  col1, col2 = st.columns(2)
101
  with col1:
102
  benchmark_options = st.multiselect(
103
  'Pick Benchmark',
104
- ['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU','Winogrande','Translation','Generation'],['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU'])
105
  with col2:
106
  language_options = st.multiselect(
107
  'Pick Languages',
108
  ['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'],['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- if title:
111
- if ';' in title:
112
- model_names = [name.strip() for name in title.split(';')]
113
- filtered_df = df[df['Model Name'].isin(model_names)]
114
- else:
115
- filtered_df = df[df['Model Name'].str.contains(title, case=False, na=False)]
116
-
117
- filtered_df = filtered_df[filtered_df['Language'].isin(language_options)]
118
- filtered_df = filtered_df[df.columns.intersection(['Model Name', 'Language'] + benchmark_options)]
119
-
120
- # Calculate average across selected benchmark columns
121
- filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
122
-
123
- # Display the filtered DataFrame
124
- st.dataframe(filtered_df, use_container_width=True)
125
- elif benchmark_options or language_options:
126
- filtered_df = df[df['Language'].isin(language_options)]
127
- filtered_df = filtered_df[df.columns.intersection(['Model Name', 'Language'] + benchmark_options)]
128
-
129
- # Calculate average across selected benchmark columns
130
- filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
131
-
132
- st.dataframe(filtered_df, use_container_width=True)
133
 
 
 
134
  # Multiselect for comparing models
135
  compare_models = st.multiselect(
136
  'Pick Models to compare them',
137
- df['Model Name'].unique()
138
  )
139
-
140
  # Display DataFrame for selected models and their scores
141
  if compare_models:
142
  compare_data = []
143
  for model in compare_models:
144
- model_data = df[df['Model Name'] == model]
145
  compare_data.append(model_data)
146
  if compare_data:
147
  compare_df = pd.concat(compare_data)
 
8
  from huggingface_hub import HfApi
9
  from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
10
  from dotenv import load_dotenv
11
+ from huggingface_hub import HfApi
12
+ from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
13
 
14
  load_dotenv()
15
 
16
  SERVER_URL = os.getenv("SERVER_URL")
17
 
18
+ @st.cache_data
19
  def get_data():
20
  response = requests.get(SERVER_URL)
21
  data = response.json()
22
  return data
23
 
24
+ @st.cache_data
25
+ def get_model_info(df):
26
+ api = HfApi()
27
+
28
+ # Initialize new columns for likes and tags
29
+ df['Likes'] = None
30
+
31
+ # Iterate through DataFrame rows
32
+ for index, row in df.iterrows():
33
+ model = row['Model'].strip()
34
+ try:
35
+ model_info = api.model_info(repo_id=str(model))
36
+ df.loc[index, 'Likes'] = f"{model_info.likes}🧑"
37
+ # df.loc[index, 'Tags'] = ', '.join(model_info.tags)
38
+
39
+ except (RepositoryNotFoundError, RevisionNotFoundError):
40
+ df.loc[index, 'Likes'] = None
41
+ # df.loc[index, 'Tags'] = ''
42
+
43
+ return df
44
+
45
+
46
+ # @st.cache_data
47
  def main():
48
 
49
  st.set_page_config(page_title="Indic LLM Leaderboard", layout="wide")
 
91
  MMLU = item["result"]["MMLU"]["acc_norm"]
92
  except KeyError:
93
  MMLU = None
 
 
 
 
94
  try:
95
  Translation = item["result"]["Translation"]["acc_norm"]
96
  except KeyError:
 
102
 
103
  all_models.append(model_name)
104
  table_data.append({
105
+ "Model": model_name,
106
  "Language": language,
107
  "Avergae": ALL,
108
  "ARC-Easy": ARC_Easy,
 
110
  "Hellaswag": Hellaswag,
111
  "Boolq": Boolq,
112
  "MMLU": MMLU,
 
113
  "Translation": Translation,
114
  "Generation": Generation
115
  })
116
 
117
  df = pd.DataFrame(table_data)
118
 
119
+ title = st.text_input('Model', placeholder=" πŸ” Search for your model (separate multiple queries with `;`) and press ENTER...")
120
 
121
+ on = st.checkbox('Sort by Language')
122
+
123
  col1, col2 = st.columns(2)
124
  with col1:
125
  benchmark_options = st.multiselect(
126
  'Pick Benchmark',
127
+ ['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU','Translation','Generation'],['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU'])
128
  with col2:
129
  language_options = st.multiselect(
130
  'Pick Languages',
131
  ['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'],['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'])
132
+ if on:
133
+ # Loop through each selected language
134
+ for language in language_options:
135
+ filtered_df = df[df['Language'] == language]
136
+ # Check if the filtered dataframe is not empty
137
+ if not filtered_df.empty:
138
+ st.subheader(f"{language.capitalize()[0]}{language[1:]}")
139
+ filtered_df.reset_index(drop=True, inplace=True)
140
+ # Display filtered dataframe
141
+ filtered_df = get_model_info(filtered_df)
142
+ if title:
143
+ if ';' in title:
144
+ model_names = [name.strip() for name in title.split(';')]
145
+ filtered_df = df[df['Model'].isin(model_names)]
146
+ else:
147
+ filtered_df = df[df['Model'].str.contains(title, case=False, na=False)]
148
+
149
+ filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
150
+
151
+ # Calculate average across selected benchmark columns
152
+ filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
153
+ filtered_df.index += 1
154
+ st.dataframe(filtered_df, use_container_width=True)
155
+ elif benchmark_options or language_options:
156
+ filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
157
+
158
+ # Calculate average across selected benchmark columns
159
+ filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
160
+
161
+ filtered_df = get_model_info(filtered_df)
162
+ filtered_df.index += 1
163
+ st.dataframe(filtered_df, use_container_width=True)
164
+ # st.write('Feature activated!')
165
+ else:
166
+
167
+ if title:
168
+ if ';' in title:
169
+ model_names = [name.strip() for name in title.split(';')]
170
+ filtered_df = df[df['Model'].isin(model_names)]
171
+ else:
172
+ filtered_df = df[df['Model'].str.contains(title, case=False, na=False)]
173
+
174
+ filtered_df = filtered_df[filtered_df['Language'].isin(language_options)]
175
+ filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
176
+
177
+ # Calculate average across selected benchmark columns
178
+ filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
179
+ filtered_df.index += 1
180
+ # Display the filtered DataFrame
181
+ st.dataframe(filtered_df, use_container_width=True)
182
+ elif benchmark_options or language_options:
183
+ filtered_df = df[df['Language'].isin(language_options)]
184
+ filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
185
+
186
+ # Calculate average across selected benchmark columns
187
+ filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
188
+
189
+ filtered_df = get_model_info(filtered_df)
190
+ filtered_df.index += 1
191
+ st.dataframe(filtered_df, use_container_width=True)
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+
195
+
196
  # Multiselect for comparing models
197
  compare_models = st.multiselect(
198
  'Pick Models to compare them',
199
+ df['Model'].unique()
200
  )
 
201
  # Display DataFrame for selected models and their scores
202
  if compare_models:
203
  compare_data = []
204
  for model in compare_models:
205
+ model_data = df[df['Model'] == model]
206
  compare_data.append(model_data)
207
  if compare_data:
208
  compare_df = pd.concat(compare_data)