Y1OV committed on
Commit
1aa060b
·
verified ·
1 Parent(s): 592d1f0

Upload 8 files

Browse files
Files changed (8) hide show
  1. ITOG.csv +26 -0
  2. Leaderboard.py +198 -0
  3. main.py +46 -0
  4. oblzn.csv +27 -0
  5. provokac.csv +27 -0
  6. setup.cfg +2 -0
  7. streamlit_app.py +23 -0
  8. vidvopr.csv +27 -0
ITOG.csv ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,NUM_Q_multich_EM,NUM_Q_multich_CC,NUM_Q_multich_PM,NUM_Q_onech_EM,NUM_Q_seq_EM,NUM_Q_seq_CC,NUM_Q_seq_PM,NUM_Q_map_EM,NUM_Q_map_CC,NUM_Q_map_PM,OPEN_Q_EM,OPEN_Q_F1,OPEN_Q_LR,LEADERBOARD
2
+ qwen2:72b-instruct-q4_0,55.59,69.97,71.88,85.66,62.01,62.12,62.12,31.4,34.38,36.87,8.93,43.03,57.1,52.39
3
+ GigaChat_Pro,63.29,66.23,75.44,76.33,52.06,52.06,52.06,11.59,11.59,15.88,40.89,47.64,64.51,48.43
4
+ yandexgpt_pro,47.28,49.35,75.05,84.88,41.75,41.75,43.3,5.58,5.58,7.3,15.46,50.76,63.7,40.9
5
+ GigaChat_Plus,51.2,55.56,68.03,69.27,34.02,34.02,34.02,6.01,6.01,8.15,32.99,40.2,58.88,38.33
6
+ GigaChat_Lite,51.2,55.56,68.03,69.27,34.02,34.02,34.02,6.01,6.01,8.15,32.65,40.37,58.92,38.32
7
+ gemma2:9b-instruct-q4_0,40.65,52.63,63.85,76.58,29.31,30.48,35.03,4.31,4.89,9.49,24.45,32.04,49.76,34.88
8
+ llama3:70b-instruct-q4_0,33.5,59.86,59.18,78.14,16.61,17.88,28.41,4.14,5.22,7.25,10.32,44.83,60.49,32.76
9
+ yandexgpt_lite,7.3,8.06,54.08,76.16,19.59,19.59,24.48,1.29,1.29,6.65,31.96,49.83,64.93,28.09
10
+ llama3.1:70b-instruct-q4_0,26.43,50.33,61.55,24.28,8.36,15.66,26.56,1.49,2.24,10.15,14.31,48.9,63.83,27.24
11
+ qwen2:7b-instruct-q4_0,10.39,13.02,54.71,66.22,19.58,20.53,21.27,1.91,2.32,7.5,3.74,11.44,32.79,20.42
12
+ ilyagusev/saiga_llama3,2.09,10.0,47.38,63.04,6.88,6.88,18.1,0.0,0.0,2.82,6.9,21.94,40.68,17.44
13
+ phi3:14b-medium-4k-instruct-q4_0,0.04,0.15,57.14,56.63,2.75,4.87,36.3,0.0,0.33,9.07,5.26,16.85,31.65,17.0
14
+ mistral:7b-instruct-v0.3-q4_0,0.0,0.02,44.55,46.65,0.0,0.0,26.98,0.0,0.0,3.94,2.47,11.6,26.0,12.48
15
+ solar:10.7b-instruct-v1-q4_0,0.0,0.26,48.67,46.3,0.0,0.21,18.25,0.0,0.08,3.23,3.42,11.43,23.18,11.93
16
+ random,4.04,7.6,32.7,24.51,14.07,14.07,14.07,0.83,0.83,3.23,,,,11.6
17
+ wavecut/vikhr:7b-instruct_0.4-Q4_1,0.0,0.02,36.12,34.32,0.95,2.75,6.24,0.0,0.0,0.58,10.01,19.1,34.3,11.11
18
+ llama3:8b-instruct-q4_0,0.32,1.0,47.4,26.49,0.53,1.48,4.07,0.0,0.08,2.28,0.06,17.11,35.27,10.47
19
+ mixtral:8x7b-instruct-v0.1-q4_0,0.0,11.81,51.05,8.67,0.11,16.93,19.47,0.0,1.66,10.48,0.63,5.19,9.85,10.45
20
+ yi:9b,4.09,11.51,34.87,25.9,8.99,10.26,12.7,0.0,0.25,1.41,0.63,3.46,15.14,9.94
21
+ gemma:7b-instruct-v1.1-q4_0,2.4,12.13,30.96,27.28,4.97,11.75,5.61,0.83,0.83,2.53,0.06,6.03,23.68,9.93
22
+ llama3.1:8b-instruct-q4_0,0.06,0.62,46.74,8.41,0.0,1.9,6.35,0.0,0.08,4.64,0.44,19.23,39.63,9.85
23
+ qwen:7b,0.0,0.02,30.09,37.09,8.99,10.05,11.96,0.0,0.0,1.33,0.0,2.44,17.1,9.16
24
+ gemma2:27b-instruct-q4_0,1.45,6.9,36.8,22.15,2.54,8.57,8.57,0.0,0.08,0.79,1.65,8.24,15.15,8.68
25
+ yi:6b,0.6,8.94,21.59,12.62,0.32,9.74,1.75,0.17,0.83,0.41,0.19,2.29,10.27,5.36
26
+ llama2:13b,0.0,0.04,28.24,0.0,0.0,0.11,2.75,0.0,0.0,0.12,0.06,5.17,13.06,3.81
Leaderboard.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+
4
def load_data():
    """Read the four leaderboard CSV files from the current working directory.

    Returns:
        A 4-tuple of DataFrames, in this order: ``ITOG.csv``, ``oblzn.csv``,
        ``vidvopr.csv``, ``provokac.csv``.
    """
    # The order matters: callers unpack the result positionally.
    csv_names = ("ITOG.csv", "oblzn.csv", "vidvopr.csv", "provokac.csv")
    return tuple(pd.read_csv(csv_name) for csv_name in csv_names)
11
+
12
def select_table(tables):
    """Show a select box with the table names and return the chosen table.

    Args:
        tables: Mapping from the table label shown in the select box to the
            object to return for it. It must contain every label offered below.

    Returns:
        ``tables[label]`` for the label currently selected (the first label
        is preselected).
    """
    labels = ["ITOG", "Область знаний", "Вид вопроса", "Провокационность"]
    chosen_label = st.selectbox("Выберите таблицу для отображения:", labels, index=0)
    return tables[chosen_label]
20
+
21
def filter_itog_columns(df):
    """Let the user pick which columns of the summary table to display.

    Args:
        df: DataFrame that contains a ``"Model"`` column.

    Returns:
        ``df`` restricted to ``"Model"`` followed by the columns picked in the
        multiselect; only ``"Model"`` when nothing is picked.
    """
    st.markdown("### Фильтрация по отдельным колонкам (кроме Model, которая всегда видна):")

    # "Model" is never offered as an option because it is always shown.
    optional_columns = [name for name in df.columns if name != "Model"]
    picked = st.multiselect(
        "Выберите колонки для отображения:",
        options=optional_columns,
    )

    return df[["Model", *picked]]
32
+
33
def filter_oblzn_columns(df):
    """Filter the knowledge-area table by the areas chosen in a multiselect.

    Args:
        df: DataFrame that contains a ``"Model"`` column; the other columns
            are matched by the area code appearing anywhere in their name.

    Returns:
        ``df`` unchanged when nothing or "Все области" is selected; otherwise
        ``df`` restricted to ``"Model"`` plus every column whose name contains
        the code of a selected area.
    """
    st.markdown("### Фильтрация по областям знаний:")

    area_labels = {
        "GEO": "География",
        "HIST": "История",
        "SOC": "Обществознание (социология)",
        "POL": "Политология и основы нац.безопасности",
        "ALL": "Все области",
    }

    chosen = st.multiselect(
        "Выберите области знаний:",
        list(area_labels.values()),
    )

    # No selection and the explicit "all" option both mean "no filtering".
    if not chosen or "Все области" in chosen:
        return df

    codes = [code for code, label in area_labels.items() if label in chosen]

    kept = ["Model"]
    for column in df.columns:
        # Substring match: the code may sit anywhere inside the column name.
        if any(code in column for code in codes):
            kept.append(column)
    return df[kept]
60
+
61
+
62
def filter_vidvopr_columns(df):
    """Filter the question-type table by the types chosen in a multiselect.

    Args:
        df: DataFrame that contains a ``"Model"`` column; the other columns
            are matched by the type code appearing anywhere in their name.

    Returns:
        ``df`` unchanged when nothing or "Все типы" is selected; otherwise
        ``df`` restricted to ``"Model"`` plus every column whose name contains
        the code of a selected question type.
    """
    st.markdown("### Фильтрация по виду вопроса:")

    type_labels = {
        "multich": "multichoice (мультивыбор)",
        "onech": "one choice (вопрос с одним правильным ответом)",
        "seq": "sequence (последовательность)",
        "map": "mapping (сопоставление)",
        "ALL": "Все типы",
    }

    chosen = st.multiselect(
        "Выберите типы вопросов:",
        list(type_labels.values()),
    )

    # No selection and the explicit "all" option both mean "no filtering".
    if not chosen or "Все типы" in chosen:
        return df

    codes = [code for code, label in type_labels.items() if label in chosen]

    kept = ["Model"]
    for column in df.columns:
        # Substring match: the code may sit anywhere inside the column name.
        if any(code in column for code in codes):
            kept.append(column)
    return df[kept]
89
+
90
+
91
def filter_provokac_columns(df):
    """Filter the provocativeness table by the levels chosen in a multiselect.

    Args:
        df: DataFrame that contains a ``"Model"`` column; the other columns
            are matched by the level code appearing anywhere in their name.

    Returns:
        ``df`` unchanged when nothing or "Все уровни" is selected; otherwise
        ``df`` restricted to ``"Model"`` plus every column whose name contains
        the code of a selected level.
    """
    st.markdown("### Фильтрация по уровню провокативности:")

    level_labels = {
        "PROVOC_1": "1ый уровень провокативности",
        "PROVOC_2": "2ой уровень провокативности",
        "PROVOC_3": "3ий уровень провокативности",
        "ALL": "Все уровни",
    }

    chosen = st.multiselect(
        "Выберите уровни провокативности:",
        list(level_labels.values()),
    )

    # No selection and the explicit "all" option both mean "no filtering".
    if not chosen or "Все уровни" in chosen:
        return df

    codes = [code for code, label in level_labels.items() if label in chosen]

    kept = ["Model"]
    for column in df.columns:
        # Substring match: the code may sit anywhere inside the column name.
        if any(code in column for code in codes):
            kept.append(column)
    return df[kept]
117
+
118
+
119
+
120
+
121
# Page body: pick one of the four tables, optionally narrow its columns,
# then display it and offer it as a CSV download.
st.title("Leaderboard")

df_itog, df_oblzn, df_vidvopr, df_provokac = load_data()

tables = {
    "ITOG": df_itog,
    "Область знаний": df_oblzn,
    "Вид вопроса": df_vidvopr,
    "Провокационность": df_provokac,
}

df_selected = select_table(tables)

if st.checkbox("Добавить фильтры"):
    # The selected table is recognised by object identity, so each frame is
    # paired with the column filter written for it.
    column_filters = (
        (df_itog, filter_itog_columns),
        (df_oblzn, filter_oblzn_columns),
        (df_vidvopr, filter_vidvopr_columns),
        (df_provokac, filter_provokac_columns),
    )
    for frame, column_filter in column_filters:
        if df_selected is frame:
            df_selected = column_filter(df_selected)
            break

st.dataframe(df_selected, use_container_width=True)

# The download contains exactly what is displayed, without the index column.
st.download_button(
    label="Скачать таблицу в формате CSV",
    data=df_selected.to_csv(index=False).encode('utf-8'),
    file_name="filtered_table.csv",
    mime="text/csv",
)
st.write('---')
153
+
154
# Static markdown shown under the table: how to read the column names and
# how the tables are organised.
COLUMN_GUIDE_MD = """
### Описание структуры названий колонок:

#### Части, отвечающие за область знаний:
- **GEO** - география
- **HIST** - история
- **SOC** - обществознание (социология)
- **POL** - политология и основы национальной безопасности

#### Части, отвечающие за вид вопроса:
- **NUM_Q** или **_num_q_** - вопрос с числовым ответом, с делением на:
- **_multich_** - "multichoice", мультивыбор
- **_onech_** - "one choice", вопрос с одним правильным ответом
- **_seq_** - "sequence", последовательность
- **_map_** - "mapping", соответствие
- **OPEN_Q** или **_open_q_** - открытый вопрос, подразумевающий свободный письменный ответ

#### Части, отвечающие за уровень провокативности:
- **PROVOC_1** - первый уровень провокативности
- **PROVOC_2** - второй уровень провокативности
- **PROVOC_3** - третий уровень провокативности

#### Части, указывающие метрику:
- **_EM** - "exact match", ответ модели точно совпадает с правильным
- **_CC** - "contains check", ответ модели содержит правильный ответ
- **_PM** - "partially match", ответ модели частично верный
- **_F1** - метрика f1-score
- **_LR** - "levenshtein ratio", мера схожести ответа модели с эталонным, на основе расстояния Левенштейна

---

### Структура таблиц:
В таблице представлены три обобщенные вкладки по каждому срезу:
- **Область знаний**
- **Вид вопроса**
- **Уровень провокативности**

Также присутствует таблица **ИТОГ**, представляющая итоговый рейтинг. Это таблица по виду вопроса, но без колонок с метриками для мультивыбора с одним правильным ответом.
"""

st.write(COLUMN_GUIDE_MD)

# Links/contacts section.
st.write("### `Ссылки/контакты`")

for link_md in (
    "[GitHub](https://github.com/ikanam-ai/slava)",
    "[Dataset](https://huggingface.co/datasets/RANEPA-ai/SLAVA-OpenData-2800-v1)",
):
    st.write(link_md)
198
+
main.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
# Landing page: title, a centred logo image that links to itself, the
# benchmark description and the contact links.
st.title("SLAVA")
st.write('### Бенчмарк социально-политического ландшафта и ценностного анализа')

# Raw HTML is used because the image is centred and wrapped in a link.
# The alt text replaces the leftover placeholder "Foo" so that screen readers
# and broken-image fallbacks show something meaningful.
# NOTE(review): "RANEPA" is taken from the image file name (ranepa.png) — confirm.
html_code = '''
<div style="text-align: center;">
<a href="https://raw.githubusercontent.com/Y1OV/project_lab/main/data/ranepa.png">
<img src="https://raw.githubusercontent.com/Y1OV/project_lab/main/data/ranepa.png" alt="RANEPA" style="width: 50%; height: auto;">
</a>
</div>
'''

# unsafe_allow_html is required for the HTML to render; the markup above is
# a fixed literal and contains no user input.
st.markdown(html_code, unsafe_allow_html=True)


st.write("""

С 2024 года был разработан бенчмарк SLAVA, содержащий около 14 тысяч вопросов для российского домена из таких областей, как история, политология, социология, политическая география и основы национальной безопасности. Этот бенчмарк оценивает способности больших языковых моделей (LLM) справляться с чувствительными темами, важными для российского информационного пространства.

#### Основные задачи:
- Проверка фактических знаний LLM в российских доменах.
- Оценка чувствительности (провокативности) вопросов.
- Создание комплексной системы оценки на основе точности ответов.

#### Структура:
Вопросы делятся на следующие типы:
- Мультивыбор с одним или несколькими правильными ответами.
- Последовательности и соответствия.
- Открытые ответы.

#### Провокативность вопросов:
- **1 балл**: Низкая чувствительность — общепризнанные факты.
- **2 балла**: Средняя чувствительность — спорные темы.
- **3 балла**: Высокая чувствительность — политические и культурные вопросы, вызывающие конфликты.

#### Результаты:
Были протестированы 24 LLM, поддерживающие русский язык. Модели от компаний **GigaChat**, **YandexGPT** и **qwen2** показали наивысшую точность и способность справляться с сложными, провокативными вопросами. В то время как некоторые модели, такие как **llama2** и **mixtral**, продемонстрировали более слабые результаты.

Этот бенчмарк подчеркивает необходимость дальнейших исследований в области надежности LLM, особенно в контексте социально-политических тем, значимых для России.
""")

# Links/contacts section.
st.write("### `Ссылки/контакты`")

st.write("[GitHub](https://github.com/ikanam-ai/slava)")
st.write("[Dataset](https://huggingface.co/datasets/RANEPA-ai/SLAVA-OpenData-2800-v1)")
oblzn.csv ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,GEO_num_q_EM,GEO_num_q_CC,GEO_num_q_PM,GEO_open_q_EM,GEO_open_q_F1,GEO_open_q_LR,GEO_avg,HIST_num_q_EM,HIST_num_q_CC,HIST_num_q_PM,HIST_open_q_EM,HIST_open_q_F1,HIST_open_q_LR,HIST_avg,SOC_num_q_EM,SOC_num_q_CC,SOC_num_q_PM,SOC_open_q_EM,SOC_open_q_F1,SOC_open_q_LR,SOC_avg,POL_num_q_EM,POL_num_q_CC,POL_num_q_PM,POL_avg,DOMAIN_avg
2
+ gemma2:27b-instruct-q4_0,15.5348837209302,36.2325581395349,34.7209302325581,1.36518771331058,6.08108161580778,12.3447098976109,17.71322521995874,9.0968443960827,29.5756256800871,26.822633297062,1.97309417040359,8.93722159294191,16.0089686098655,15.402397957740467,8.88761467889908,29.1284403669725,42.6032110091743,0.0,7.37259511525245,14.374269005848,17.061021696024387,23.7947122861586,59.0979782270607,46.8895800933126,43.26075686884396,20.51629237375588
3
+ gemma2:9b-instruct-q4_0,58.9302325581395,62.3720930232558,64.4883720930233,30.7167235494881,38.6347348301442,54.7849829351536,51.65452316486741,39.8694232861806,43.4385201305767,48.139281828074,17.6681614349776,25.4223580804077,44.1973094170404,36.45584236287616,59.6139143730887,68.4633027522936,72.887996941896,57.8947368421053,63.8700918964077,77.4327485380117,66.6937985573005,68.2737169517885,71.3841368584759,70.4510108864697,70.0362882322447,54.234945200333264
4
+ gemma:7b-instruct-v1.1-q4_0,20.8372093023256,39.2093023255814,42.046511627907,0.0,3.94542088889186,19.0238907849829,20.843722488281458,9.88030467899891,19.8258977149075,23.8737758433079,0.0896860986547085,4.38105088097796,23.3417040358744,13.565403208786895,12.6911314984709,32.0336391437309,43.243501529052,0.0,20.3867021011607,33.8479532163743,23.70048791479813,27.6827371695179,54.7433903576983,55.6765163297045,46.03421461897357,23.179063120386648
5
+ ilyagusev/saiga_llama3,37.8139534883721,40.8372093023256,49.3023255813954,15.358361774744,35.1715192074092,52.7337883959044,38.53619295835845,26.2241566920566,27.7040261153428,37.1490750816104,2.51121076233184,15.1855722725022,34.7470852017937,23.920187687606255,26.5863914373089,32.2438837920489,56.5271406727829,21.0526315789474,43.3031923570162,58.7485380116959,39.74362964163337,60.8087091757387,63.2970451010886,63.9191290824261,62.67496111975114,38.15356881356391
6
+ llama2:13b,0.0,31.6279069767442,4.69767441860465,0.341296928327645,9.40460676527417,20.5904436860068,11.11032146249291,0.0,19.0424374319913,5.61479869423286,0.0,4.03632452572392,11.5237668161435,6.702887911348597,0.0,9.53746177370031,20.3172782874618,0.0,5.28747736387109,10.2046783625731,7.55781596460105,0.0,49.3001555209953,5.05443234836703,18.11819595645411,9.837178090477034
7
+ llama3.1:70b-instruct-q4_0,17.2558139534884,60.6511627906977,65.3255813953489,5.11945392491468,59.2786046255473,67.2184300341297,45.80817445402112,16.7791077257889,46.8335146898803,51.2622415669206,16.1434977578475,43.4389491770808,61.27533632287,39.288774540064686,28.3448012232416,64.7362385321101,71.1678134556575,18.1286549707602,66.7182357754662,74.672514619883,53.96137642951976,17.8849144634526,72.6283048211509,71.850699844479,54.12130637636083,47.46256531765313
8
+ llama3.1:8b-instruct-q4_0,4.6046511627907,31.8139534883721,32.1860465116279,0.0,32.1790002422677,49.0546075085324,24.973043152265134,3.13384113166485,24.3960826985854,30.4678998911861,0.62780269058296,14.4408672828017,36.1085201793722,18.195835645698867,3.26834862385321,24.8853211009174,51.5768348623853,0.0,28.2554385814658,46.4093567251462,25.732549982294653,11.5085536547434,57.0762052877138,51.1664074650078,39.917055469155,25.388559004238896
9
+ llama3:70b-instruct-q4_0,57.3953488372093,63.4883720930233,63.9069767441861,5.80204778156997,60.7482833489855,68.3788395904437,53.28664473256964,40.7181719260065,46.5070729053319,49.2491838955386,10.6726457399103,40.3680070949795,58.322869955157,40.9729919194873,52.3891437308869,70.7759938837921,67.8516819571865,15.7894736842105,46.6919490734894,61.1052631578947,52.43391758124334,68.895800933126,70.6065318818041,71.3841368584759,70.29548989113533,51.95465690824798
10
+ llama3:8b-instruct-q4_0,15.8139534883721,35.5348837209302,45.7674418604651,0.0,31.6684961159729,48.0511945392492,29.472661620831584,10.5114254624592,25.5495103373232,33.8737758433079,0.0896860986547085,12.4341412479717,31.7417040358744,19.033373837598518,11.2767584097859,27.1215596330275,57.7025993883792,0.0,22.6456467258,36.4152046783626,25.860294805892533,20.99533437014,62.6749611197512,62.5194401244168,48.729911871436,28.20893891429732
11
+ mistral:7b-instruct-v0.3-q4_0,25.4883720930233,29.953488372093,44.4883720930233,6.8259385665529,14.7143576600266,29.5290102389079,25.166589837271164,18.759521218716,23.8302502720348,34.7007616974973,0.717488789237668,8.34350531701643,24.0941704035874,18.407616283014935,18.4059633027523,23.7576452599388,52.3318042813456,6.4327485380117,27.5101026274299,32.4035087719298,26.806962130234684,49.4556765163297,57.3872472783826,58.0093312597201,54.950751684810804,27.959012597978905
12
+ mixtral:8x7b-instruct-v0.1-q4_0,5.53488372093023,48.7906976744186,52.3953488372093,0.341296928327645,3.94842331452323,6.73037542662116,19.62350431700503,3.5038084874864,37.9542981501632,42.3612622415669,0.717488789237668,4.68070314515325,10.1865470852018,16.5673513164682,3.45948012232416,37.5955657492355,59.7094801223242,0.584795321637427,10.6595347898574,12.9590643274854,20.827986738810683,6.22083981337481,72.1617418351477,61.1975116640747,46.52669777086573,22.937768930776222
13
+ phi3:14b-medium-4k-instruct-q4_0,34.2325581395349,43.8139534883721,59.5813953488372,12.9692832764505,30.2658027774082,44.8976109215017,37.62676732535076,24.069640914037,33.6235038084875,46.9314472252448,2.69058295964126,12.3929007725114,27.7345291479821,24.573767471317343,21.5596330275229,30.2752293577982,65.7874617737003,8.7719298245614,22.9512670175593,34.4678362573099,30.63555954307533,50.5443234836703,68.7402799377916,66.5629860031104,61.94919647485742,35.37448359347776
14
+ qwen2:72b-instruct-q4_0,80.5581395348837,83.953488372093,83.7441860465116,1.70648464163823,60.9664006973775,66.8259385665529,62.95910630984282,60.1088139281828,65.310119695321,65.4733405875952,10.6726457399103,37.226795170081,53.6,48.73195252018172,68.5015290519878,79.434250764526,78.3925840978593,9.94152046783626,50.1081604264453,63.2748538011696,58.27548310163737,68.5847589424572,74.805598755832,71.2286158631415,71.53965785381023,58.781820245304864
15
+ qwen2:7b-instruct-q4_0,45.7209302325581,46.9302325581395,54.8837209302326,6.48464163822526,12.4030115766671,33.8430034129693,33.37759005813198,27.8346028291621,30.1196953210011,37.2143634385201,2.0627802690583,7.87949319222932,29.6869955156951,22.466321760944336,35.4548929663609,37.2133027522936,65.5485474006116,9.94152046783626,33.0038691193979,51.2105263157895,38.72877650371496,62.0528771384137,63.9191290824261,64.0746500777605,63.34888543286676,36.0706088683499
16
+ qwen:7b,20.093023255814,20.8837209302326,26.5813953488372,0.0,1.97488246634156,16.8600682593857,14.39884837676851,14.6681175190424,15.4080522306855,21.7627856365615,0.0,1.34680160823358,15.7542600896861,11.49000284736818,16.532874617737,16.5519877675841,35.3211009174312,0.0,10.3653291238234,26.2690058479532,17.50671637908815,39.50233281493,39.9688958009331,40.9797822706065,40.15033696215653,18.134496024086605
17
+ solar:10.7b-instruct-v1-q4_0,23.7674418604651,34.4651162790698,46.0232558139535,1.70648464163823,8.4900835179791,17.1194539249147,21.92863933967007,16.474428726877,26.89880304679,37.0729053318825,3.04932735426009,10.4661683783297,22.9569506726457,19.48643058513083,21.6360856269113,25.9556574923547,57.559250764526,8.7719298245614,22.7690138336274,35.0643274853801,28.626044171226813,42.1461897356143,60.3421461897356,60.1866251944012,54.22498703991704,27.7581736045675
18
+ wavecut/vikhr:7b-instruct_0.4-Q4_1,17.8139534883721,23.2093023255814,28.093023255814,12.9692832764505,22.0442276930355,35.7849829351536,23.31912882906785,14.2110990206746,17.5843307943417,23.4820457018498,7.98206278026906,16.8323787548876,32.647533632287,18.78990844738496,14.2201834862385,17.6796636085627,40.9690366972477,18.1286549707602,28.8398070736436,42.5029239766082,27.05671163551015,32.5038880248834,42.7682737169518,41.9906687402799,39.08761016070503,25.34558685494728
19
+ yi:6b,7.06976744186047,26.4651162790698,17.4651162790698,0.0,1.72191372616652,8.90102389078498,10.270489602825263,3.65614798694233,22.6550598476605,13.7540805223069,0.0,2.08897265807147,10.7318385650224,8.814349930000601,6.19266055045872,23.9296636085627,26.3379204892966,1.75438596491228,4.61048579154667,9.56140350877193,12.064419985591483,18.1959564541213,36.8584758942457,30.0933125972006,28.382581648522535,12.954442955051032
20
+ yi:9b,16.046511627907,27.1162790697674,30.5813953488372,0.0,3.15355705067136,12.098976109215,14.832786534399661,9.07508161044614,19.7606093579978,22.829162132753,0.179372197309417,2.8367657031748,15.2224215246637,11.65056875439081,15.1567278287462,29.6062691131498,42.4120795107034,4.67836257309942,8.03562572777585,19.7836257309942,19.945448414078147,33.5925349922239,50.0777604976672,48.2115085536547,43.9606013478486,19.5454583933694
21
+ GigaChat_Lite,61.1872146118721,62.5570776255708,63.1278538812785,40.9090909090909,51.7079785604165,65.8030303030303,57.54870764854318,40.8845738942826,43.042071197411,45.5231930960086,26.2032085561497,33.7021080330654,53.9144385026738,40.54493221326518,63.4799235181644,66.5391969407266,73.565965583174,50.0,53.5087719298246,71.6052631578947,63.11652018829738,52.8985507246377,55.7971014492754,54.7101449275362,54.46859903381644,53.841274162003984
22
+ GigaChat_Plus,61.1872146118721,62.5570776255708,63.1278538812785,40.9090909090909,51.7298973967309,65.7424242424243,57.542259777827915,40.8845738942826,43.042071197411,45.5231930960086,26.7379679144385,33.4191279951085,53.8449197860963,40.57530898055759,63.4799235181644,66.5391969407266,73.565965583174,50.0,53.5087719298246,71.7631578947368,63.142835977771064,52.8985507246377,55.7971014492754,54.7101449275362,54.46859903381644,53.85562978658994
23
+ GigaChat_Pro,71.2328767123288,72.1461187214612,72.6027397260274,53.030303030303,62.0469396605864,73.8484848484849,67.48457711653195,49.8381877022654,51.024811218986,54.1531823085221,33.6898395721925,40.7455479303299,59.379679144385,48.138541312780156,74.3785850860421,76.7686424474187,81.357552581262,55.2631578947369,56.578947368421,73.5,69.64114756298011,55.072463768116,57.9710144927536,56.8840579710145,56.642512077294704,61.02443486598271
24
+ yandexgpt_lite,47.2602739726027,48.1735159817352,55.5936073059361,42.4242424242424,55.6932275713253,67.6363636363636,52.79687181536755,39.697950377562,40.453074433657,50.9708737864078,28.3422459893048,42.8268094822329,61.0160427807487,43.884499474985525,35.3728489483748,36.2332695984704,65.2485659655832,31.5789473684211,74.1228070175439,79.5263157894737,53.68045911464452,57.9710144927536,58.695652173913,60.8695652173913,59.17874396135263,51.41462925304969
25
+ yandexgpt_pro,63.9269406392694,64.3835616438356,68.7214611872146,22.7272727272727,44.3545700753268,56.3939393939394,53.41795761114307,56.0949298813376,56.2028047464941,62.2977346278317,7.48663101604278,47.2091634117127,61.855614973262,48.52447977611348,63.6711281070746,65.2963671128107,80.9273422562142,42.1052631578947,79.3859649122807,85.4473684210526,69.47223899455459,60.8695652173913,61.5942028985507,63.0434782608696,61.83574879227053,57.809300222270394
26
+ random,16.8372093023256,17.5813953488372,22.3720930232558,,,,18.930232558139533,13.2535364526659,13.9281828073993,20.4134929270947,,,,15.8650707290533,11.6207951070336,13.8188073394495,29.0997706422018,,,,18.17979102956163,26.1275272161742,26.905132192846,28.149300155521,27.0606531881804,20.008936876233715
27
+ Среднее значение,33.0457343102899,44.58990336625253,47.67298715089731,12.571103526734921,29.2636258910368,41.42481556176096,34.58490265246131,23.569131589728055,32.9484170331947,37.23665961195567,8.346142778350634,19.61007223781356,35.41221693324701,26.081951898966594,29.447253553657156,40.244822273288065,56.480499470825215,,,,,40.339261162575795,57.78393851285865,54.392521468659126,50.838573714697866,35.27007299907976
provokac.csv ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,PROVOC_1_num_q_EM,PROVOC_1_num_q_CC,PROVOC_1_num_q_PM,PROVOC_1_open_q_EM,PROVOC_1_open_q_F1,PROVOC_1_open_q_LR,PROVOC_1_avg,PROVOC_2_num_q_EM,PROVOC_2_num_q_CC,PROVOC_2_num_q_PM,PROVOC_2_open_q_EM,PROVOC_2_open_q_F1,PROVOC_2_open_q_LR,PROVOC_2_avg,PROVOC_3_num_q_EM,PROVOC_3_num_q_CC,PROVOC_3_num_q_PM,PROVOC_3_open_q_EM,PROVOC_3_open_q_F1,PROVOC_3_open_q_LR,PROVOC_3_avg,PROVOC_avg
2
+ gemma2:27b-instruct-q4_0,13.0885602583716,35.0501444841068,36.6054733979262,2.46710526315789,11.3438276640427,18.7450657894737,19.550029476179816,10.5151915455746,34.6895640686922,36.1426684280053,1.7353579175705,7.23438464320977,14.3036876355748,17.436809039771195,6.84281842818428,22.5948509485095,33.4688346883469,0.588235294117647,5.44191022355375,11.6352941176471,13.428657283393198,16.805165266448068
3
+ gemma2:9b-instruct-q4_0,55.4139044705082,60.4283528811831,64.5843957164712,27.3026315789474,34.9929958961886,52.3865131578947,49.184798950198875,55.2708058124174,60.105680317041,63.0779392338177,27.3318872017354,35.245661577345,52.6681127982647,48.950014490103534,44.2073170731707,52.4390243902439,56.8428184281843,18.4313725490196,25.6150995826153,44.0039215686275,40.25659226531022,46.13046856853753
4
+ gemma:7b-instruct-v1.1-q4_0,16.1652218255992,32.3814380418154,37.3363929967704,0.0,6.12514628005334,23.4473684210526,19.24259459421516,14.0290620871863,31.7040951122853,37.8071334214003,0.216919739696312,7.23992067543114,24.943600867679,19.323455317279723,8.87533875338753,22.9336043360434,33.6720867208672,0.0,4.83394851448634,22.8098039215686,15.520797041058843,18.028948984184574
5
+ ilyagusev/saiga_llama3,32.87438381778,35.5430902600714,49.7790243073262,6.41447368421053,20.8053473204584,38.7894736842105,30.700965512342844,33.8441215323646,36.4861294583884,49.4583883751651,9.54446854663774,25.5266537708331,44.7830802603037,33.27380699061544,19.8170731707317,26.1856368563686,45.2235772357724,5.09803921568627,20.0481873440546,39.2372549019608,25.934961454095728,29.969911319018006
6
+ llama2:13b,0.0,24.1543430222675,9.55294917559069,0.0,2.83808861437468,8.72203947368421,7.544570047652848,0.0,15.9313077939234,9.85468956406869,0.0,5.93808858848223,14.4577006507592,7.696964432872253,0.0,11.7547425474255,17.5982384823848,0.196078431372549,7.24939909960657,16.978431372549,8.962814988889738,8.068116489804948
7
+ llama3.1:70b-instruct-q4_0,22.0975692673806,59.9864014958355,65.0263471018188,18.0921052631579,49.4000831972529,65.4835526315789,46.681009826170765,21.558784676354,59.9471598414795,64.2536327608983,12.5813449023861,52.9417050541935,65.1952277657267,46.079642500173016,21.1382113821138,51.219512195122,57.1815718157182,11.3725490196078,44.6480415518083,60.621568627451,41.03024243197018,44.59696491943799
8
+ llama3.1:8b-instruct-q4_0,4.11354750977393,29.3047764745878,38.2457929627741,0.657894736842105,16.8408300317768,39.21875,21.39693195262579,4.38573315719947,31.7569352708058,41.889035667107,0.216919739696312,23.6358923040094,41.9913232104121,23.979306558205014,2.710027100271,18.5636856368564,43.4959349593496,0.392156862745098,18.0908295632752,37.9745098039216,20.20452398773648,21.860254166189094
9
+ llama3:70b-instruct-q4_0,53.7650858405575,62.0941696413395,63.9129695733469,13.1578947368421,44.7874198858453,60.8569078947368,49.762407928778025,51.7569352708058,61.9286657859974,60.5019815059445,7.15835140997831,47.0840736221898,61.0976138828633,48.25460357962985,39.5325203252033,56.3008130081301,54.0650406504065,9.80392156862745,42.8576254770552,59.5039215686275,43.677307099675005,47.23143953602762
10
+ llama3:8b-instruct-q4_0,14.4144144144144,32.296447390787,47.5437701852796,0.164473684210526,14.8289590829379,32.9309210526316,23.696497635043503,13.5799207397622,33.8705416116248,49.0885072655218,0.0,21.1064741325482,39.5336225596529,26.196511051518314,6.30081300813008,19.579945799458,44.2581300813008,0.0,16.2142171491163,34.2176470588235,20.095125516138115,23.329378067566648
11
+ mistral:7b-instruct-v0.3-q4_0,23.6613972463029,28.0469148393677,45.3340132585416,1.97368421052632,11.6339867326757,25.5740131578947,22.704001574218154,24.4121532364597,31.8361955085865,46.4861294583884,2.16919739696312,11.9370897080079,26.0325379609544,23.81221721156,12.7032520325203,16.8021680216802,41.8529810298103,3.33333333333333,11.2589813397905,26.4862745098039,18.739498377823086,21.751905721200412
12
+ mixtral:8x7b-instruct-v0.1-q4_0,5.16743158252592,43.8551759306476,52.0652728199898,1.15131578947368,6.41044460793093,11.1595394736842,19.968196700708692,3.93659180977543,44.8348745046235,54.8612945838837,0.0,4.81190475632898,9.01301518438178,19.57628013983223,1.6260162601626,32.079945799458,49.1531165311653,0.588235294117647,4.08394916230811,9.03137254901961,16.09377259937188,18.5460831466376
13
+ phi3:14b-medium-4k-instruct-q4_0,29.5427502974673,38.6367499575047,60.2583715791263,5.09868421052632,17.0255729274497,30.3042763157895,30.14440088131064,28.1638044914135,41.5852047556143,58.5204755614267,6.941431670282,19.6178110192585,35.4880694143167,31.719466152051954,16.6327913279133,22.5609756097561,52.4220867208672,3.92156862745098,14.1475921869919,29.7803921568627,23.24423443830703,28.36936715722321
14
+ qwen2:72b-instruct-q4_0,72.9389767125616,78.7013428522863,78.2168961414244,10.8552631578947,44.3525005243567,58.4539473684211,57.25315445949081,67.6882430647292,74.7424042272127,73.0515191545575,5.85683297180043,45.8644120654459,58.1301518438178,54.22226055459392,56.4363143631436,67.2086720867209,67.8184281842818,9.41176470588235,38.8817905312052,54.5607843137255,49.05295903082655,53.509458014970434
15
+ qwen2:7b-instruct-q4_0,40.3705592384838,41.9683834778174,54.3770185279619,3.78289473684211,12.2433821762303,34.2121710526316,31.159068201661185,38.1770145310436,40.105680317041,53.553500660502,4.55531453362256,12.2532095018336,33.5726681127983,30.369564609473503,23.5772357723577,25.8807588075881,50.9993224932249,2.94117647058823,9.74641760239777,30.3843137254902,23.921537478607817,28.483390096580838
16
+ qwen:7b,19.496855345912,19.9898011218766,29.7977222505524,0.0,2.53149921250892,16.6134868421053,14.738227462159204,20.2113606340819,20.5284015852048,29.8546895640687,0.0,2.9492578357003,18.6898047722343,15.372252398548332,10.6029810298103,11.0772357723577,27.10027100271,0.0,1.87065931868442,16.2372549019608,11.148067004253868,13.752848954987138
17
+ solar:10.7b-instruct-v1-q4_0,22.7265000849906,31.7525072242053,48.6996430392657,2.96052631578947,12.611043103768,25.0707236842105,23.97015724203826,26.1558784676354,34.7159841479524,50.1453104359313,4.12147505422994,11.1460215172048,23.6637744034707,24.991407337737424,11.6531165311653,18.3265582655827,45.0033875338753,3.33333333333333,10.2843989247715,20.5039215686274,18.184119359559254,22.38189464644498
18
+ wavecut/vikhr:7b-instruct_0.4-Q4_1,16.9471358150603,20.9246982831888,31.2850586435492,9.70394736842105,18.138570599769,32.7384868421053,21.62298292534894,18.6261558784676,23.7252311756935,34.1479524438573,9.76138828633406,20.2412450317039,35.527114967462,23.671514630586397,9.72222222222222,12.8048780487805,32.6388888888889,10.5882352941176,19.2141238256413,35.043137254902,20.001914255758752,21.76547060389803
19
+ yi:6b,5.9323474417814,25.922148563658,19.8368179500255,0.164473684210526,1.66768509191218,8.45559210526316,10.329844139475128,7.84676354029062,27.2655217965654,21.664464993395,0.433839479392625,2.71018597592006,11.409978308026,11.888459015598285,3.89566395663957,18.360433604336,20.0542005420054,0.0,2.66425497076314,11.3882352941176,9.393798061310283,10.5373670721279
20
+ yi:9b,15.4682984871664,26.6020737718851,33.2313445520993,0.986842105263158,3.19675483677559,14.8717105263158,15.726170713250893,14.7952443857332,29.4848084544254,34.6235138705416,0.433839479392625,3.52530523130186,14.0694143167028,16.155354289682915,10.1964769647696,23.0691056910569,32.8590785907859,0.392156862745098,3.71036337636581,16.4176470588235,14.440804757424466,15.440776586786093
21
+ GigaChat_Lite,60.2159468438538,62.4584717607973,65.0332225913621,33.0357142857143,40.9633292643836,60.4375,53.69069745768518,54.6666666666667,56.6666666666667,60.1333333333333,34.5238095238095,42.2388149962259,62.6666666666667,51.815992975561464,41.8487394957983,45.2100840336134,52.0168067226891,30.5263157894737,38.0252714022117,53.821052631579,43.57471167922753,49.693800704158065
22
+ GigaChat_Plus,60.2159468438538,62.4584717607973,65.0332225913621,32.1428571428571,40.6589114935278,58.9017857142857,53.235199257780636,54.6666666666667,56.6666666666667,60.1333333333333,34.5238095238095,41.8542728148981,62.8809523809524,51.787616897721115,41.8487394957983,45.2100840336134,52.0168067226891,32.6315789473684,38.1823841355402,55.3263157894737,44.202651520747175,49.74182255874965
23
+ GigaChat_Pro,69.2691029900332,70.1827242524917,72.7159468438538,43.75,51.2258923889299,67.6160714285714,62.45995631731333,65.0666666666667,67.0666666666667,69.8,41.6666666666667,48.8871336719648,67.2976190476191,59.964125453263996,51.4285714285714,54.453781512605,58.9075630252101,36.8421052631579,42.3231189954232,58.3684210526316,50.3872602129332,57.60378066117019
24
+ yandexgpt_lite,44.9335548172757,45.8471760797342,60.6727574750831,30.3571428571429,54.3516030514378,67.375,50.58953904677895,45.8666666666667,46.2666666666667,60.4,33.3333333333333,50.0530790747819,66.6309523809524,50.4251163537335,23.5294117647059,24.7058823529412,50.2521008403361,32.6315789473684,44.307314479042,60.5578947368421,39.33069718687261,46.78178419579502
25
+ yandexgpt_pro,69.3521594684385,70.0996677740864,76.7441860465116,8.92857142857143,54.2402693529171,66.5,57.64414234508751,60.2666666666667,60.6666666666667,70.1333333333333,27.3809523809524,53.1044518132574,63.5595238095238,55.851932445066716,44.2016806722689,45.7142857142857,60.8403361344538,12.6315789473684,44.5947129451386,60.5157894736842,44.749730647866606,52.748601812673606
26
+ random,15.4513003569607,16.8111507734149,25.5141934387217,,,,19.258881523032432,14.3196829590489,15.4821664464993,23.3157199471598,,,,17.705856450902665,10.0271002710027,11.4837398373984,25.0338753387534,,,,15.5149051490515,17.49321437432887
27
+ Среднее значение,31.34491803908213,42.219864884630134,49.256112126669386,10.547854010025063,23.883922639062664,38.28603735902256,32.490177046821906,29.992271246147073,41.522395420519615,48.51594187582562,11.020297489928728,24.8811270575865,39.48359221671315,32.420821235043306,20.774177313201694,30.260816196397265,44.191019334563094,9.40230478156174,21.178941320910287,36.05854833161335,26.843667353128367,30.584888544997867
setup.cfg ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [flake8]
2
+ max-line-length = 88
streamlit_app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Streamlit entry point: registers the app's pages, applies the page
# configuration, and runs whichever page the user selected.
import streamlit as st

# Page scripts in the order they are listed in the navigation menu.
# No icons are passed to st.Page.
pages = [
    st.Page("main.py", title="Главная"),
    st.Page("Leaderboard.py", title="Лидерборд"),
]

# Both pages are grouped under a single "Main" section.
pg = st.navigation({"Main": pages})

# Only the page title is configured; no page icon is set.
st.set_page_config(page_title="Leaderboard")

pg.run()

# NOTE(review): st.logo(...) is not called. An earlier draft pointed it at an
# image via an absolute path on the author's machine, which would not resolve
# in a deployed app; use a repo-relative asset if the logo is re-enabled.
vidvopr.csv ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,NUM_Q_multich_EM,NUM_Q_multich_CC,NUM_Q_multich_PM,NUM_Q_onech_EM,NUM_Q_onech_CC,NUM_Q_onech_PM,NUM_Q_seq_EM,NUM_Q_seq_CC,NUM_Q_seq_PM,NUM_Q_map_EM,NUM_Q_map_CC,NUM_Q_map_PM,OPEN_Q_EM,OPEN_Q_F1,OPEN_Q_LR,Q_TYPE_avg
2
+ gemma2:27b-instruct-q4_0,1.44711640774633,6.89508406043839,36.7950627793147,22.1528861154446,63.0265210608424,46.628531807939,2.53968253968254,8.57142857142857,8.57142857142857,0.0,0.0828500414250207,0.787075393537697,1.64661177960735,8.23779148465489,15.1519949335022,14.835604369799482
3
+ gemma2:9b-instruct-q4_0,40.6469461587572,52.6282187699511,63.8540114918068,76.5817299358641,79.0084936730803,76.5990639625585,29.3121693121693,30.4761904761905,35.026455026455,4.30820215410108,4.88815244407622,9.48632974316487,24.4458518049398,32.0378038500143,49.7612412919569,40.604057339672394
4
+ gemma:7b-instruct-v1.1-q4_0,2.40476697169611,12.1302404766972,30.9640349010428,27.2837580169873,53.5794765123938,53.4408042988386,4.97354497354497,11.7460317460317,5.60846560846561,0.828500414250207,0.828500414250207,2.52692626346313,0.0633312222925902,6.03356941864105,23.6782773907536,15.739348575289926
5
+ ilyagusev/saiga_llama3,2.08555011704618,10.0021281123643,47.3824217918706,63.0438550875368,64.3092390362281,64.2052348760617,6.87830687830688,6.87830687830688,18.0952380952381,0.0,0.0,2.8169014084507,6.90310322989234,21.9392109592531,40.68397720076,23.681564911421045
6
+ llama2:13b,0.0,0.0425622472866567,28.2400510746967,0.0,41.0469752123418,1.73340266944011,0.0,0.105820105820106,2.75132275132275,0.0,0.0,0.124275062137531,0.0633312222925902,5.16796089780207,13.0633312222926,6.155935497695527
7
+ llama3.1:70b-instruct-q4_0,26.4311555650138,50.3298574164716,61.5450095765056,24.284971398856,82.6833073322933,81.1232449297972,8.35978835978836,15.6613756613757,26.5608465608466,1.49130074565037,2.23695111847556,10.149130074565,14.3128562381254,48.89922596918,63.82900569981,34.52653510978363
8
+ llama3.1:8b-instruct-q4_0,0.0638433709299851,0.617152585656523,46.7439880825708,8.40700294678454,59.4037094817126,48.6566129311839,0.0,1.90476190476191,6.34920634920635,0.0,0.0,4.63960231980116,0.443318556048132,19.2284319751355,39.6263457884737,15.738931752817674
9
+ llama3:70b-instruct-q4_0,33.4964886145989,59.8638008086827,59.1828048520962,78.1417923383602,79.9791991679667,78.1591263650546,16.6137566137566,17.8835978835979,28.4126984126984,4.14250207125104,5.2195526097763,7.24937862468931,10.3229892336922,44.8346410536552,60.4901836605447,38.93283415402807
10
+ llama3:8b-instruct-q4_0,0.319216854649925,1.00021281123643,47.4037029155139,26.4863927890449,64.1012307158953,63.5812099150633,0.529100529100529,1.48148148148148,4.07407407407407,0.0,0.0828500414250207,2.27837613918807,0.0633312222925902,17.1091465760483,35.2742241925269,17.58563668383605
11
+ mistral:7b-instruct-v0.3-q4_0,0.0,0.0212811236433284,44.5520323473079,46.6458658346334,58.0689894262437,56.612931183914,0.0,0.0,26.984126984127,0.0,0.0,3.93537696768848,2.46991766941102,11.6013570437946,26.0025332488917,18.45962745531034
12
+ mixtral:8x7b-instruct-v0.1-q4_0,0.0,11.8110236220472,51.0534156203448,8.66701334720055,77.8124458311666,67.2733576009707,0.105820105820106,16.9312169312169,19.4708994708995,0.0,1.65700082850041,10.4805302402651,0.633312222925902,5.19230683158126,9.84547181760608,18.72892096470301
13
+ phi3:14b-medium-4k-instruct-q4_0,0.0425622472866567,0.148967865503299,57.1398169823367,56.6302652106084,77.2404229502513,72.2828913156526,2.75132275132275,4.86772486772487,36.2962962962963,0.0,0.331400165700083,9.07207953603977,5.25649145028499,16.8528380209838,31.6485117162761,24.704106091751175
14
+ qwen2:72b-instruct-q4_0,55.5862949563737,69.9723345392637,71.8769951053416,85.6647599237303,89.3222395562489,85.9594383775351,62.010582010582,62.1164021164021,62.1164021164021,31.4001657000828,34.3827671913836,36.8682684341342,8.92970234325522,43.0269331550944,57.1019632678911,57.08901658624805
15
+ qwen2:7b-instruct-q4_0,10.3851883379442,13.024047669717,54.7137688869972,66.2159819726122,67.8973825619691,67.0653492806379,19.5767195767196,20.5291005291005,21.2698412698413,1.90555095277548,2.31980115990058,7.49792874896438,3.73654211526282,11.4397586578317,32.7891070297657,26.691071250002647
16
+ qwen:7b,0.0,0.0212811236433284,30.0915088316663,37.0948171260184,37.857514300572,37.0948171260184,8.99470899470899,10.0529100529101,11.957671957672,0.0,0.0,1.32560066280033,0.0,2.44002256870951,17.0981633945535,12.935267742618189
17
+ solar:10.7b-instruct-v1-q4_0,0.0,0.25537348371994,48.669929772292,46.2991853007454,64.2745709828393,62.2811579129832,0.0,0.211640211640212,18.2539682539683,0.0,0.0828500414250207,3.23115161557581,3.41988600379987,11.4318388715363,23.1849271690944,18.773098641307982
18
+ wavecut/vikhr:7b-instruct_0.4-Q4_1,0.0,0.0212811236433284,36.1247073845499,34.3213728549142,42.9883862021148,40.4402842780378,0.952380952380952,2.75132275132275,6.24338624338624,0.0,0.0,0.579950289975145,10.0063331222293,19.0998530939532,34.2970234325522,15.188418781937322
19
+ yi:6b,0.595871462013194,8.93807193019791,21.5896999361566,12.619171433524,44.6697867914717,26.7464031894609,0.317460317460317,9.73544973544974,1.74603174603175,0.165700082850041,0.828500414250207,0.414250207125104,0.189993666877771,2.29393179599174,10.265357821406,9.40771203535113
20
+ yi:9b,4.08597573951905,11.5130878910406,34.8691210895935,25.8970358814353,47.1658866354654,42.6417056682267,8.99470899470899,10.2645502645503,12.6984126984127,0.0,0.248550124275062,1.40845070422535,0.633312222925902,3.4585674314986,15.136795440152,14.601077385735296
21
+ GigaChat_Lite,51.19825708061,55.5555555555556,68.0283224400871,69.2691029900332,71.0963455149502,69.2691029900332,34.020618556701,34.020618556701,34.020618556701,6.00858369098712,6.00858369098712,8.15450643776824,32.6460481099656,40.3723509295672,58.9209621993127,42.57263848666402
22
+ GigaChat_Plus,51.19825708061,55.5555555555556,68.0283224400871,69.2691029900332,71.0963455149502,69.2691029900332,34.020618556701,34.020618556701,34.020618556701,6.00858369098712,6.00858369098712,8.15450643776824,32.9896907216495,40.1954759333432,58.8831615120275,42.58123628187567
23
+ GigaChat_Pro,63.2897603485839,66.2309368191721,75.4357298474945,76.3289036544851,77.7408637873754,76.3289036544851,52.0618556701031,52.0618556701031,52.0618556701031,11.587982832618,11.587982832618,15.8798283261803,40.893470790378,47.6443830947436,64.5051546391753,52.24263117584123
24
+ yandexgpt_lite,7.29847494553377,8.06100217864924,54.0849673202614,76.1627906976744,77.3255813953489,76.6611295681063,19.5876288659794,19.5876288659794,24.4845360824742,1.28755364806867,1.28755364806867,6.65236051502146,31.9587628865979,49.8317287269818,64.9347079037801,34.61376048323505
25
+ yandexgpt_pro,47.2766884531591,49.3464052287582,75.0544662309368,84.8837209302326,85.0498338870432,84.8837209302326,41.7525773195876,41.7525773195876,43.298969072165,5.5793991416309,5.5793991416309,7.29613733905579,15.4639175257732,50.7635115107509,63.6975945017182,46.77859456881752
26
+ random,4.04341349223239,7.59736114066823,32.698446477974,24.5103137458832,24.5103137458832,24.5103137458832,14.0740740740741,14.0740740740741,14.0740740740741,0.828500414250207,0.828500414250207,3.23115161557581,,,,13.748378084568563
27
+ Среднее значение,16.075833128172178,22.063312965582536,49.84489352715383,45.87447170090569,64.05020245106593,58.937913662725904,14.737097038127963,17.107467408498337,22.177897779959643,3.0217010215801214,3.379613200536212,6.569602924286439,10.312171056688333,23.297193327114417,37.91125068645098,26.276640176412442