Upload /Llama-2-13b-hf/nf4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json with huggingface_hub
Browse files
Llama-2-13b-hf/nf4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json
ADDED
@@ -0,0 +1,436 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"prefill": {
|
3 |
+
"memory": {
|
4 |
+
"unit": "MB",
|
5 |
+
"max_ram": 1027.62496,
|
6 |
+
"max_vram": 8388.608,
|
7 |
+
"max_reserved": 7904.165888,
|
8 |
+
"max_allocated": 7809.967616
|
9 |
+
},
|
10 |
+
"latency": {
|
11 |
+
"unit": "s",
|
12 |
+
"mean": 0.08499001525620284,
|
13 |
+
"stdev": 0.0006577089673895364,
|
14 |
+
"values": [
|
15 |
+
0.09199440002441406,
|
16 |
+
0.08446463775634766,
|
17 |
+
0.08470425415039062,
|
18 |
+
0.08481075286865235,
|
19 |
+
0.08479334259033203,
|
20 |
+
0.0848322525024414,
|
21 |
+
0.08471043395996093,
|
22 |
+
0.08478514862060547,
|
23 |
+
0.0848394546508789,
|
24 |
+
0.0848322525024414,
|
25 |
+
0.08482918548583984,
|
26 |
+
0.08484265899658203,
|
27 |
+
0.08501042938232421,
|
28 |
+
0.08475443267822266,
|
29 |
+
0.08487935638427735,
|
30 |
+
0.08489266967773437,
|
31 |
+
0.08490086364746094,
|
32 |
+
0.08497058868408203,
|
33 |
+
0.08493260955810547,
|
34 |
+
0.08479027557373046,
|
35 |
+
0.08499199676513672,
|
36 |
+
0.0848875503540039,
|
37 |
+
0.08490188598632813,
|
38 |
+
0.08481491088867188,
|
39 |
+
0.08488345336914062,
|
40 |
+
0.08483123016357422,
|
41 |
+
0.08486809539794922,
|
42 |
+
0.08493260955810547,
|
43 |
+
0.08497977447509765,
|
44 |
+
0.08486605072021484,
|
45 |
+
0.08485273742675781,
|
46 |
+
0.08489369964599609,
|
47 |
+
0.08506060791015625,
|
48 |
+
0.08489778900146484,
|
49 |
+
0.08489881896972656,
|
50 |
+
0.08495001220703124,
|
51 |
+
0.08482816314697265,
|
52 |
+
0.08495820617675781,
|
53 |
+
0.08485990142822265,
|
54 |
+
0.08498381042480468,
|
55 |
+
0.08494080352783204,
|
56 |
+
0.08497869110107421,
|
57 |
+
0.08484146881103516,
|
58 |
+
0.08503091430664063,
|
59 |
+
0.08483737945556641,
|
60 |
+
0.08495718383789062,
|
61 |
+
0.08483020782470703,
|
62 |
+
0.08491315460205077,
|
63 |
+
0.08504118347167969,
|
64 |
+
0.0849438705444336,
|
65 |
+
0.08483942413330078,
|
66 |
+
0.08503705596923829,
|
67 |
+
0.08483737945556641,
|
68 |
+
0.0850544662475586,
|
69 |
+
0.08497663879394532,
|
70 |
+
0.08501760101318359,
|
71 |
+
0.08497459411621094,
|
72 |
+
0.08493772888183594,
|
73 |
+
0.08489984130859375,
|
74 |
+
0.084927490234375,
|
75 |
+
0.0850165786743164,
|
76 |
+
0.08489881896972656,
|
77 |
+
0.08491136169433594,
|
78 |
+
0.08494284820556641,
|
79 |
+
0.08493670654296875,
|
80 |
+
0.08495001220703124,
|
81 |
+
0.08489984130859375,
|
82 |
+
0.08492237091064453,
|
83 |
+
0.0849244155883789,
|
84 |
+
0.0849459228515625,
|
85 |
+
0.08482099151611328,
|
86 |
+
0.08495820617675781,
|
87 |
+
0.08482713317871093,
|
88 |
+
0.08492237091064453,
|
89 |
+
0.08488038635253906,
|
90 |
+
0.08493977355957032,
|
91 |
+
0.08481279754638672,
|
92 |
+
0.084959228515625,
|
93 |
+
0.08497869110107421,
|
94 |
+
0.0849438705444336,
|
95 |
+
0.08495206451416015,
|
96 |
+
0.08507698822021484,
|
97 |
+
0.08495104217529297,
|
98 |
+
0.08496025848388672,
|
99 |
+
0.08492339324951172,
|
100 |
+
0.08519987487792968,
|
101 |
+
0.0849991683959961,
|
102 |
+
0.08462335968017579,
|
103 |
+
0.08489472198486328,
|
104 |
+
0.08511590576171875,
|
105 |
+
0.08520499420166015,
|
106 |
+
0.08498483276367187,
|
107 |
+
0.08503705596923829,
|
108 |
+
0.08517324829101562,
|
109 |
+
0.08496844482421875,
|
110 |
+
0.0850708465576172,
|
111 |
+
0.08506572723388672,
|
112 |
+
0.08485382080078124,
|
113 |
+
0.08492953491210938,
|
114 |
+
0.08516607666015626,
|
115 |
+
0.08506777954101563,
|
116 |
+
0.08492953491210938,
|
117 |
+
0.085032958984375,
|
118 |
+
0.08507596588134765,
|
119 |
+
0.08501248168945312,
|
120 |
+
0.08466329956054687,
|
121 |
+
0.0849776611328125,
|
122 |
+
0.08511795043945312,
|
123 |
+
0.08499199676513672,
|
124 |
+
0.08513024139404297,
|
125 |
+
0.08496742248535157,
|
126 |
+
0.08465408325195313,
|
127 |
+
0.08504335784912109,
|
128 |
+
0.08500838470458984,
|
129 |
+
0.085106689453125,
|
130 |
+
0.08504422760009765,
|
131 |
+
0.08494796752929687,
|
132 |
+
0.08497772979736327
|
133 |
+
]
|
134 |
+
},
|
135 |
+
"throughput": {
|
136 |
+
"unit": "tokens/s",
|
137 |
+
"value": 3012.1185321391777
|
138 |
+
},
|
139 |
+
"energy": null,
|
140 |
+
"efficiency": null
|
141 |
+
},
|
142 |
+
"decode": {
|
143 |
+
"memory": {
|
144 |
+
"unit": "MB",
|
145 |
+
"max_ram": 1047.547904,
|
146 |
+
"max_vram": 9154.06848,
|
147 |
+
"max_reserved": 8669.626368,
|
148 |
+
"max_allocated": 8519.06048
|
149 |
+
},
|
150 |
+
"latency": {
|
151 |
+
"unit": "s",
|
152 |
+
"mean": 13.418581016540532,
|
153 |
+
"stdev": 0,
|
154 |
+
"values": [
|
155 |
+
13.418581016540532
|
156 |
+
]
|
157 |
+
},
|
158 |
+
"throughput": {
|
159 |
+
"unit": "tokens/s",
|
160 |
+
"value": 19.003499675984518
|
161 |
+
},
|
162 |
+
"energy": null,
|
163 |
+
"efficiency": null
|
164 |
+
},
|
165 |
+
"per_token": {
|
166 |
+
"memory": null,
|
167 |
+
"latency": {
|
168 |
+
"unit": "s",
|
169 |
+
"mean": 0.05262188633937464,
|
170 |
+
"stdev": 0.00026683280808433463,
|
171 |
+
"values": [
|
172 |
+
0.053348350524902347,
|
173 |
+
0.05422489547729492,
|
174 |
+
0.052789249420166016,
|
175 |
+
0.05268070220947266,
|
176 |
+
0.052657150268554685,
|
177 |
+
0.05313228988647461,
|
178 |
+
0.0530145263671875,
|
179 |
+
0.05296537780761719,
|
180 |
+
0.05248742294311524,
|
181 |
+
0.053021438598632814,
|
182 |
+
0.05272576141357422,
|
183 |
+
0.052613121032714844,
|
184 |
+
0.052514816284179686,
|
185 |
+
0.05322444915771484,
|
186 |
+
0.05298688125610351,
|
187 |
+
0.05310464096069336,
|
188 |
+
0.052719615936279295,
|
189 |
+
0.05271756744384765,
|
190 |
+
0.052585472106933595,
|
191 |
+
0.053035102844238284,
|
192 |
+
0.05290588760375976,
|
193 |
+
0.05288550567626953,
|
194 |
+
0.05284864044189453,
|
195 |
+
0.052999168395996096,
|
196 |
+
0.052924415588378904,
|
197 |
+
0.05297049713134765,
|
198 |
+
0.052768768310546874,
|
199 |
+
0.052999168395996096,
|
200 |
+
0.053043201446533204,
|
201 |
+
0.05285478210449219,
|
202 |
+
0.0528271369934082,
|
203 |
+
0.05281382369995117,
|
204 |
+
0.05244825744628906,
|
205 |
+
0.0529172477722168,
|
206 |
+
0.05242060852050781,
|
207 |
+
0.052894718170166014,
|
208 |
+
0.052225025177001956,
|
209 |
+
0.052590591430664066,
|
210 |
+
0.052651008605957034,
|
211 |
+
0.05304115295410156,
|
212 |
+
0.05237452697753906,
|
213 |
+
0.052560897827148435,
|
214 |
+
0.052348926544189454,
|
215 |
+
0.0524769287109375,
|
216 |
+
0.05240524673461914,
|
217 |
+
0.0524769287109375,
|
218 |
+
0.05231001663208008,
|
219 |
+
0.05321011352539062,
|
220 |
+
0.05239910507202149,
|
221 |
+
0.052413440704345705,
|
222 |
+
0.05231820678710938,
|
223 |
+
0.05238476943969726,
|
224 |
+
0.052367359161376956,
|
225 |
+
0.052348926544189454,
|
226 |
+
0.05230182266235352,
|
227 |
+
0.05247078323364258,
|
228 |
+
0.052416511535644535,
|
229 |
+
0.05249331283569336,
|
230 |
+
0.05228339385986328,
|
231 |
+
0.05250764846801758,
|
232 |
+
0.05221068954467773,
|
233 |
+
0.052397056579589846,
|
234 |
+
0.05235507202148437,
|
235 |
+
0.05236838531494141,
|
236 |
+
0.05270220947265625,
|
237 |
+
0.05275878524780273,
|
238 |
+
0.05290367889404297,
|
239 |
+
0.05285580825805664,
|
240 |
+
0.05297663879394531,
|
241 |
+
0.05273190307617188,
|
242 |
+
0.05243392181396484,
|
243 |
+
0.05254963302612305,
|
244 |
+
0.05263273620605469,
|
245 |
+
0.052557662963867186,
|
246 |
+
0.05233561706542969,
|
247 |
+
0.052523006439208986,
|
248 |
+
0.05245337677001953,
|
249 |
+
0.053085182189941404,
|
250 |
+
0.05238272094726563,
|
251 |
+
0.052332542419433595,
|
252 |
+
0.05235302352905274,
|
253 |
+
0.05239091110229492,
|
254 |
+
0.05241753768920898,
|
255 |
+
0.05245132827758789,
|
256 |
+
0.05254963302612305,
|
257 |
+
0.052542465209960934,
|
258 |
+
0.05229056167602539,
|
259 |
+
0.05230284881591797,
|
260 |
+
0.05267148971557617,
|
261 |
+
0.052450302124023435,
|
262 |
+
0.052450302124023435,
|
263 |
+
0.05290291213989258,
|
264 |
+
0.05248921585083008,
|
265 |
+
0.05239910507202149,
|
266 |
+
0.052563968658447265,
|
267 |
+
0.05244518280029297,
|
268 |
+
0.052222976684570314,
|
269 |
+
0.05247385787963867,
|
270 |
+
0.052222976684570314,
|
271 |
+
0.05233356857299805,
|
272 |
+
0.05224038314819336,
|
273 |
+
0.0524400634765625,
|
274 |
+
0.05228543853759766,
|
275 |
+
0.0524031982421875,
|
276 |
+
0.05246976089477539,
|
277 |
+
0.05241446304321289,
|
278 |
+
0.05255782318115235,
|
279 |
+
0.05258342361450195,
|
280 |
+
0.05269504165649414,
|
281 |
+
0.0530513916015625,
|
282 |
+
0.052391937255859375,
|
283 |
+
0.05247180938720703,
|
284 |
+
0.05224060821533203,
|
285 |
+
0.052373279571533204,
|
286 |
+
0.05218304061889648,
|
287 |
+
0.05248921585083008,
|
288 |
+
0.052348926544189454,
|
289 |
+
0.05240627288818359,
|
290 |
+
0.05228236770629883,
|
291 |
+
0.05261209487915039,
|
292 |
+
0.05256294250488281,
|
293 |
+
0.052404254913330076,
|
294 |
+
0.052560863494873045,
|
295 |
+
0.05250867080688477,
|
296 |
+
0.052542465209960934,
|
297 |
+
0.05257318496704102,
|
298 |
+
0.05251583862304687,
|
299 |
+
0.05293260955810547,
|
300 |
+
0.052604927062988284,
|
301 |
+
0.052362239837646485,
|
302 |
+
0.05253836822509766,
|
303 |
+
0.0522874870300293,
|
304 |
+
0.05241548919677735,
|
305 |
+
0.05240627288818359,
|
306 |
+
0.052574207305908206,
|
307 |
+
0.05258444976806641,
|
308 |
+
0.05254143905639649,
|
309 |
+
0.05234175872802734,
|
310 |
+
0.05255680084228516,
|
311 |
+
0.05239295959472656,
|
312 |
+
0.052517887115478515,
|
313 |
+
0.05240115356445312,
|
314 |
+
0.052528129577636716,
|
315 |
+
0.052534271240234375,
|
316 |
+
0.05262540817260742,
|
317 |
+
0.05255782318115235,
|
318 |
+
0.0525219841003418,
|
319 |
+
0.05320806503295898,
|
320 |
+
0.05276979064941406,
|
321 |
+
0.05262745666503906,
|
322 |
+
0.05255168151855469,
|
323 |
+
0.05274214553833008,
|
324 |
+
0.05263974380493164,
|
325 |
+
0.0526376953125,
|
326 |
+
0.05359308624267578,
|
327 |
+
0.05255168151855469,
|
328 |
+
0.0525035514831543,
|
329 |
+
0.05252710342407227,
|
330 |
+
0.052536319732666016,
|
331 |
+
0.053085182189941404,
|
332 |
+
0.05253836822509766,
|
333 |
+
0.052468734741210936,
|
334 |
+
0.052536319732666016,
|
335 |
+
0.052569087982177735,
|
336 |
+
0.052664321899414064,
|
337 |
+
0.05241753768920898,
|
338 |
+
0.05253529739379883,
|
339 |
+
0.05260800170898437,
|
340 |
+
0.052724735260009765,
|
341 |
+
0.05242777633666992,
|
342 |
+
0.05263257598876953,
|
343 |
+
0.052495361328125,
|
344 |
+
0.05301964950561523,
|
345 |
+
0.052506622314453126,
|
346 |
+
0.05253017425537109,
|
347 |
+
0.05266124725341797,
|
348 |
+
0.052572158813476565,
|
349 |
+
0.052519935607910156,
|
350 |
+
0.05296025466918945,
|
351 |
+
0.05283942413330078,
|
352 |
+
0.05312614440917969,
|
353 |
+
0.052915199279785156,
|
354 |
+
0.053136383056640625,
|
355 |
+
0.053106689453125,
|
356 |
+
0.053114879608154295,
|
357 |
+
0.052657150268554685,
|
358 |
+
0.052370433807373044,
|
359 |
+
0.05226598358154297,
|
360 |
+
0.05279334259033203,
|
361 |
+
0.05293056106567383,
|
362 |
+
0.05275033569335937,
|
363 |
+
0.05258956909179688,
|
364 |
+
0.0526376953125,
|
365 |
+
0.05243904113769531,
|
366 |
+
0.05244211196899414,
|
367 |
+
0.052462593078613284,
|
368 |
+
0.05253017425537109,
|
369 |
+
0.05247795104980469,
|
370 |
+
0.05259366226196289,
|
371 |
+
0.05291417694091797,
|
372 |
+
0.053028865814208986,
|
373 |
+
0.0528089599609375,
|
374 |
+
0.05312691116333008,
|
375 |
+
0.05296844863891602,
|
376 |
+
0.052822017669677736,
|
377 |
+
0.05237760162353516,
|
378 |
+
0.05276774215698242,
|
379 |
+
0.05264691162109375,
|
380 |
+
0.052857856750488284,
|
381 |
+
0.05291417694091797,
|
382 |
+
0.053075969696044924,
|
383 |
+
0.05271551895141602,
|
384 |
+
0.05274214553833008,
|
385 |
+
0.05231513595581055,
|
386 |
+
0.05265919876098633,
|
387 |
+
0.052646144866943356,
|
388 |
+
0.05264972686767578,
|
389 |
+
0.05254553604125976,
|
390 |
+
0.05283020782470703,
|
391 |
+
0.05274316787719727,
|
392 |
+
0.05293772888183594,
|
393 |
+
0.052798465728759764,
|
394 |
+
0.052732929229736325,
|
395 |
+
0.05251891326904297,
|
396 |
+
0.05248921585083008,
|
397 |
+
0.05244927978515625,
|
398 |
+
0.05255475234985352,
|
399 |
+
0.05243596649169922,
|
400 |
+
0.05287526321411133,
|
401 |
+
0.05244211196899414,
|
402 |
+
0.052393985748291017,
|
403 |
+
0.052746238708496096,
|
404 |
+
0.052792320251464846,
|
405 |
+
0.05251379013061523,
|
406 |
+
0.052517887115478515,
|
407 |
+
0.05240729522705078,
|
408 |
+
0.052457473754882813,
|
409 |
+
0.05248819351196289,
|
410 |
+
0.052653057098388675,
|
411 |
+
0.052653057098388675,
|
412 |
+
0.05263872146606445,
|
413 |
+
0.05240627288818359,
|
414 |
+
0.052959232330322265,
|
415 |
+
0.05240422439575195,
|
416 |
+
0.05255475234985352,
|
417 |
+
0.05234175872802734,
|
418 |
+
0.05257523345947265,
|
419 |
+
0.05258137512207031,
|
420 |
+
0.053031936645507816,
|
421 |
+
0.05250867080688477,
|
422 |
+
0.05251686477661133,
|
423 |
+
0.052350975036621096,
|
424 |
+
0.05251686477661133,
|
425 |
+
0.05243084716796875,
|
426 |
+
0.05245542526245117
|
427 |
+
]
|
428 |
+
},
|
429 |
+
"throughput": {
|
430 |
+
"unit": "tokens/s",
|
431 |
+
"value": 19.003499675984518
|
432 |
+
},
|
433 |
+
"energy": null,
|
434 |
+
"efficiency": null
|
435 |
+
}
|
436 |
+
}
|