Upload /Llama-2-13b-hf/int8_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json with huggingface_hub
Browse files
Llama-2-13b-hf/int8_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json
ADDED
@@ -0,0 +1,423 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"prefill": {
|
3 |
+
"memory": {
|
4 |
+
"unit": "MB",
|
5 |
+
"max_ram": 1046.859776,
|
6 |
+
"max_vram": 14501.80608,
|
7 |
+
"max_reserved": 14008.97536,
|
8 |
+
"max_allocated": 13981.414912
|
9 |
+
},
|
10 |
+
"latency": {
|
11 |
+
"unit": "s",
|
12 |
+
"mean": 0.09575294138590493,
|
13 |
+
"stdev": 0.0008248219357816075,
|
14 |
+
"values": [
|
15 |
+
0.09952223968505859,
|
16 |
+
0.09869948577880859,
|
17 |
+
0.09565695953369141,
|
18 |
+
0.09567132568359375,
|
19 |
+
0.09575321960449219,
|
20 |
+
0.09588735961914062,
|
21 |
+
0.09552998352050782,
|
22 |
+
0.09497395324707031,
|
23 |
+
0.09540402984619141,
|
24 |
+
0.09527193450927735,
|
25 |
+
0.09516544342041015,
|
26 |
+
0.0951357421875,
|
27 |
+
0.09496371459960938,
|
28 |
+
0.09510604858398437,
|
29 |
+
0.09679154968261719,
|
30 |
+
0.09677311706542968,
|
31 |
+
0.09672908782958985,
|
32 |
+
0.0964290542602539,
|
33 |
+
0.09504768371582031,
|
34 |
+
0.09620800018310546,
|
35 |
+
0.09670758056640626,
|
36 |
+
0.0963921890258789,
|
37 |
+
0.09663180541992188,
|
38 |
+
0.09656114959716797,
|
39 |
+
0.09652326202392578,
|
40 |
+
0.09665433502197265,
|
41 |
+
0.09676703643798829,
|
42 |
+
0.09653043365478516,
|
43 |
+
0.09650809478759766,
|
44 |
+
0.0963399658203125,
|
45 |
+
0.09526783752441406,
|
46 |
+
0.09497702026367187,
|
47 |
+
0.09502925109863282,
|
48 |
+
0.09495040130615234,
|
49 |
+
0.09510502624511719,
|
50 |
+
0.09555974578857422,
|
51 |
+
0.09519718170166015,
|
52 |
+
0.09469951629638672,
|
53 |
+
0.09558220672607422,
|
54 |
+
0.09630924987792969,
|
55 |
+
0.09517670440673828,
|
56 |
+
0.09525974273681641,
|
57 |
+
0.09543065643310547,
|
58 |
+
0.09506201934814452,
|
59 |
+
0.09494732666015625,
|
60 |
+
0.0950804443359375,
|
61 |
+
0.09498009490966797,
|
62 |
+
0.09463807678222656,
|
63 |
+
0.09491149139404297,
|
64 |
+
0.09515110778808594,
|
65 |
+
0.09475276947021484,
|
66 |
+
0.09490841674804687,
|
67 |
+
0.09588019561767579,
|
68 |
+
0.09580748748779297,
|
69 |
+
0.09576448059082031,
|
70 |
+
0.09666969299316407,
|
71 |
+
0.09620889282226562,
|
72 |
+
0.09653555297851563,
|
73 |
+
0.09517977905273438,
|
74 |
+
0.09491046142578125,
|
75 |
+
0.0958187484741211,
|
76 |
+
0.09542041778564453,
|
77 |
+
0.09506201934814452,
|
78 |
+
0.09479373168945313,
|
79 |
+
0.09611878204345703,
|
80 |
+
0.09629388427734376,
|
81 |
+
0.09504255676269531,
|
82 |
+
0.09572351837158204,
|
83 |
+
0.09772953796386719,
|
84 |
+
0.09577062225341797,
|
85 |
+
0.09536409759521484,
|
86 |
+
0.09602764892578125,
|
87 |
+
0.09525350189208984,
|
88 |
+
0.0956231689453125,
|
89 |
+
0.09545420837402344,
|
90 |
+
0.09503846740722656,
|
91 |
+
0.09511321258544922,
|
92 |
+
0.09527616119384766,
|
93 |
+
0.09509478759765624,
|
94 |
+
0.09510195159912109,
|
95 |
+
0.09491558074951172,
|
96 |
+
0.09529974365234375,
|
97 |
+
0.09507839965820312,
|
98 |
+
0.09522688293457031,
|
99 |
+
0.09527295684814453,
|
100 |
+
0.09521971130371094,
|
101 |
+
0.09525875091552734,
|
102 |
+
0.09534770965576173,
|
103 |
+
0.09599180603027344,
|
104 |
+
0.09676493072509766,
|
105 |
+
0.09682022094726563,
|
106 |
+
0.09654476928710938,
|
107 |
+
0.09654988861083984,
|
108 |
+
0.09670963287353515,
|
109 |
+
0.09662464141845703,
|
110 |
+
0.09609932708740235,
|
111 |
+
0.09530060577392578,
|
112 |
+
0.09516031646728515,
|
113 |
+
0.09538969421386718,
|
114 |
+
0.09525247955322266,
|
115 |
+
0.09534464263916016,
|
116 |
+
0.09669734191894531,
|
117 |
+
0.09644953918457032,
|
118 |
+
0.09678131103515625,
|
119 |
+
0.09656832122802735
|
120 |
+
]
|
121 |
+
},
|
122 |
+
"throughput": {
|
123 |
+
"unit": "tokens/s",
|
124 |
+
"value": 2673.5471129629846
|
125 |
+
},
|
126 |
+
"energy": null,
|
127 |
+
"efficiency": null
|
128 |
+
},
|
129 |
+
"decode": {
|
130 |
+
"memory": {
|
131 |
+
"unit": "MB",
|
132 |
+
"max_ram": 1066.78272,
|
133 |
+
"max_vram": 15428.747264,
|
134 |
+
"max_reserved": 14935.916544,
|
135 |
+
"max_allocated": 14802.13504
|
136 |
+
},
|
137 |
+
"latency": {
|
138 |
+
"unit": "s",
|
139 |
+
"mean": 24.37827899169922,
|
140 |
+
"stdev": 0,
|
141 |
+
"values": [
|
142 |
+
24.37827899169922
|
143 |
+
]
|
144 |
+
},
|
145 |
+
"throughput": {
|
146 |
+
"unit": "tokens/s",
|
147 |
+
"value": 10.460131336048262
|
148 |
+
},
|
149 |
+
"energy": null,
|
150 |
+
"efficiency": null
|
151 |
+
},
|
152 |
+
"per_token": {
|
153 |
+
"memory": null,
|
154 |
+
"latency": {
|
155 |
+
"unit": "s",
|
156 |
+
"mean": 0.09560109408509497,
|
157 |
+
"stdev": 0.0008345143500717126,
|
158 |
+
"values": [
|
159 |
+
0.09789234924316406,
|
160 |
+
0.10317005157470703,
|
161 |
+
0.09503846740722656,
|
162 |
+
0.09514905548095703,
|
163 |
+
0.09555558776855469,
|
164 |
+
0.09526783752441406,
|
165 |
+
0.09490227508544923,
|
166 |
+
0.09493504333496093,
|
167 |
+
0.09535794830322265,
|
168 |
+
0.09534156799316407,
|
169 |
+
0.09575218963623047,
|
170 |
+
0.09676083374023438,
|
171 |
+
0.09579007720947266,
|
172 |
+
0.09530265808105469,
|
173 |
+
0.09519308471679687,
|
174 |
+
0.0955146255493164,
|
175 |
+
0.09568153381347656,
|
176 |
+
0.09708748626708984,
|
177 |
+
0.09749811553955078,
|
178 |
+
0.09656934356689453,
|
179 |
+
0.09681715393066406,
|
180 |
+
0.09518592071533204,
|
181 |
+
0.09517158508300781,
|
182 |
+
0.09610854339599609,
|
183 |
+
0.09673216247558594,
|
184 |
+
0.09689497375488282,
|
185 |
+
0.09666560363769532,
|
186 |
+
0.09705062103271485,
|
187 |
+
0.09702706909179687,
|
188 |
+
0.09676287841796875,
|
189 |
+
0.09680691528320312,
|
190 |
+
0.09631539154052735,
|
191 |
+
0.09600204467773438,
|
192 |
+
0.0950978546142578,
|
193 |
+
0.09581465911865235,
|
194 |
+
0.09653555297851563,
|
195 |
+
0.09607987213134765,
|
196 |
+
0.09519923400878906,
|
197 |
+
0.09509069061279297,
|
198 |
+
0.09468621063232421,
|
199 |
+
0.09568256378173828,
|
200 |
+
0.09539993286132813,
|
201 |
+
0.09614643096923828,
|
202 |
+
0.09663897705078126,
|
203 |
+
0.09697280120849609,
|
204 |
+
0.09549311828613281,
|
205 |
+
0.09523200225830078,
|
206 |
+
0.09517874908447266,
|
207 |
+
0.09519411468505859,
|
208 |
+
0.09505587005615235,
|
209 |
+
0.0950302734375,
|
210 |
+
0.09506304168701171,
|
211 |
+
0.09519923400878906,
|
212 |
+
0.09524018859863281,
|
213 |
+
0.09530879974365235,
|
214 |
+
0.09525247955322266,
|
215 |
+
0.09527398681640625,
|
216 |
+
0.09520845031738281,
|
217 |
+
0.0954593276977539,
|
218 |
+
0.09648435211181641,
|
219 |
+
0.09675161743164062,
|
220 |
+
0.0952985610961914,
|
221 |
+
0.09529036712646484,
|
222 |
+
0.0953733139038086,
|
223 |
+
0.09477017974853516,
|
224 |
+
0.0950487060546875,
|
225 |
+
0.09488384246826172,
|
226 |
+
0.09530879974365235,
|
227 |
+
0.09539276885986328,
|
228 |
+
0.09545830535888672,
|
229 |
+
0.09563136291503906,
|
230 |
+
0.09534259033203125,
|
231 |
+
0.09513267517089843,
|
232 |
+
0.09498828887939453,
|
233 |
+
0.0950282211303711,
|
234 |
+
0.09515929412841796,
|
235 |
+
0.09655500793457031,
|
236 |
+
0.09631743621826172,
|
237 |
+
0.09537843322753906,
|
238 |
+
0.09659801483154297,
|
239 |
+
0.09751961517333985,
|
240 |
+
0.0967741470336914,
|
241 |
+
0.09630003356933593,
|
242 |
+
0.09626214599609376,
|
243 |
+
0.09671475219726562,
|
244 |
+
0.09522278594970703,
|
245 |
+
0.09550643157958984,
|
246 |
+
0.09536000061035156,
|
247 |
+
0.09611161804199218,
|
248 |
+
0.09687245178222656,
|
249 |
+
0.09722169494628906,
|
250 |
+
0.09686214447021485,
|
251 |
+
0.09681510162353515,
|
252 |
+
0.09535692596435547,
|
253 |
+
0.09548697662353516,
|
254 |
+
0.09615360260009766,
|
255 |
+
0.0967383041381836,
|
256 |
+
0.09632870483398437,
|
257 |
+
0.09535078430175781,
|
258 |
+
0.09494528198242187,
|
259 |
+
0.09661644744873046,
|
260 |
+
0.09567641448974609,
|
261 |
+
0.09550540924072265,
|
262 |
+
0.0959139862060547,
|
263 |
+
0.09548287963867187,
|
264 |
+
0.09534668731689454,
|
265 |
+
0.09600511932373047,
|
266 |
+
0.09684812927246093,
|
267 |
+
0.09570073699951172,
|
268 |
+
0.09522380828857421,
|
269 |
+
0.09525350189208984,
|
270 |
+
0.09511833953857422,
|
271 |
+
0.094671875,
|
272 |
+
0.09503948974609375,
|
273 |
+
0.09514701080322266,
|
274 |
+
0.09497293090820312,
|
275 |
+
0.09541426849365234,
|
276 |
+
0.0949370880126953,
|
277 |
+
0.09484288024902343,
|
278 |
+
0.09507532501220703,
|
279 |
+
0.0951377944946289,
|
280 |
+
0.09536102294921875,
|
281 |
+
0.09610649871826171,
|
282 |
+
0.09647615814208985,
|
283 |
+
0.09658367919921874,
|
284 |
+
0.09658879852294922,
|
285 |
+
0.09637171173095703,
|
286 |
+
0.09533132934570313,
|
287 |
+
0.09509580993652343,
|
288 |
+
0.0951695327758789,
|
289 |
+
0.09635327911376954,
|
290 |
+
0.09675263977050781,
|
291 |
+
0.09631231689453125,
|
292 |
+
0.09564672088623047,
|
293 |
+
0.09514701080322266,
|
294 |
+
0.0949012451171875,
|
295 |
+
0.09466470336914062,
|
296 |
+
0.09508147430419922,
|
297 |
+
0.09521766662597657,
|
298 |
+
0.09537945556640624,
|
299 |
+
0.0952279052734375,
|
300 |
+
0.09517158508300781,
|
301 |
+
0.09494131469726562,
|
302 |
+
0.0953465576171875,
|
303 |
+
0.09559859466552735,
|
304 |
+
0.09697586822509766,
|
305 |
+
0.09607270050048829,
|
306 |
+
0.09569894409179687,
|
307 |
+
0.09513881683349609,
|
308 |
+
0.09539174652099609,
|
309 |
+
0.09540198516845703,
|
310 |
+
0.09547468566894532,
|
311 |
+
0.09482444763183594,
|
312 |
+
0.09529036712646484,
|
313 |
+
0.09626930999755859,
|
314 |
+
0.09698099517822266,
|
315 |
+
0.09525043487548829,
|
316 |
+
0.09508863830566407,
|
317 |
+
0.0955340805053711,
|
318 |
+
0.09503641510009765,
|
319 |
+
0.09526271820068359,
|
320 |
+
0.09584742736816407,
|
321 |
+
0.0952616958618164,
|
322 |
+
0.09529036712646484,
|
323 |
+
0.09478041839599609,
|
324 |
+
0.09497721862792968,
|
325 |
+
0.09511199951171875,
|
326 |
+
0.09554841613769531,
|
327 |
+
0.09540608215332032,
|
328 |
+
0.09528422546386718,
|
329 |
+
0.09555353546142578,
|
330 |
+
0.09587506866455078,
|
331 |
+
0.09458175659179688,
|
332 |
+
0.09509478759765624,
|
333 |
+
0.09496883392333984,
|
334 |
+
0.0950149154663086,
|
335 |
+
0.09508147430419922,
|
336 |
+
0.09513062286376953,
|
337 |
+
0.09481728363037109,
|
338 |
+
0.09505382537841797,
|
339 |
+
0.09503334045410156,
|
340 |
+
0.09520127868652344,
|
341 |
+
0.094993408203125,
|
342 |
+
0.09550438690185546,
|
343 |
+
0.09518284606933594,
|
344 |
+
0.09525965118408203,
|
345 |
+
0.09499443054199219,
|
346 |
+
0.09526271820068359,
|
347 |
+
0.09469132995605468,
|
348 |
+
0.09506201934814452,
|
349 |
+
0.09469337463378906,
|
350 |
+
0.09507430267333984,
|
351 |
+
0.09513881683349609,
|
352 |
+
0.09520435333251953,
|
353 |
+
0.09527603149414063,
|
354 |
+
0.09502413177490235,
|
355 |
+
0.09601741027832031,
|
356 |
+
0.09651302337646485,
|
357 |
+
0.09609113311767578,
|
358 |
+
0.09495654296875,
|
359 |
+
0.09531494140625,
|
360 |
+
0.0946698226928711,
|
361 |
+
0.09492070770263672,
|
362 |
+
0.09502617645263672,
|
363 |
+
0.0956211166381836,
|
364 |
+
0.09503846740722656,
|
365 |
+
0.09483980560302735,
|
366 |
+
0.09484083557128906,
|
367 |
+
0.09491661071777344,
|
368 |
+
0.09614540863037109,
|
369 |
+
0.09674649810791015,
|
370 |
+
0.09698918151855469,
|
371 |
+
0.09690828704833984,
|
372 |
+
0.09647411346435547,
|
373 |
+
0.09506201934814452,
|
374 |
+
0.09502207946777344,
|
375 |
+
0.09492479705810547,
|
376 |
+
0.095098876953125,
|
377 |
+
0.09491251373291015,
|
378 |
+
0.09460224151611328,
|
379 |
+
0.0951562271118164,
|
380 |
+
0.0948111343383789,
|
381 |
+
0.09505280303955078,
|
382 |
+
0.09521663665771485,
|
383 |
+
0.09516031646728515,
|
384 |
+
0.09499750518798829,
|
385 |
+
0.09513676452636718,
|
386 |
+
0.09482752227783203,
|
387 |
+
0.09493094635009766,
|
388 |
+
0.0950855712890625,
|
389 |
+
0.09506918334960937,
|
390 |
+
0.09522073364257813,
|
391 |
+
0.09560678100585937,
|
392 |
+
0.09685094451904297,
|
393 |
+
0.09678336334228516,
|
394 |
+
0.09528012847900391,
|
395 |
+
0.09516339111328125,
|
396 |
+
0.09540096282958985,
|
397 |
+
0.09549641418457032,
|
398 |
+
0.09585334777832032,
|
399 |
+
0.09605542755126953,
|
400 |
+
0.09530662536621094,
|
401 |
+
0.09515315246582032,
|
402 |
+
0.09510297393798828,
|
403 |
+
0.09488690948486328,
|
404 |
+
0.09500569915771484,
|
405 |
+
0.09521868896484376,
|
406 |
+
0.09564876556396484,
|
407 |
+
0.0954593276977539,
|
408 |
+
0.09548185729980468,
|
409 |
+
0.09527705383300782,
|
410 |
+
0.09608396911621093,
|
411 |
+
0.0964515838623047,
|
412 |
+
0.09501388549804687,
|
413 |
+
0.09472306823730468
|
414 |
+
]
|
415 |
+
},
|
416 |
+
"throughput": {
|
417 |
+
"unit": "tokens/s",
|
418 |
+
"value": 10.460131336048262
|
419 |
+
},
|
420 |
+
"energy": null,
|
421 |
+
"efficiency": null
|
422 |
+
}
|
423 |
+
}
|