{ "current_gpu_type": "NVIDIA L40S", "current_gpu_total_memory": 45372.6875, "memory_inference_first": 25590.0, "memory_inference": 25590.0, "token_generation_latency_sync": 30.846586608886717, "token_generation_latency_async": 30.805427208542824, "token_generation_throughput_sync": 0.03241849779618423, "token_generation_throughput_async": 0.03246181243422862, "token_generation_CO2_emissions": 1.7413158175960427e-06, "token_generation_energy_consumption": 0.0012653706804628014, "inference_latency_sync": 25.427967834472657, "inference_latency_async": 24.5699405670166, "inference_throughput_sync": 0.039326776190282166, "inference_throughput_async": 0.04070013915061842, "inference_CO2_emissions": 2.974451769021189e-06, "inference_energy_consumption": 1.0356497508141638e-05 }