Final results: OrderedDict([('D/P3DREAM/EVAL', {'accuracy': 0.581, 'score': 0.581}), ('average_score', 0.581), ('confidence', 0.0)]) Final results: OrderedDict([('D/P3COMMONSENSEQA/EVAL', {'accuracy': 0.741, 'score': 0.741}), ('average_score', 0.741), ('confidence', 0.0)])