common_init_from_params: setting dry_penalty_last_n to ctx_size = 768 common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable) system_info: n_threads = 6 (n_threads_batch = 6) / 12 | Metal : EMBED_LIBRARY = 1 | CPU : NEON = 1 | ARM_FMA = 1 | FP16_VA = 1 | DOTPROD = 1 | LLAMAFILE = 1 | ACCELERATE = 1 | AARCH64_REPACK = 1 | multiple_choice_score: there are 869 tasks in prompt multiple_choice_score: selecting 750 random tasks from 869 tasks available multiple_choice_score: preparing task data...done multiple_choice_score : calculating TruthfulQA score over 750 tasks. task acc_norm 1 100.00000000 2 50.00000000 3 66.66666667 4 50.00000000 5 60.00000000 6 66.66666667 7 71.42857143 8 75.00000000 9 66.66666667 10 60.00000000 11 54.54545455 12 50.00000000 13 46.15384615 14 50.00000000 15 53.33333333 16 50.00000000 17 52.94117647 18 50.00000000 19 47.36842105 20 45.00000000 21 47.61904762 22 50.00000000 23 47.82608696 24 50.00000000 25 48.00000000 26 46.15384615 27 48.14814815 28 46.42857143 29 44.82758621 30 46.66666667 31 45.16129032 32 43.75000000 33 42.42424242 34 41.17647059 35 40.00000000 36 38.88888889 37 37.83783784 38 39.47368421 39 38.46153846 40 40.00000000 41 41.46341463 42 40.47619048 43 41.86046512 44 40.90909091 45 40.00000000 46 41.30434783 47 42.55319149 48 41.66666667 49 40.81632653 50 42.00000000 51 41.17647059 52 42.30769231 53 43.39622642 54 44.44444444 55 43.63636364 56 44.64285714 57 45.61403509 58 46.55172414 59 45.76271186 60 46.66666667 61 47.54098361 62 46.77419355 63 47.61904762 64 46.87500000 65 46.15384615 66 46.96969697 67 46.26865672 68 45.58823529 69 46.37681159 70 45.71428571 71 45.07042254 72 44.44444444 73 45.20547945 74 45.94594595 75 46.66666667 76 47.36842105 77 48.05194805 78 48.71794872 79 48.10126582 80 47.50000000 81 46.91358025 82 47.56097561 83 46.98795181 84 47.61904762 85 48.23529412 86 47.67441860 87 48.27586207 88 48.86363636 89 49.43820225 90 50.00000000 91 50.54945055 92 50.00000000 93 49.46236559 94 48.93617021 95 49.47368421 96 48.95833333 97 48.45360825 98 47.95918367 99 48.48484848 100 48.00000000 101 47.52475248 102 48.03921569 103 47.57281553 104 47.11538462 105 47.61904762 106 47.16981132 107 47.66355140 108 47.22222222 109 46.78899083 110 46.36363636 111 46.84684685 112 47.32142857 113 47.78761062 114 47.36842105 115 47.82608696 116 47.41379310 117 47.00854701 118 47.45762712 119 47.05882353 120 47.50000000 121 47.10743802 122 47.54098361 123 47.96747967 124 47.58064516 125 48.00000000 126 48.41269841 127 48.81889764 128 48.43750000 129 48.06201550 130 47.69230769 131 47.32824427 132 46.96969697 133 47.36842105 134 47.01492537 135 46.66666667 136 46.32352941 137 46.71532847 138 46.37681159 139 46.76258993 140 46.42857143 141 46.80851064 142 47.18309859 143 47.55244755 144 47.22222222 145 47.58620690 146 47.26027397 147 47.61904762 148 47.29729730 149 47.65100671 150 48.00000000 151 47.68211921 152 48.02631579 153 48.36601307 154 48.05194805 155 47.74193548 156 48.07692308 157 48.40764331 158 48.73417722 159 49.05660377 160 48.75000000 161 48.44720497 162 48.76543210 163 48.46625767 164 48.17073171 165 47.87878788 166 48.19277108 167 47.90419162 168 48.21428571 169 47.92899408 170 48.23529412 171 48.53801170 172 48.83720930 173 49.13294798 174 49.42528736 175 49.14285714 176 48.86363636 177 49.15254237 178 48.87640449 179 49.16201117 180 48.88888889 181 48.61878453 182 48.90109890 183 48.63387978 184 48.36956522 185 48.64864865 186 48.92473118 187 49.19786096 188 48.93617021 189 48.67724868 190 48.42105263 191 48.69109948 192 48.43750000 193 48.18652850 194 48.45360825 195 48.71794872 196 48.46938776 197 48.73096447 198 48.98989899 199 49.24623116 200 49.50000000 201 49.75124378 202 50.00000000 203 49.75369458 204 50.00000000 205 49.75609756 206 49.51456311 207 49.75845411 208 50.00000000 209 49.76076555 210 50.00000000 211 50.23696682 212 50.00000000 213 49.76525822 214 50.00000000 215 50.23255814 216 50.00000000 217 49.76958525 218 50.00000000 219 50.22831050 220 50.00000000 221 49.77375566 222 49.54954955 223 49.32735426 224 49.55357143 225 49.33333333 226 49.11504425 227 49.33920705 228 49.56140351 229 49.78165939 230 50.00000000 231 49.78354978 232 49.56896552 233 49.78540773 234 49.57264957 235 49.36170213 236 49.15254237 237 48.94514768 238 49.15966387 239 49.37238494 240 49.58333333 241 49.37759336 242 49.17355372 243 48.97119342 244 48.77049180 245 48.57142857 246 48.37398374 247 48.58299595 248 48.38709677 249 48.59437751 250 48.80000000 251 49.00398406 252 49.20634921 253 49.40711462 254 49.21259843 255 49.41176471 256 49.60937500 257 49.80544747 258 50.00000000 259 49.80694981 260 49.61538462 261 49.80842912 262 49.61832061 263 49.80988593 264 50.00000000 265 50.18867925 266 50.00000000 267 50.18726592 268 50.00000000 269 50.18587361 270 50.37037037 271 50.18450185 272 50.00000000 273 49.81684982 274 49.63503650 275 49.45454545 276 49.63768116 277 49.81949458 278 49.64028777 279 49.82078853 280 50.00000000 281 50.17793594 282 50.35460993 283 50.17667845 284 50.35211268 285 50.17543860 286 50.34965035 287 50.17421603 288 50.34722222 289 50.51903114 290 50.34482759 291 50.51546392 292 50.34246575 293 50.51194539 294 50.34013605 295 50.50847458 296 50.33783784 297 50.50505051 298 50.33557047 299 50.50167224 300 50.33333333 301 50.16611296 302 50.33112583 303 50.49504950 304 50.32894737 305 50.49180328 306 50.32679739 307 50.48859935 308 50.32467532 309 50.16181230 310 50.32258065 311 50.48231511 312 50.32051282 313 50.47923323 314 50.31847134 315 50.15873016 316 50.31645570 317 50.47318612 318 50.31446541 319 50.15673981 320 50.00000000 321 49.84423676 322 50.00000000 323 49.84520124 324 49.69135802 325 49.53846154 326 49.69325153 327 49.54128440 328 49.39024390 329 49.54407295 330 49.69696970 331 49.84894260 332 49.69879518 333 49.84984985 334 49.70059880 335 49.55223881 336 49.70238095 337 49.55489614 338 49.40828402 339 49.55752212 340 49.70588235 341 49.56011730 342 49.41520468 343 49.56268222 344 49.70930233 345 49.56521739 346 49.71098266 347 49.56772334 348 49.42528736 349 49.57020057 350 49.42857143 351 49.57264957 352 49.43181818 353 49.29178470 354 49.15254237 355 49.01408451 356 48.87640449 357 48.73949580 358 48.88268156 359 48.74651811 360 48.88888889 361 49.03047091 362 48.89502762 363 49.03581267 364 48.90109890 365 48.76712329 366 48.90710383 367 49.04632153 368 48.91304348 369 49.05149051 370 48.91891892 371 48.78706199 372 48.65591398 373 48.52546917 374 48.66310160 375 48.53333333 376 48.67021277 377 48.80636605 378 48.94179894 379 48.81266491 380 48.94736842 381 48.81889764 382 48.69109948 383 48.56396867 384 48.43750000 385 48.31168831 386 48.44559585 387 48.32041344 388 48.45360825 389 48.32904884 390 48.46153846 391 48.33759591 392 48.46938776 393 48.60050891 394 48.73096447 395 48.86075949 396 48.73737374 397 48.86649874 398 48.74371859 399 48.62155388 400 48.75000000 401 48.62842893 402 48.75621891 403 48.88337469 404 49.00990099 405 49.13580247 406 49.26108374 407 49.38574939 408 49.50980392 409 49.38875306 410 49.51219512 411 49.63503650 412 49.75728155 413 49.63680387 414 49.75845411 415 49.63855422 416 49.51923077 417 49.40047962 418 49.28229665 419 49.16467780 420 49.28571429 421 49.16864608 422 49.28909953 423 49.40898345 424 49.29245283 425 49.41176471 426 49.29577465 427 49.41451991 428 49.29906542 429 49.41724942 430 49.30232558 431 49.41995360 432 49.30555556 433 49.19168591 434 49.07834101 435 49.19540230 436 49.08256881 437 49.19908467 438 49.08675799 439 49.20273349 440 49.09090909 441 49.20634921 442 49.09502262 443 48.98419865 444 48.87387387 445 48.98876404 446 49.10313901 447 49.21700224 448 49.33035714 449 49.44320713 450 49.33333333 451 49.22394678 452 49.11504425 453 49.00662252 454 48.89867841 455 49.01098901 456 48.90350877 457 48.79649891 458 48.68995633 459 48.80174292 460 48.69565217 461 48.80694143 462 48.91774892 463 48.81209503 464 48.70689655 465 48.60215054 466 48.71244635 467 48.82226981 468 48.71794872 469 48.61407249 470 48.72340426 471 48.83227176 472 48.72881356 473 48.62579281 474 48.52320675 475 48.42105263 476 48.31932773 477 48.42767296 478 48.53556485 479 48.64300626 480 48.75000000 481 48.64864865 482 48.75518672 483 48.86128364 484 48.96694215 485 49.07216495 486 49.17695473 487 49.07597536 488 48.97540984 489 48.87525562 490 48.77551020 491 48.87983707 492 48.78048780 493 48.68154158 494 48.78542510 495 48.68686869 496 48.58870968 497 48.69215292 498 48.79518072 499 48.89779559 500 49.00000000 501 49.10179641 502 49.20318725 503 49.30417495 504 49.40476190 505 49.50495050 506 49.40711462 507 49.30966469 508 49.40944882 509 49.31237721 510 49.21568627 511 49.11937378 512 49.21875000 513 49.12280702 514 49.22178988 515 49.32038835 516 49.22480620 517 49.12959381 518 49.22779923 519 49.13294798 520 49.03846154 521 49.13627639 522 49.04214559 523 48.94837476 524 48.85496183 525 48.76190476 526 48.66920152 527 48.57685009 528 48.48484848 529 48.39319471 530 48.49056604 531 48.58757062 532 48.68421053 533 48.78048780 534 48.68913858 535 48.59813084 536 48.50746269 537 48.60335196 538 48.51301115 539 48.60853432 540 48.51851852 541 48.42883549 542 48.33948339 543 48.43462247 544 48.34558824 545 48.44036697 546 48.35164835 547 48.26325411 548 48.35766423 549 48.45173042 550 48.36363636 551 48.27586207 552 48.18840580 553 48.28209765 554 48.19494585 555 48.28828829 556 48.20143885 557 48.11490126 558 48.02867384 559 48.12164580 560 48.21428571 561 48.30659537 562 48.39857651 563 48.49023091 564 48.40425532 565 48.31858407 566 48.40989399 567 48.50088183 568 48.59154930 569 48.50615114 570 48.42105263 571 48.33625219 572 48.25174825 573 48.16753927 574 48.25783972 575 48.34782609 576 48.43750000 577 48.52686308 578 48.44290657 579 48.53195164 580 48.62068966 581 48.53700516 582 48.45360825 583 48.37049743 584 48.45890411 585 48.54700855 586 48.46416382 587 48.38160136 588 48.29931973 589 48.38709677 590 48.47457627 591 48.56175973 592 48.64864865 593 48.56661046 594 48.65319865 595 48.73949580 596 48.65771812 597 48.57621441 598 48.49498328 599 48.41402337 600 48.33333333 601 48.41930116 602 48.33887043 603 48.25870647 604 48.17880795 605 48.09917355 606 48.18481848 607 48.10543657 608 48.19078947 609 48.11165846 610 48.19672131 611 48.11783961 612 48.03921569 613 47.96084829 614 47.88273616 615 47.80487805 616 47.72727273 617 47.81199352 618 47.73462783 619 47.65751212 620 47.74193548 621 47.82608696 622 47.90996785 623 47.99357945 624 47.91666667 625 47.84000000 626 47.76357827 627 47.84688995 628 47.92993631 629 48.01271860 630 48.09523810 631 48.17749604 632 48.10126582 633 48.02527646 634 47.94952681 635 48.03149606 636 48.11320755 637 48.03767661 638 48.11912226 639 48.20031299 640 48.12500000 641 48.20592824 642 48.13084112 643 48.05598756 644 48.13664596 645 48.06201550 646 47.98761610 647 47.91344668 648 47.83950617 649 47.76579353 650 47.69230769 651 47.77265745 652 47.69938650 653 47.62633997 654 47.70642202 655 47.63358779 656 47.56097561 657 47.48858447 658 47.56838906 659 47.49620637 660 47.42424242 661 47.35249622 662 47.43202417 663 47.51131222 664 47.59036145 665 47.66917293 666 47.74774775 667 47.82608696 668 47.75449102 669 47.68310912 670 47.76119403 671 47.69001490 672 47.61904762 673 47.54829123 674 47.47774481 675 47.40740741 676 47.48520710 677 47.56277696 678 47.64011799 679 47.56995582 680 47.64705882 681 47.57709251 682 47.65395894 683 47.58418741 684 47.66081871 685 47.73722628 686 47.66763848 687 47.74381368 688 47.81976744 689 47.75036284 690 47.82608696 691 47.75687410 692 47.83236994 693 47.90764791 694 47.98270893 695 48.05755396 696 47.98850575 697 47.91965567 698 47.85100287 699 47.78254649 700 47.85714286 701 47.93152639 702 47.86324786 703 47.79516358 704 47.86931818 705 47.94326241 706 48.01699717 707 48.09052334 708 48.16384181 709 48.23695346 710 48.16901408 711 48.24191280 712 48.31460674 713 48.38709677 714 48.31932773 715 48.25174825 716 48.18435754 717 48.11715481 718 48.05013928 719 47.98331015 720 48.05555556 721 47.98890430 722 47.92243767 723 47.99446750 724 48.06629834 725 48.13793103 726 48.07162534 727 48.00550206 728 47.93956044 729 48.01097394 730 48.08219178 731 48.15321477 732 48.22404372 733 48.29467940 734 48.36512262 735 48.29931973 736 48.36956522 737 48.30393487 738 48.23848238 739 48.30852503 740 48.24324324 741 48.31309042 742 48.38274933 743 48.45222073 744 48.38709677 745 48.45637584 746 48.39142091 747 48.32663989 748 48.26203209 749 48.19759680 750 48.26666667 Final result: 48.2667 ±1.8259 Random chance: 25.0083 ±1.5824