diff --git "a/gpt2/metrics.eval.jsonl" "b/gpt2/metrics.eval.jsonl" --- "a/gpt2/metrics.eval.jsonl" +++ "b/gpt2/metrics.eval.jsonl" @@ -23,3 +23,7 @@ {"created_at": "2025-06-28T15:00:22.761558", "global_step": 24000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.257679180887372, "acc_stderr,none": 0.012780770562768407, "acc_norm,none": 0.28071672354948807, "acc_norm_stderr,none": 0.01313123812697558}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6191077441077442, "acc_stderr,none": 0.009964428212260379, "acc_norm,none": 0.5492424242424242, "acc_norm_stderr,none": 0.010209906101011117}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132656, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132656}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596812, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596812}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237289, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237289}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687957, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687957}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699817, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699817}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132628, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132628}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683047, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683047}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579386, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579386}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2688888888888889, "acc_stderr,none": 0.014787619747567614, "acc_norm,none": 0.2688888888888889, "acc_norm_stderr,none": 0.014787619747567614}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380051, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380051}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596782, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596782}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651672, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651672}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579395, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579395}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3543118900617407, "acc_stderr,none": 0.00477326751011274, "acc_norm,none": 0.43238398725353516, "acc_norm_stderr,none": 0.004943945069611452}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.017666748582967847, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04429811949614585}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.14634146341463414, "acc_stderr,none": 0.03927202370241041}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3047619047619048, "acc_stderr,none": 0.0451367671816831}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.04117581097845101}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.036956421364396135}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.05237229365663814}, "include_base_44_bengali": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017866666707363147, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.19879518072289157, "acc_stderr,none": 0.03106939026078944}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018334421339336573, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french": {"acc,none": 0.22911694510739858, "acc_stderr,none": 0.02063159904498545, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21804511278195488, "acc_stderr,none": 0.025365363516347505}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.23404255319148937, "acc_stderr,none": 0.062426763436828826}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.050215421942054}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.2014388489208633, "acc_stderr,none": 0.03372687129786146, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.043478260869565216, "acc_stderr,none": 0.04347826086956523}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.25274725274725274, "acc_stderr,none": 0.045809518537328904}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.24862888482632542, "acc_stderr,none": 0.018561919417719605, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2916666666666667, "acc_stderr,none": 0.04663376032810542}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390467}, "include_base_44_italian": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018489893682929224, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283575}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245767}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.24750499001996007, "acc_stderr,none": 0.01926165193058403, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.20202020202020202, "acc_stderr,none": 0.040558353023247305}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2935323383084577, "acc_stderr,none": 0.03220024104534205}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.22388059701492538, "acc_stderr,none": 0.02947525023601718}, "include_base_44_korean": {"acc,none": 0.244, "acc_stderr,none": 0.019246029163290247, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.244, "acc_stderr,none": 0.027217995464553175}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.244, "acc_stderr,none": 0.027217995464553182}, "include_base_44_russian": {"acc,none": 0.27717391304347827, "acc_stderr,none": 0.018976202798419422, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472663}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191319}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.32142857142857145, "acc_stderr,none": 0.08987898137227081}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.36470588235294116, "acc_stderr,none": 0.05251932770420085}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.18085106382978725, "acc_stderr,none": 0.03991172470919536}, "include_base_44_spanish": {"acc,none": 0.24727272727272728, "acc_stderr,none": 0.01843204437224097, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.22, "acc_stderr,none": 0.02625179282460584}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.28, "acc_stderr,none": 0.09165151389911681}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.268, "acc_stderr,none": 0.028068762382526688}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_telugu": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.018513839694758838, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24607329842931938, "acc_stderr,none": 0.031247840365749114}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_turkish": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.018868333493041897, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233134}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.6670293797606094, "acc_stderr,none": 0.010995648822619069, "acc_norm,none": 0.6741022850924918, "acc_norm_stderr,none": 0.010935760218903946}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.43694779116465865, "acc_stderr,none": 0.009942066394610852}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4931726907630522, "acc_stderr,none": 0.010021138522919162}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.4461847389558233, "acc_stderr,none": 0.009963854274139157}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.4497991967871486, "acc_stderr,none": 0.009971431255560173}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.39879518072289155, "acc_stderr,none": 0.009814625416137557}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.41325301204819276, "acc_stderr,none": 0.00987008743562378}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3827309236947791, "acc_stderr,none": 0.009742526340884055}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.35943775100401604, "acc_stderr,none": 0.009617895762902742}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.38393574297188754, "acc_stderr,none": 0.009748321202534384}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3465863453815261, "acc_stderr,none": 0.009538660220458996}} {"created_at": "2025-06-28T15:15:19.325194", "global_step": 25000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2627986348122867, "acc_stderr,none": 0.012862523175351333, "acc_norm,none": 0.2841296928327645, "acc_norm_stderr,none": 0.013179442447653886}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6186868686868687, "acc_stderr,none": 0.009966542497171018, "acc_norm,none": 0.5437710437710438, "acc_norm_stderr,none": 0.010220394383722017}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.01415271607913263, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.01415271607913263}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884518, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884518}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198737, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198737}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.014463114105170805, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.014463114105170805}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792662, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792662}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.35879306910973907, "acc_stderr,none": 0.004786660691181918, "acc_norm,none": 0.4373630750846445, "acc_norm_stderr,none": 0.004950472918523326}, "include_base_44_arabic": {"acc,none": 0.2210144927536232, "acc_stderr,none": 0.017610754015091025, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.041764667586049006}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.04058259927336571}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386237}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256417}, "include_base_44_bengali": {"acc,none": 0.2208029197080292, "acc_stderr,none": 0.017753933256272295, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.18674698795180722, "acc_stderr,none": 0.030338749144500597}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_chinese": {"acc,none": 0.26605504587155965, "acc_stderr,none": 0.019016204086770672, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383254}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2315035799522673, "acc_stderr,none": 0.020654945546325493, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.2127659574468085, "acc_stderr,none": 0.06034260964773522}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.05021542194205401}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.2589928057553957, "acc_stderr,none": 0.03708121429363875, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.13043478260869565, "acc_stderr,none": 0.07180198468215396}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.3076923076923077, "acc_stderr,none": 0.048650425541052}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_hindi": {"acc,none": 0.2340036563071298, "acc_stderr,none": 0.018160628093446166, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2916666666666667, "acc_stderr,none": 0.046633760328105435}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018643232039787568, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.03443623453899477}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.033675118801687026}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_japanese": {"acc,none": 0.23952095808383234, "acc_stderr,none": 0.01910440566800504, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.04199605255658083}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.263681592039801, "acc_stderr,none": 0.03115715086935557}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.22388059701492538, "acc_stderr,none": 0.029475250236017183}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.018971549191219426, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953835}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.232, "acc_stderr,none": 0.026750070374865157}, "include_base_44_russian": {"acc,none": 0.2644927536231884, "acc_stderr,none": 0.018753539530399592, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472663}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322382}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.35294117647058826, "acc_stderr,none": 0.0521414859075246}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.05775749253522359}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2127659574468085, "acc_stderr,none": 0.04243864702455469}, "include_base_44_spanish": {"acc,none": 0.24, "acc_stderr,none": 0.018242821404610962, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.216, "acc_stderr,none": 0.026078657663732727}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.268, "acc_stderr,none": 0.02806876238252669}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_telugu": {"acc,none": 0.23722627737226276, "acc_stderr,none": 0.018146708004545358, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.20418848167539266, "acc_stderr,none": 0.029244481767055425}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_turkish": {"acc,none": 0.2773722627737226, "acc_stderr,none": 0.019140201483360698, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683226}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.6719260065288357, "acc_stderr,none": 0.010954487135124228, "acc_norm,none": 0.6751904243743199, "acc_norm_stderr,none": 0.010926296238294025}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.43172690763052207, "acc_stderr,none": 0.009928203186112913}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4939759036144578, "acc_stderr,none": 0.010021345444047586}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.4502008032128514, "acc_stderr,none": 0.009972240296768891}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.43614457831325304, "acc_stderr,none": 0.009940006562498589}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.40843373493975904, "acc_stderr,none": 0.009852581919032235}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.41445783132530123, "acc_stderr,none": 0.009874311310483542}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.39397590361445783, "acc_stderr,none": 0.009794163014906765}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3859437751004016, "acc_stderr,none": 0.009757838842063334}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.37349397590361444, "acc_stderr,none": 0.00969598596221976}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.00946022348499647}} {"created_at": "2025-06-28T16:07:56.027869", "global_step": 26000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.26621160409556316, "acc_stderr,none": 0.012915774781523223, "acc_norm,none": 0.28924914675767915, "acc_norm_stderr,none": 0.013250012579393441}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6233164983164983, "acc_stderr,none": 0.009942848077476169, "acc_norm,none": 0.5475589225589226, "acc_norm_stderr,none": 0.010213265860171402}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.013790766978256947, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.013790766978256947}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458148, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458148}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.21222222222222223, "acc_stderr,none": 0.013636956209422683, "acc_norm,none": 0.21222222222222223, "acc_norm_stderr,none": 0.013636956209422683}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700177, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700177}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458142, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458142}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198723, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198723}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945584, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945584}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.01398772152368795, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.01398772152368795}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859544, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859544}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642536, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642536}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945582, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945582}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409383, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409383}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859561, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859561}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3586934873531169, "acc_stderr,none": 0.004786368011500459, "acc_norm,none": 0.4367655845449114, "acc_norm_stderr,none": 0.0049497163688904874}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01760249818643979, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.04176466758604901}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.04058259927336571}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.04654465622977447}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386238}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.2, "acc_stderr,none": 0.03922322702763679}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.2208029197080292, "acc_stderr,none": 0.017738447007485548, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.18674698795180722, "acc_stderr,none": 0.030338749144500625}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.0548839220351387}, "include_base_44_chinese": {"acc,none": 0.26972477064220185, "acc_stderr,none": 0.019044505647100314, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.28735632183908044, "acc_stderr,none": 0.04879747731496575}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french": {"acc,none": 0.21957040572792363, "acc_stderr,none": 0.02027337914915624, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21052631578947367, "acc_stderr,none": 0.02504373428583617}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.05801446334976933}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.050215421942054}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.23741007194244604, "acc_stderr,none": 0.03592493137715881, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.08695652173913043, "acc_stderr,none": 0.06007385040937022}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04761904761904758}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_hindi": {"acc,none": 0.2522851919561243, "acc_stderr,none": 0.018646409607347827, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.3125, "acc_stderr,none": 0.047555369390792634}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018660473115338774, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.034436234538994775}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23225806451612904, "acc_stderr,none": 0.03402770605128516}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.03419073042180668}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.22954091816367264, "acc_stderr,none": 0.01882818285277245, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.20202020202020202, "acc_stderr,none": 0.04055835302324732}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916707}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.22388059701492538, "acc_stderr,none": 0.029475250236017193}, "include_base_44_korean": {"acc,none": 0.248, "acc_stderr,none": 0.019338456598561787, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.232, "acc_stderr,none": 0.02675007037486517}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.264, "acc_stderr,none": 0.027934518957690908}, "include_base_44_russian": {"acc,none": 0.2753623188405797, "acc_stderr,none": 0.01888098113889843, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057421}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.32142857142857145, "acc_stderr,none": 0.08987898137227082}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.36470588235294116, "acc_stderr,none": 0.05251932770420085}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.391304347826087, "acc_stderr,none": 0.05918381823737157}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.0557990389495433}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.24, "acc_stderr,none": 0.018228212536278245, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.216, "acc_stderr,none": 0.026078657663732727}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.268, "acc_stderr,none": 0.02806876238252669}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.07483314773547879}, "include_base_44_telugu": {"acc,none": 0.23905109489051096, "acc_stderr,none": 0.018216065260751712, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2198952879581152, "acc_stderr,none": 0.030047449343709735}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_turkish": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.01923742250082005, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.0362933532994786}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560375}, "piqa": {"alias": "piqa", "acc,none": 0.6811751904243744, "acc_stderr,none": 0.01087303753433342, "acc_norm,none": 0.6811751904243744, "acc_norm_stderr,none": 0.010873037534333422}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.43654618473895584, "acc_stderr,none": 0.009941039791133126}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4927710843373494, "acc_stderr,none": 0.010021025361119621}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.45461847389558235, "acc_stderr,none": 0.009980706922977813}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.44779116465863456, "acc_stderr,none": 0.009967287545636116}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.41285140562248995, "acc_stderr,none": 0.0098686659430844}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.41445783132530123, "acc_stderr,none": 0.009874311310483542}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3963855421686747, "acc_stderr,none": 0.009804518520476644}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.37630522088353413, "acc_stderr,none": 0.00971054774421605}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.38112449799196785, "acc_stderr,none": 0.009734701300519844}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463626}} +{"created_at": "2025-06-30T15:03:20.876967", "global_step": 27000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2645051194539249, "acc_stderr,none": 0.012889272949313368, "acc_norm,none": 0.2883959044368601, "acc_norm_stderr,none": 0.013238394422428175}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6313131313131313, "acc_stderr,none": 0.00989964085568105, "acc_norm,none": 0.5542929292929293, "acc_norm_stderr,none": 0.010199118183322984}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.21444444444444444, "acc_stderr,none": 0.01368881978836738, "acc_norm,none": 0.21444444444444444, "acc_norm_stderr,none": 0.01368881978836738}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.013714527832369672, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.013714527832369672}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198709, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198709}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132625, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132625}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458153, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458153}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.01371452783236968, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.01371452783236968}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336096, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336096}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336093, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336093}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687956, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687956}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596795, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596795}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356805, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356805}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579385, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579385}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651677, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651677}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859556, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859556}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3593905596494722, "acc_stderr,none": 0.004788412062375701, "acc_norm,none": 0.4388568014339773, "acc_norm_stderr,none": 0.004952332378120329}, "include_base_44_arabic": {"acc,none": 0.2463768115942029, "acc_stderr,none": 0.01823760697001736, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.2073170731707317, "acc_stderr,none": 0.04504273750296659}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3523809523809524, "acc_stderr,none": 0.046843501394377526}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.03990657150993187}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.03850512095536381}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.21897810218978103, "acc_stderr,none": 0.01768649212545982, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.18072289156626506, "acc_stderr,none": 0.029955737855810134}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018623708091429225, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150304}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french": {"acc,none": 0.22195704057279236, "acc_stderr,none": 0.02035790416674946, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21052631578947367, "acc_stderr,none": 0.025043734285836165}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.2127659574468085, "acc_stderr,none": 0.060342609647735225}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.05021542194205401}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.2302158273381295, "acc_stderr,none": 0.03545060543527253, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.08695652173913043, "acc_stderr,none": 0.06007385040937022}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04761904761904758}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.23583180987202926, "acc_stderr,none": 0.018213102658442278, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2708333333333333, "acc_stderr,none": 0.045593471241867974}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.043247857666407805}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018743296588106518, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.034673771737174536}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2129032258064516, "acc_stderr,none": 0.03298715238372958}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428356}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_japanese": {"acc,none": 0.2275449101796407, "acc_stderr,none": 0.01875718696880427, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.1919191919191919, "acc_stderr,none": 0.039780804479336844}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296014}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.21890547263681592, "acc_stderr,none": 0.029239174636647}, "include_base_44_korean": {"acc,none": 0.246, "acc_stderr,none": 0.019288967235912648, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.232, "acc_stderr,none": 0.02675007037486517}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.26, "acc_stderr,none": 0.027797315752644297}, "include_base_44_russian": {"acc,none": 0.2717391304347826, "acc_stderr,none": 0.018800661435050096, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057422}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.052254366311072324}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.25, "acc_stderr,none": 0.08333333333333333}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.35294117647058826, "acc_stderr,none": 0.052141485907524605}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.1702127659574468, "acc_stderr,none": 0.0389706782884788}, "include_base_44_spanish": {"acc,none": 0.24909090909090909, "acc_stderr,none": 0.018449018954033822, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.216, "acc_stderr,none": 0.02607865766373272}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.284, "acc_stderr,none": 0.0285769587304374}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_telugu": {"acc,none": 0.24635036496350365, "acc_stderr,none": 0.018431180852252805, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.035294868015111135}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2356020942408377, "acc_stderr,none": 0.030787364755364144}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_turkish": {"acc,none": 0.291970802919708, "acc_stderr,none": 0.0194668125008164, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683227}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.6751904243743199, "acc_stderr,none": 0.01092629623829403, "acc_norm,none": 0.6822633297062024, "acc_norm_stderr,none": 0.010863133246569281}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.44417670682730925, "acc_stderr,none": 0.009959414626897999}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4983935742971888, "acc_stderr,none": 0.01002202114110211}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.4433734939759036, "acc_stderr,none": 0.009957592660538655}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.43413654618473896, "acc_stderr,none": 0.009934740969162524}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.4108433734939759, "acc_stderr,none": 0.009861456841490843}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.40843373493975904, "acc_stderr,none": 0.009852581919032238}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3923694779116466, "acc_stderr,none": 0.009787120838990103}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3863453815261044, "acc_stderr,none": 0.009759721337538359}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3682730923694779, "acc_stderr,none": 0.009668013178998446}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274447}} +{"created_at": "2025-06-30T16:26:38.088427", "global_step": 28000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.26621160409556316, "acc_stderr,none": 0.012915774781523217, "acc_norm,none": 0.28668941979522183, "acc_norm_stderr,none": 0.013214986329274774}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.625, "acc_stderr,none": 0.009933992677987828, "acc_norm,none": 0.5488215488215489, "acc_norm_stderr,none": 0.01021075710107347}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.20777777777777778, "acc_stderr,none": 0.013531414972025825, "acc_norm,none": 0.20777777777777778, "acc_norm_stderr,none": 0.013531414972025825}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030465, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030465}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.013714527832369674, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.013714527832369674}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.01371452783236968, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.01371452783236968}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700165, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700165}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.01371452783236968, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.01371452783236968}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.01417574247439196, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.01417574247439196}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308198, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308198}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683059, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683059}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.01417574247439195, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.01417574247439195}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198712, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198712}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336079, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336079}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198707, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198707}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.36058554072893845, "acc_stderr,none": 0.0047918906258342005, "acc_norm,none": 0.43855805616411075, "acc_norm_stderr,none": 0.0049519641319213}, "include_base_44_arabic": {"acc,none": 0.2427536231884058, "acc_stderr,none": 0.018142370994797586, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.1951219512195122, "acc_stderr,none": 0.04403272848041175}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.04622501635210239}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386238}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.2, "acc_stderr,none": 0.03922322702763679}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.21897810218978103, "acc_stderr,none": 0.01768768032532851, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.18674698795180722, "acc_stderr,none": 0.0303387491445006}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.0548839220351387}, "include_base_44_chinese": {"acc,none": 0.23853211009174313, "acc_stderr,none": 0.018317891695899167, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.046870495038546706}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french": {"acc,none": 0.21241050119331742, "acc_stderr,none": 0.020011260233179177, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.02520611987225017}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.1702127659574468, "acc_stderr,none": 0.055411578656325386}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.21621621621621623, "acc_stderr,none": 0.04818155789706758}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.20863309352517986, "acc_stderr,none": 0.03421974789725493, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.08695652173913043, "acc_stderr,none": 0.06007385040937022}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.046449428524973954}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.12, "acc_stderr,none": 0.06633249580710801}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01834471470067455, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2604166666666667, "acc_stderr,none": 0.045026287805761366}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_italian": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.01854977497074293, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283575}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2, "acc_stderr,none": 0.03223291856101517}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.034436234538994775}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_japanese": {"acc,none": 0.22554890219560877, "acc_stderr,none": 0.01869959516444395, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.20202020202020202, "acc_stderr,none": 0.040558353023247305}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.03076944496729602}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.208955223880597, "acc_stderr,none": 0.028748298931728658}, "include_base_44_korean": {"acc,none": 0.246, "acc_stderr,none": 0.019263966403791993, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.22, "acc_stderr,none": 0.02625179282460584}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.272, "acc_stderr,none": 0.02820008829631}, "include_base_44_russian": {"acc,none": 0.2554347826086957, "acc_stderr,none": 0.018505642525106596, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057422}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.32941176470588235, "acc_stderr,none": 0.05128116404165497}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.0557990389495433}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.3188405797101449, "acc_stderr,none": 0.05651408783764654}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.1595744680851064, "acc_stderr,none": 0.03797430614378277}, "include_base_44_spanish": {"acc,none": 0.2290909090909091, "acc_stderr,none": 0.017907149428817545, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.196, "acc_stderr,none": 0.02515685731325594}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.26, "acc_stderr,none": 0.027797315752644304}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.07483314773547879}, "include_base_44_telugu": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01837409781050414, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.23036649214659685, "acc_stderr,none": 0.030547441226520554}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.09165151389911683}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.019311858755482907, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.0362933532994786}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589322}, "piqa": {"alias": "piqa", "acc,none": 0.675734494015234, "acc_stderr,none": 0.010921539041347982, "acc_norm,none": 0.6833514689880305, "acc_norm_stderr,none": 0.01085316053197848}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.4461847389558233, "acc_stderr,none": 0.009963854274139157}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4939759036144578, "acc_stderr,none": 0.010021345444047586}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.4493975903614458, "acc_stderr,none": 0.009970615649588134}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.4397590361445783, "acc_stderr,none": 0.009949067285169347}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.41405622489959837, "acc_stderr,none": 0.009872910116421196}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.41124497991967873, "acc_stderr,none": 0.00986291222354463}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.39437751004016064, "acc_stderr,none": 0.00979590623030422}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.38714859437751004, "acc_stderr,none": 0.009763465328590648}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.37028112449799194, "acc_stderr,none": 0.009678915409840288}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3469879518072289, "acc_stderr,none": 0.009541251561568397}} +{"created_at": "2025-06-30T16:55:16.914800", "global_step": 29000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.26621160409556316, "acc_stderr,none": 0.012915774781523216, "acc_norm,none": 0.2901023890784983, "acc_norm_stderr,none": 0.013261573677520766}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.625, "acc_stderr,none": 0.009933992677987828, "acc_norm,none": 0.5517676767676768, "acc_norm_stderr,none": 0.010204645126856935}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.20777777777777778, "acc_stderr,none": 0.013531414972025825, "acc_norm,none": 0.20777777777777778, "acc_norm_stderr,none": 0.013531414972025825}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579373, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579373}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2111111111111111, "acc_stderr,none": 0.013610798969328501, "acc_norm,none": 0.2111111111111111, "acc_norm_stderr,none": 0.013610798969328501}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458123, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458123}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700177, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700177}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321818, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321818}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21777777777777776, "acc_stderr,none": 0.013765500608039473, "acc_norm,none": 0.21777777777777776, "acc_norm_stderr,none": 0.013765500608039473}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276203, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276203}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218176, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218176}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579385, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579385}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342964, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342964}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632423, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632423}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3604859589723163, "acc_stderr,none": 0.0047916019756127646, "acc_norm,none": 0.4398526190001992, "acc_norm_stderr,none": 0.00495354670851232}, "include_base_44_arabic": {"acc,none": 0.23550724637681159, "acc_stderr,none": 0.017930024374459622, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.04654465622977447}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.038505120955363834}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386237}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.22262773722627738, "acc_stderr,none": 0.01780491961256922, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.1927710843373494, "acc_stderr,none": 0.030709824050565274}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "include_base_44_chinese": {"acc,none": 0.25504587155963304, "acc_stderr,none": 0.018717992280600334, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150345}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.046870495038546706}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french": {"acc,none": 0.21479713603818615, "acc_stderr,none": 0.020093814365636405, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21804511278195488, "acc_stderr,none": 0.025365363516347505}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.14893617021276595, "acc_stderr,none": 0.05249310253140092}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.21621621621621623, "acc_stderr,none": 0.04818155789706758}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.22302158273381298, "acc_stderr,none": 0.03512112157552947, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.08695652173913043, "acc_stderr,none": 0.06007385040937022}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778438}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.2413162705667276, "acc_stderr,none": 0.01836996505270295, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2916666666666667, "acc_stderr,none": 0.046633760328105435}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_italian": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.01856208571815937, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2129032258064516, "acc_stderr,none": 0.03298715238372958}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428356}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_japanese": {"acc,none": 0.21956087824351297, "acc_stderr,none": 0.018532593908442314, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.1919191919191919, "acc_stderr,none": 0.039780804479336844}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.23880597014925373, "acc_stderr,none": 0.030147775935409217}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.21393034825870647, "acc_stderr,none": 0.028996909693328906}, "include_base_44_korean": {"acc,none": 0.25, "acc_stderr,none": 0.019393618790705287, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953838}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.264, "acc_stderr,none": 0.027934518957690908}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018742571431228178, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057422}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.052254366311072324}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.35294117647058826, "acc_stderr,none": 0.052141485907524605}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.05775749253522359}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.0557990389495433}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.18085106382978725, "acc_stderr,none": 0.039911724709195344}, "include_base_44_spanish": {"acc,none": 0.22545454545454546, "acc_stderr,none": 0.017804952036338608, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.192, "acc_stderr,none": 0.024960691989172012}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.256, "acc_stderr,none": 0.027657108718204915}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.07483314773547879}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018252356116850756, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2094240837696335, "acc_stderr,none": 0.029519452721613795}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_turkish": {"acc,none": 0.291970802919708, "acc_stderr,none": 0.01947440953279126, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031024}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589322}, "piqa": {"alias": "piqa", "acc,none": 0.675734494015234, "acc_stderr,none": 0.010921539041347978, "acc_norm,none": 0.6817192600652884, "acc_norm_stderr,none": 0.010868093932082238}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.4457831325301205, "acc_stderr,none": 0.009962979511168334}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4923694779116466, "acc_stderr,none": 0.01002090573154231}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.4506024096385542, "acc_stderr,none": 0.009973042774811678}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.44136546184738956, "acc_stderr,none": 0.009952922349377747}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.40803212851405624, "acc_stderr,none": 0.009851078965044885}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.41566265060240964, "acc_stderr,none": 0.009878474341822926}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.40682730923694777, "acc_stderr,none": 0.009846529240598876}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.37991967871485943, "acc_stderr,none": 0.009728758452987865}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.37630522088353413, "acc_stderr,none": 0.009710547744216048}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101964}} +{"created_at": "2025-06-30T17:49:10.448456", "global_step": 30000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.26706484641638223, "acc_stderr,none": 0.012928933196496349, "acc_norm,none": 0.2841296928327645, "acc_norm_stderr,none": 0.013179442447653886}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6275252525252525, "acc_stderr,none": 0.00992046921573602, "acc_norm,none": 0.5513468013468014, "acc_norm_stderr,none": 0.010205540414612876}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.20666666666666667, "acc_stderr,none": 0.013504646568022655, "acc_norm,none": 0.20666666666666667, "acc_norm_stderr,none": 0.013504646568022655}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.013714527832369676, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.013714527832369676}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.21, "acc_stderr,none": 0.013584490272855378, "acc_norm,none": 0.21, "acc_norm_stderr,none": 0.013584490272855378}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198704, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198704}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579362, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579362}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21777777777777776, "acc_stderr,none": 0.013765500608039471, "acc_norm,none": 0.21777777777777776, "acc_norm_stderr,none": 0.013765500608039471}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391958, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391958}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458127, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458127}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683048, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683048}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596793, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596793}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683059, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683059}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3602867954590719, "acc_stderr,none": 0.00479102400458801, "acc_norm,none": 0.4386576379207329, "acc_norm_stderr,none": 0.004952087083128891}, "include_base_44_arabic": {"acc,none": 0.2391304347826087, "acc_stderr,none": 0.018044215068306457, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.04654465622977447}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.038505120955363834}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.2, "acc_stderr,none": 0.03922322702763679}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.2208029197080292, "acc_stderr,none": 0.017744825334944883, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.18072289156626506, "acc_stderr,none": 0.029955737855810134}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_chinese": {"acc,none": 0.26055045871559634, "acc_stderr,none": 0.01886652423127394, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.046870495038546706}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french": {"acc,none": 0.21957040572792363, "acc_stderr,none": 0.02027105137324671, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22180451127819548, "acc_stderr,none": 0.02552152403141666}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.1702127659574468, "acc_stderr,none": 0.055411578656325386}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.21621621621621623, "acc_stderr,none": 0.04818155789706758}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.20863309352517986, "acc_stderr,none": 0.034103948701685434, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.043478260869565216, "acc_stderr,none": 0.04347826086956523}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.046449428524973954}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.2303473491773309, "acc_stderr,none": 0.018084068866105977, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.25, "acc_stderr,none": 0.04442616583193193}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909661}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_italian": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.01835041579495303, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569707}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2064516129032258, "acc_stderr,none": 0.03261635957463419}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569706}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_japanese": {"acc,none": 0.22355289421157684, "acc_stderr,none": 0.018643089546169774, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.1919191919191919, "acc_stderr,none": 0.039780804479336844}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916707}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.21393034825870647, "acc_stderr,none": 0.02899690969332891}, "include_base_44_korean": {"acc,none": 0.244, "acc_stderr,none": 0.01923266966204423, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268473}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.26, "acc_stderr,none": 0.0277973157526443}, "include_base_44_russian": {"acc,none": 0.26811594202898553, "acc_stderr,none": 0.018791433474280878, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057422}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.052254366311072324}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.35294117647058826, "acc_stderr,none": 0.052141485907524605}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.3188405797101449, "acc_stderr,none": 0.05651408783764654}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.18085106382978725, "acc_stderr,none": 0.039911724709195344}, "include_base_44_spanish": {"acc,none": 0.23272727272727273, "acc_stderr,none": 0.018025401102751793, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.204, "acc_stderr,none": 0.02553712157454817}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.264, "acc_stderr,none": 0.027934518957690908}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.07483314773547879}, "include_base_44_telugu": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01837078308085148, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689629}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.225130890052356, "acc_stderr,none": 0.030300857413315146}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594689}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_turkish": {"acc,none": 0.2864963503649635, "acc_stderr,none": 0.019357322659668684, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031024}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589322}, "piqa": {"alias": "piqa", "acc,none": 0.6795429815016322, "acc_stderr,none": 0.010887766073814876, "acc_norm,none": 0.6838955386289445, "acc_norm_stderr,none": 0.010848148455700443}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617607}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.4429718875502008, "acc_stderr,none": 0.009956671790008788}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4943775100401606, "acc_stderr,none": 0.0100214392037773}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.4457831325301205, "acc_stderr,none": 0.009962979511168328}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.442570281124498, "acc_stderr,none": 0.009955744325477469}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.40401606425702813, "acc_stderr,none": 0.009835674445385845}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.40923694779116465, "acc_stderr,none": 0.009855567414480236}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.41124497991967873, "acc_stderr,none": 0.009862912223544627}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.39437751004016064, "acc_stderr,none": 0.009795906230304218}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.37630522088353413, "acc_stderr,none": 0.00971054774421605}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274445}}