{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.16, "eval_steps": 10, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy_reward": 0.59375, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24375000596046448, "adam_stats/lm_head/lr_effective_max": 4.743411409435794e-06, "adam_stats/lm_head/lr_effective_mean": -1.3169262555456918e-11, "adam_stats/lm_head/lr_effective_min": -4.7434205043828115e-06, "adam_stats/lm_head/lr_effective_std": 2.875665927604132e-07, "adam_stats/lr_effective_max": 4.743411409435794e-06, "adam_stats/lr_effective_mean": 1.239900890293022e-11, "adam_stats/lr_effective_min": -4.7434205043828115e-06, "adam_stats/m_t_max": 0.004125977400690317, "adam_stats/m_t_mean": 5.769767649765889e-11, "adam_stats/m_t_min": -0.0054687513038516045, "adam_stats/v_t_max": 2.9906841518823057e-06, "adam_stats/v_t_mean": 3.238570436131516e-13, "adam_stats/v_t_min": 0.0, "advantages": 0.59375, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24375000596046448, "all_logprobs": -0.15753234922885895, "all_logprobs/max": 0.0, "all_logprobs/median": -3.838539123535156e-05, "all_logprobs/min": -10.25, "all_logprobs/p1": -2.578125, "all_logprobs/p10": -0.4296875, "all_logprobs/p25": -0.0205078125, "all_logprobs/p5": -0.9921875, "all_logprobs/p75": -7.152557373046875e-07, "all_logprobs/var": 0.2671676278114319, "clip_ratio": 0.0, "completion_length": 654.59375, "completion_length/correct": 534.631591796875, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 466.0, "completion_length/correct/min": 72.0, "completion_length/correct/p25": 391.0, "completion_length/correct/p75": 604.0, "completion_length/correct/var": 51720.6328125, "completion_length/incorrect": 829.923095703125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 20.0, "completion_length/incorrect/p25": 726.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 91857.3984375, "completion_length/max": 1024.0, "completion_length/median": 563.0, "completion_length/min": 20.0, "completion_length/p25": 406.5, "completion_length/p75": 1024.0, "completion_length/var": 88485.203125, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.0016, "feature_vector_variance/max_squared_error": 91767.5390625, "feature_vector_variance/metric": 24013.947265625, "generated_tokens/total": 62841.0, "global_fisher_curvature": 5568.0, "global_fisher_curvature/max": 5568.0, "global_fisher_curvature/median": 5568.0, "global_fisher_curvature/min": 5568.0, "global_fisher_curvature/p25": 5568.0, "global_fisher_curvature/p75": 5568.0, "global_fisher_curvature/p85": 5568.0, "global_fisher_curvature/p90": 5568.0, "global_fisher_curvature/p95": 5568.0, "global_fisher_curvature/p99": 5568.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 0.0, "global_fisher_kl_divergence/max": 0.0, "global_fisher_kl_divergence/median": 0.0, "global_fisher_kl_divergence/min": 0.0, "global_fisher_kl_divergence/p25": 0.0, "global_fisher_kl_divergence/p75": 0.0, "global_fisher_kl_divergence/p85": 0.0, "global_fisher_kl_divergence/p90": 0.0, "global_fisher_kl_divergence/p95": 0.0, "global_fisher_kl_divergence/p99": 0.0, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0, "global_full_update_term/max": 0.0, "global_full_update_term/median": 0.0, "global_full_update_term/min": 0.0, "global_full_update_term/p25": 0.0, "global_full_update_term/p75": 0.0, "global_full_update_term/p85": 0.0, "global_full_update_term/p90": 0.0, "global_full_update_term/p95": 0.0, "global_full_update_term/p99": 0.0, "global_full_update_term/var": NaN, "global_hessian_coeff": 3840.0, "global_hessian_coeff/max": 3840.0, "global_hessian_coeff/median": 3840.0, "global_hessian_coeff/min": 3840.0, "global_hessian_coeff/p25": 3840.0, "global_hessian_coeff/p75": 3840.0, "global_hessian_coeff/p99": 3840.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 3840.0, "global_hessian_coeff_abs/max": 3840.0, "global_hessian_coeff_abs/median": 3840.0, "global_hessian_coeff_abs/min": 3840.0, "global_hessian_coeff_abs/p25": 3840.0, "global_hessian_coeff_abs/p75": 3840.0, "global_hessian_coeff_abs/p99": 3840.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.8449161648750305, "learning_rate": 1.5e-06, "loss": -0.5938, "masked_global_fisher_curvature": 5568.0, "masked_global_fisher_curvature/max": 5568.0, "masked_global_fisher_curvature/median": 5568.0, "masked_global_fisher_curvature/min": 5568.0, "masked_global_fisher_curvature/p25": 5568.0, "masked_global_fisher_curvature/p75": 5568.0, "masked_global_fisher_curvature/p85": 5568.0, "masked_global_fisher_curvature/p90": 5568.0, "masked_global_fisher_curvature/p95": 5568.0, "masked_global_fisher_curvature/p99": 5568.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 0.0, "masked_global_fisher_kl_divergence/max": 0.0, "masked_global_fisher_kl_divergence/median": 0.0, "masked_global_fisher_kl_divergence/min": 0.0, "masked_global_fisher_kl_divergence/p25": 0.0, "masked_global_fisher_kl_divergence/p75": 0.0, "masked_global_fisher_kl_divergence/p85": 0.0, "masked_global_fisher_kl_divergence/p90": 0.0, "masked_global_fisher_kl_divergence/p95": 0.0, "masked_global_fisher_kl_divergence/p99": 0.0, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0, "masked_global_full_update_term/max": 0.0, "masked_global_full_update_term/median": 0.0, "masked_global_full_update_term/min": 0.0, "masked_global_full_update_term/p25": 0.0, "masked_global_full_update_term/p75": 0.0, "masked_global_full_update_term/p85": 0.0, "masked_global_full_update_term/p90": 0.0, "masked_global_full_update_term/p95": 0.0, "masked_global_full_update_term/p99": 0.0, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 3840.0, "masked_global_hessian_coeff/max": 3840.0, "masked_global_hessian_coeff/median": 3840.0, "masked_global_hessian_coeff/min": 3840.0, "masked_global_hessian_coeff/p25": 3840.0, "masked_global_hessian_coeff/p75": 3840.0, "masked_global_hessian_coeff/p99": 3840.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3840.0, "masked_global_hessian_coeff_abs/max": 3840.0, "masked_global_hessian_coeff_abs/median": 3840.0, "masked_global_hessian_coeff_abs/min": 3840.0, "masked_global_hessian_coeff_abs/p25": 3840.0, "masked_global_hessian_coeff_abs/p75": 3840.0, "masked_global_hessian_coeff_abs/p99": 3840.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 80.17448425292969, "masked_per_sentence_gradient_norm/max": 334.0, "masked_per_sentence_gradient_norm/median": 81.5, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 133.75, "masked_per_sentence_gradient_norm/var": 6744.83203125, "masked_per_token_gradient_norm": 9.554606437683105, "masked_per_token_gradient_norm/max": 386.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 0.00170135498046875, "masked_per_token_gradient_norm/var": 1398.1212158203125, "masked_sentence_fisher_curvature": 1279877.375, "masked_sentence_fisher_curvature/max": 23724032.0, "masked_sentence_fisher_curvature/median": 245760.0, "masked_sentence_fisher_curvature/min": 0.0, "masked_sentence_fisher_curvature/p25": 0.0, "masked_sentence_fisher_curvature/p75": 1130496.0, "masked_sentence_fisher_curvature/p85": 2029568.0, "masked_sentence_fisher_curvature/p90": 2654208.0, "masked_sentence_fisher_curvature/p95": 6881280.0, "masked_sentence_fisher_curvature/p99": 12268376.0, "masked_sentence_fisher_curvature/var": 9420708773888.0, "masked_sentence_fisher_kl_divergence": 0.0, "masked_sentence_fisher_kl_divergence/max": 0.0, "masked_sentence_fisher_kl_divergence/median": 0.0, "masked_sentence_fisher_kl_divergence/min": 0.0, "masked_sentence_fisher_kl_divergence/p25": 0.0, "masked_sentence_fisher_kl_divergence/p75": 0.0, "masked_sentence_fisher_kl_divergence/p85": 0.0, "masked_sentence_fisher_kl_divergence/p90": 0.0, "masked_sentence_fisher_kl_divergence/p95": 0.0, "masked_sentence_fisher_kl_divergence/p99": 0.0, "masked_sentence_fisher_kl_divergence/var": 0.0, "masked_sentence_full_gradient_variance/max_squared_error": 12895.521484375, "masked_sentence_full_gradient_variance/metric": 12895.521484375, "masked_sentence_full_gradient_variance/p75": 12895.521484375, "masked_sentence_full_gradient_variance/p90": 12895.521484375, "masked_sentence_full_gradient_variance/p95": 12895.521484375, "masked_sentence_full_gradient_variance/p99": 12895.521484375, "masked_sentence_full_update_term": 0.0, "masked_sentence_full_update_term/max": 0.0, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0, "masked_sentence_full_update_term/p85": 0.0, "masked_sentence_full_update_term/p90": 0.0, "masked_sentence_full_update_term/p95": 0.0, "masked_sentence_full_update_term/p99": 0.0, "masked_sentence_full_update_term/var": 0.0, "masked_sentence_hessian_coeff": 921770.6875, "masked_sentence_hessian_coeff/max": 20316160.0, "masked_sentence_hessian_coeff/median": 135168.0, "masked_sentence_hessian_coeff/min": -29952.0, "masked_sentence_hessian_coeff/p25": 0.0, "masked_sentence_hessian_coeff/p75": 735232.0, "masked_sentence_hessian_coeff/p99": 9856648.0, "masked_sentence_hessian_coeff/var": 6234568130560.0, "masked_sentence_hessian_coeff_abs": 922394.6875, "masked_sentence_hessian_coeff_abs/max": 20316160.0, "masked_sentence_hessian_coeff_abs/median": 135168.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 735232.0, "masked_sentence_hessian_coeff_abs/p99": 9856648.0, "masked_sentence_hessian_coeff_abs/var": 6233405259776.0, "masked_token_fisher_curvature": 73329320.0, "masked_token_fisher_curvature/max": 22414360576.0, "masked_token_fisher_curvature/median": 0.0, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 0.0, "masked_token_fisher_curvature/p75": 8.469669410260394e-12, "masked_token_fisher_curvature/p85": 0.2216796875, "masked_token_fisher_curvature/p90": 10816.0, "masked_token_fisher_curvature/p95": 23068672.0, "masked_token_fisher_curvature/p99": 2055208960.0, "masked_token_fisher_curvature/var": 4.613118201042043e+17, "masked_token_fisher_kl_divergence": 0.0, "masked_token_fisher_kl_divergence/max": 0.0, "masked_token_fisher_kl_divergence/median": 0.0, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 0.0, "masked_token_fisher_kl_divergence/p75": 0.0, "masked_token_fisher_kl_divergence/p85": 0.0, "masked_token_fisher_kl_divergence/p90": 0.0, "masked_token_fisher_kl_divergence/p95": 0.0, "masked_token_fisher_kl_divergence/p99": 0.0, "masked_token_fisher_kl_divergence/var": 0.0, "masked_token_full_update_term": 0.0, "masked_token_full_update_term/max": 0.0, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": 0.0, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 0.0, "masked_token_full_update_term/p85": 0.0, "masked_token_full_update_term/p90": 0.0, "masked_token_full_update_term/p95": 0.0, "masked_token_full_update_term/p99": 0.0, "masked_token_full_update_term/var": 0.0, "masked_token_hessian_coeff": 52378284.0, "masked_token_hessian_coeff/max": 20401094656.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -398458880.0, "masked_token_hessian_coeff/p25": -2.223532646894455e-08, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 1333788672.0, "masked_token_hessian_coeff/var": 3.5327205521227776e+17, "masked_token_hessian_coeff_abs": 56393584.0, "masked_token_hessian_coeff_abs/max": 20401094656.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 5.736947059631348e-07, "masked_token_hessian_coeff_abs/p99": 1333788672.0, "masked_token_hessian_coeff_abs/var": 3.528353429376205e+17, "mean_logprobs": -0.1650390625, "mean_logprobs/var": 0.010009765625, "num_completions/total": 96, "per_sentence_gradient_norm": 80.17448425292969, "per_sentence_gradient_norm/max": 334.0, "per_sentence_gradient_norm/median": 81.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 133.75, "per_sentence_gradient_norm/var": 6744.83203125, "per_token_feature_norm": 161.08824157714844, "per_token_feature_norm/max": 330.0, "per_token_feature_norm/median": 150.0, "per_token_feature_norm/min": 65.0, "per_token_feature_norm/p25": 122.0, "per_token_feature_norm/p75": 192.0, "per_token_feature_norm/var": 2435.07421875, "per_token_gradient_norm": 9.554606437683105, "per_token_gradient_norm/max": 386.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 0.00170135498046875, "per_token_gradient_norm/var": 1398.1212158203125, "per_token_policy_error_norm": 0.08122844249010086, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.06751953810453415, "policy_entropy": 0.17668761312961578, "policy_entropy/max": 3.625, "policy_entropy/median": 0.000453948974609375, "policy_entropy/min": 1.5654144647214707e-14, "policy_entropy/p25": 1.2516975402832031e-05, "policy_entropy/p75": 0.10888671875, "policy_entropy/var": 0.14696310460567474, "policy_loss": -0.59375, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24375000596046448, "policy_sharpness": 7.073214530944824, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 2.9434571266174316, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 14.176162719726562, "reward": 0.59375, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24375000596046448, "rewards/accuracy_reward": 0.59375, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24375000596046448, "sentence_fisher_curvature": 1279706.75, "sentence_fisher_curvature/max": 23724032.0, "sentence_fisher_curvature/median": 245760.0, "sentence_fisher_curvature/min": 0.0, "sentence_fisher_curvature/p25": 0.0, "sentence_fisher_curvature/p75": 1130496.0, "sentence_fisher_curvature/p85": 2029568.0, "sentence_fisher_curvature/p90": 2654208.0, "sentence_fisher_curvature/p95": 6881280.0, "sentence_fisher_curvature/p99": 12268376.0, "sentence_fisher_curvature/var": 9420350160896.0, "sentence_fisher_kl_divergence": 0.0, "sentence_fisher_kl_divergence/max": 0.0, "sentence_fisher_kl_divergence/median": 0.0, "sentence_fisher_kl_divergence/min": 0.0, "sentence_fisher_kl_divergence/p25": 0.0, "sentence_fisher_kl_divergence/p75": 0.0, "sentence_fisher_kl_divergence/p85": 0.0, "sentence_fisher_kl_divergence/p90": 0.0, "sentence_fisher_kl_divergence/p95": 0.0, "sentence_fisher_kl_divergence/p99": 0.0, "sentence_fisher_kl_divergence/var": 0.0, "sentence_full_gradient_variance/max_squared_error": 12895.521484375, "sentence_full_gradient_variance/metric": 12895.521484375, "sentence_full_gradient_variance/p75": 12895.521484375, "sentence_full_gradient_variance/p90": 12895.521484375, "sentence_full_gradient_variance/p95": 12895.521484375, "sentence_full_gradient_variance/p99": 12895.521484375, "sentence_full_update_term": 0.0, "sentence_full_update_term/max": 0.0, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0, "sentence_full_update_term/p85": 0.0, "sentence_full_update_term/p90": 0.0, "sentence_full_update_term/p95": 0.0, "sentence_full_update_term/p99": 0.0, "sentence_full_update_term/var": 0.0, "sentence_hessian_coeff": 921770.6875, "sentence_hessian_coeff/max": 20316160.0, "sentence_hessian_coeff/median": 135168.0, "sentence_hessian_coeff/min": -29952.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 735232.0, "sentence_hessian_coeff/p99": 9856648.0, "sentence_hessian_coeff/var": 6234568130560.0, "sentence_hessian_coeff_abs": 922394.6875, "sentence_hessian_coeff_abs/max": 20316160.0, "sentence_hessian_coeff_abs/median": 135168.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 735232.0, "sentence_hessian_coeff_abs/p99": 9856648.0, "sentence_hessian_coeff_abs/var": 6233405259776.0, "step": 1, "token_fisher_curvature": 73329320.0, "token_fisher_curvature/max": 22414360576.0, "token_fisher_curvature/median": 0.0, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 0.0, "token_fisher_curvature/p75": 8.469669410260394e-12, "token_fisher_curvature/p85": 0.2216796875, "token_fisher_curvature/p90": 10816.0, "token_fisher_curvature/p95": 23068672.0, "token_fisher_curvature/p99": 2055208960.0, "token_fisher_curvature/var": 4.613118201042043e+17, "token_fisher_kl_divergence": 0.0, "token_fisher_kl_divergence/max": 0.0, "token_fisher_kl_divergence/median": 0.0, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 0.0, "token_fisher_kl_divergence/p75": 0.0, "token_fisher_kl_divergence/p85": 0.0, "token_fisher_kl_divergence/p90": 0.0, "token_fisher_kl_divergence/p95": 0.0, "token_fisher_kl_divergence/p99": 0.0, "token_fisher_kl_divergence/var": 0.0, "token_full_update_term": 0.0, "token_full_update_term/max": 0.0, "token_full_update_term/median": 0.0, "token_full_update_term/min": 0.0, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 0.0, "token_full_update_term/p85": 0.0, "token_full_update_term/p90": 0.0, "token_full_update_term/p95": 0.0, "token_full_update_term/p99": 0.0, "token_full_update_term/var": 0.0, "token_hessian_coeff": 52378284.0, "token_hessian_coeff/max": 20401094656.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -398458880.0, "token_hessian_coeff/p25": -2.223532646894455e-08, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 1333788672.0, "token_hessian_coeff/var": 3.5327205521227776e+17, "token_hessian_coeff_abs": 56393584.0, "token_hessian_coeff_abs/max": 20401094656.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 5.736947059631348e-07, "token_hessian_coeff_abs/p99": 1333788672.0, "token_hessian_coeff_abs/var": 3.528353429376205e+17 }, { "accuracy_reward": 0.625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2368421107530594, "adam_stats/lm_head/lr_effective_max": 1.2765973224304616e-05, "adam_stats/lm_head/lr_effective_mean": -4.537918651958961e-11, "adam_stats/lm_head/lr_effective_min": -1.2765998690156266e-05, "adam_stats/lm_head/lr_effective_std": 6.378797934303293e-07, "adam_stats/lr_effective_max": 1.2765973224304616e-05, "adam_stats/lr_effective_mean": -1.2874055987932564e-10, "adam_stats/lr_effective_min": -1.2765998690156266e-05, "adam_stats/m_t_max": 0.011550314724445343, "adam_stats/m_t_mean": -2.762695974767304e-11, "adam_stats/m_t_min": -0.008782847784459591, "adam_stats/v_t_max": 1.3277519428811502e-05, "adam_stats/v_t_mean": 8.061304987254381e-13, "adam_stats/v_t_min": 0.0, "advantages": 0.625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2368421107530594, "all_logprobs": -0.162271186709404, "all_logprobs/max": 0.0, "all_logprobs/median": -5.14984130859375e-05, "all_logprobs/min": -9.9375, "all_logprobs/p1": -2.6875, "all_logprobs/p10": -0.443359375, "all_logprobs/p25": -0.0230712890625, "all_logprobs/p5": -1.0234375, "all_logprobs/p75": -7.152557373046875e-07, "all_logprobs/var": 0.2774546146392822, "clip_ratio": 0.0, "completion_length": 543.5729370117188, "completion_length/correct": 494.3000183105469, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 362.0, "completion_length/correct/min": 66.0, "completion_length/correct/p25": 287.5, "completion_length/correct/p75": 659.0, "completion_length/correct/var": 79398.0546875, "completion_length/incorrect": 625.6944580078125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 673.0, "completion_length/incorrect/min": 45.0, "completion_length/incorrect/p25": 348.5, "completion_length/incorrect/p75": 946.5, "completion_length/incorrect/var": 123553.640625, "completion_length/max": 1024.0, "completion_length/median": 437.0, "completion_length/min": 45.0, "completion_length/p25": 301.75, "completion_length/p75": 853.25, "completion_length/var": 98919.0859375, "curvature_clip_ratio_token_fisher": 0.04639441892504692, "curvature_clip_ratio_token_hessian": 0.008316884748637676, "curvature_clip_ratio_total_fisher": 0.04639441892504692, "curvature_clip_ratio_total_full": 0.04639441892504692, "curvature_clip_ratio_total_hessian": 0.008316884748637676, "epoch": 0.0032, "feature_vector_variance/max_squared_error": 99117.1328125, "feature_vector_variance/metric": 25010.828125, "generated_tokens/total": 115024.0, "global_fisher_curvature": 524288.0, "global_fisher_curvature/max": 524288.0, "global_fisher_curvature/median": 524288.0, "global_fisher_curvature/min": 524288.0, "global_fisher_curvature/p25": 524288.0, "global_fisher_curvature/p75": 524288.0, "global_fisher_curvature/p85": 524288.0, "global_fisher_curvature/p90": 524288.0, "global_fisher_curvature/p95": 524288.0, "global_fisher_curvature/p99": 524288.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.885958671569824e-07, "global_fisher_kl_divergence/max": 5.885958671569824e-07, "global_fisher_kl_divergence/median": 5.885958671569824e-07, "global_fisher_kl_divergence/min": 5.885958671569824e-07, "global_fisher_kl_divergence/p25": 5.885958671569824e-07, "global_fisher_kl_divergence/p75": 5.885958671569824e-07, "global_fisher_kl_divergence/p85": 5.885958671569824e-07, "global_fisher_kl_divergence/p90": 5.885958671569824e-07, "global_fisher_kl_divergence/p95": 5.885958671569824e-07, "global_fisher_kl_divergence/p99": 5.885958671569824e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0301513671875, "global_full_update_term/max": 0.0301513671875, "global_full_update_term/median": 0.0301513671875, "global_full_update_term/min": 0.0301513671875, "global_full_update_term/p25": 0.0301513671875, "global_full_update_term/p75": 0.0301513671875, "global_full_update_term/p85": 0.0301513671875, "global_full_update_term/p90": 0.0301513671875, "global_full_update_term/p95": 0.0301513671875, "global_full_update_term/p99": 0.0301513671875, "global_full_update_term/var": NaN, "global_hessian_coeff": 92160.0, "global_hessian_coeff/max": 92160.0, "global_hessian_coeff/median": 92160.0, "global_hessian_coeff/min": 92160.0, "global_hessian_coeff/p25": 92160.0, "global_hessian_coeff/p75": 92160.0, "global_hessian_coeff/p99": 92160.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 92160.0, "global_hessian_coeff_abs/max": 92160.0, "global_hessian_coeff_abs/median": 92160.0, "global_hessian_coeff_abs/min": 92160.0, "global_hessian_coeff_abs/p25": 92160.0, "global_hessian_coeff_abs/p75": 92160.0, "global_hessian_coeff_abs/p99": 92160.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 1.0381879806518555, "learning_rate": 3e-06, "loss": -0.625, "masked_global_fisher_curvature": 165888.0, "masked_global_fisher_curvature/max": 165888.0, "masked_global_fisher_curvature/median": 165888.0, "masked_global_fisher_curvature/min": 165888.0, "masked_global_fisher_curvature/p25": 165888.0, "masked_global_fisher_curvature/p75": 165888.0, "masked_global_fisher_curvature/p85": 165888.0, "masked_global_fisher_curvature/p90": 165888.0, "masked_global_fisher_curvature/p95": 165888.0, "masked_global_fisher_curvature/p99": 165888.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/max": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/median": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/min": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/p25": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/p75": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/p85": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/p90": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/p95": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/p99": 1.862645149230957e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00714111328125, "masked_global_full_update_term/max": 0.00714111328125, "masked_global_full_update_term/median": 0.00714111328125, "masked_global_full_update_term/min": 0.00714111328125, "masked_global_full_update_term/p25": 0.00714111328125, "masked_global_full_update_term/p75": 0.00714111328125, "masked_global_full_update_term/p85": 0.00714111328125, "masked_global_full_update_term/p90": 0.00714111328125, "masked_global_full_update_term/p95": 0.00714111328125, "masked_global_full_update_term/p99": 0.00714111328125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -121344.0, "masked_global_hessian_coeff/max": -121344.0, "masked_global_hessian_coeff/median": -121344.0, "masked_global_hessian_coeff/min": -121344.0, "masked_global_hessian_coeff/p25": -121344.0, "masked_global_hessian_coeff/p75": -121344.0, "masked_global_hessian_coeff/p99": -121344.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 121344.0, "masked_global_hessian_coeff_abs/max": 121344.0, "masked_global_hessian_coeff_abs/median": 121344.0, "masked_global_hessian_coeff_abs/min": 121344.0, "masked_global_hessian_coeff_abs/p25": 121344.0, "masked_global_hessian_coeff_abs/p75": 121344.0, "masked_global_hessian_coeff_abs/p99": 121344.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 31.34895896911621, "masked_per_sentence_gradient_norm/max": 117.5, "masked_per_sentence_gradient_norm/median": 30.75, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 49.25, "masked_per_sentence_gradient_norm/var": 916.6408081054688, "masked_per_token_gradient_norm": 3.391706705093384, "masked_per_token_gradient_norm/max": 139.0, "masked_per_token_gradient_norm/median": 2.086162567138672e-06, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 0.00164031982421875, "masked_per_token_gradient_norm/var": 180.16456604003906, "masked_sentence_fisher_curvature": 223115.671875, "masked_sentence_fisher_curvature/max": 745472.0, "masked_sentence_fisher_curvature/median": 214016.0, "masked_sentence_fisher_curvature/min": 2144.0, "masked_sentence_fisher_curvature/p25": 147712.0, "masked_sentence_fisher_curvature/p75": 272896.0, "masked_sentence_fisher_curvature/p85": 294912.0, "masked_sentence_fisher_curvature/p90": 345088.0, "masked_sentence_fisher_curvature/p95": 418304.0, "masked_sentence_fisher_curvature/p99": 473088.875, "masked_sentence_fisher_curvature/var": 11573499904.0, "masked_sentence_fisher_kl_divergence": 2.510626870844135e-07, "masked_sentence_fisher_kl_divergence/max": 8.381903171539307e-07, "masked_sentence_fisher_kl_divergence/median": 2.4028122425079346e-07, "masked_sentence_fisher_kl_divergence/min": 2.4156179279088974e-09, "masked_sentence_fisher_kl_divergence/p25": 1.6600824892520905e-07, "masked_sentence_fisher_kl_divergence/p75": 3.0780211091041565e-07, "masked_sentence_fisher_kl_divergence/p85": 3.3155083656311035e-07, "masked_sentence_fisher_kl_divergence/p90": 3.8743019104003906e-07, "masked_sentence_fisher_kl_divergence/p95": 4.7031790018081665e-07, "masked_sentence_fisher_kl_divergence/p99": 5.338350774763967e-07, "masked_sentence_fisher_kl_divergence/var": 1.4641682777229853e-14, "masked_sentence_full_gradient_variance/max_squared_error": 1854.849609375, "masked_sentence_full_gradient_variance/metric": 1854.849609375, "masked_sentence_full_gradient_variance/p75": 1854.849609375, "masked_sentence_full_gradient_variance/p90": 1854.849609375, "masked_sentence_full_gradient_variance/p95": 1854.849609375, "masked_sentence_full_gradient_variance/p99": 1854.849609375, "masked_sentence_full_update_term": 0.004836877342313528, "masked_sentence_full_update_term/max": 0.01422119140625, "masked_sentence_full_update_term/median": 0.005401611328125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.007965087890625, "masked_sentence_full_update_term/p85": 0.0100860595703125, "masked_sentence_full_update_term/p90": 0.010650634765625, "masked_sentence_full_update_term/p95": 0.0116729736328125, "masked_sentence_full_update_term/p99": 0.013583376072347164, "masked_sentence_full_update_term/var": 1.8558923329692334e-05, "masked_sentence_hessian_coeff": -323178.6875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -382976.0, "masked_sentence_hessian_coeff/min": -1441792.0, "masked_sentence_hessian_coeff/p25": -480768.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 89531670528.0, "masked_sentence_hessian_coeff_abs": 323178.6875, "masked_sentence_hessian_coeff_abs/max": 1441792.0, "masked_sentence_hessian_coeff_abs/median": 380928.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 480768.0, "masked_sentence_hessian_coeff_abs/p99": 1099367.5, "masked_sentence_hessian_coeff_abs/var": 89531670528.0, "masked_token_fisher_curvature": 239901.703125, "masked_token_fisher_curvature/max": 8847360.0, "masked_token_fisher_curvature/median": 0.00026702880859375, "masked_token_fisher_curvature/min": 1.7749370367472766e-29, "masked_token_fisher_curvature/p25": 2.782326191663742e-08, "masked_token_fisher_curvature/p75": 452.0, "masked_token_fisher_curvature/p85": 42496.0, "masked_token_fisher_curvature/p90": 274432.0, "masked_token_fisher_curvature/p95": 1458176.0, "masked_token_fisher_curvature/p99": 5832704.0, "masked_token_fisher_curvature/var": 948085194752.0, "masked_token_fisher_kl_divergence": 2.6998529278898786e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 3.0010716134398763e-16, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.134021904840911e-20, "masked_token_fisher_kl_divergence/p75": 5.093170329928398e-10, "masked_token_fisher_kl_divergence/p85": 4.7730281949043274e-08, "masked_token_fisher_kl_divergence/p90": 3.0919909477233887e-07, "masked_token_fisher_kl_divergence/p95": 1.6391277313232422e-06, "masked_token_fisher_kl_divergence/p99": 6.556510925292969e-06, "masked_token_fisher_kl_divergence/var": 1.2010780824772693e-12, "masked_token_full_update_term": 0.00015657853509765118, "masked_token_full_update_term/max": 0.00445556640625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.246830940246582e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.0489096641540527e-08, "masked_token_full_update_term/p85": 4.1425228118896484e-06, "masked_token_full_update_term/p90": 0.00014781951904296875, "masked_token_full_update_term/p95": 0.0011749267578125, "masked_token_full_update_term/p99": 0.0034942626953125, "masked_token_full_update_term/var": 3.639104591002251e-07, "masked_token_hessian_coeff": -413276.03125, "masked_token_hessian_coeff/max": 3964928.0, "masked_token_hessian_coeff/median": -4.00543212890625e-05, "masked_token_hessian_coeff/min": -18350080.0, "masked_token_hessian_coeff/p25": -36.25, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 1.46533203125, "masked_token_hessian_coeff/var": 2212457086976.0, "masked_token_hessian_coeff_abs": 414847.65625, "masked_token_hessian_coeff_abs/max": 18350080.0, "masked_token_hessian_coeff_abs/median": 0.00726318359375, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 40.25, "masked_token_hessian_coeff_abs/p99": 7634944.0, "masked_token_hessian_coeff_abs/var": 2211155542016.0, "mean_logprobs": -0.181640625, "mean_logprobs/var": 0.01336669921875, "num_completions/total": 192, "per_sentence_gradient_norm": 80.59114837646484, "per_sentence_gradient_norm/max": 360.0, "per_sentence_gradient_norm/median": 84.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 127.0, "per_sentence_gradient_norm/var": 5921.5185546875, "per_token_feature_norm": 164.20811462402344, "per_token_feature_norm/max": 334.0, "per_token_feature_norm/median": 154.0, "per_token_feature_norm/min": 64.0, "per_token_feature_norm/p25": 125.5, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 2416.109375, "per_token_gradient_norm": 10.942943572998047, "per_token_gradient_norm/max": 412.0, "per_token_gradient_norm/median": 5.036592483520508e-06, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 0.00579833984375, "per_token_gradient_norm/var": 1552.300537109375, "per_token_policy_error_norm": 0.08302175998687744, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.06909234076738358, "policy_entropy": 0.18188583850860596, "policy_entropy/max": 3.65625, "policy_entropy/median": 0.000583648681640625, "policy_entropy/min": 4.218847493575595e-14, "policy_entropy/p25": 1.1205673217773438e-05, "policy_entropy/p75": 0.11572265625, "policy_entropy/var": 0.15222567319869995, "policy_loss": -0.625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2368421107530594, "policy_sharpness": 7.011610507965088, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 2.8163208961486816, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 14.458894729614258, "reward": 0.625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2368421107530594, "rewards/accuracy_reward": 0.625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2368421107530594, "sentence_fisher_curvature": 1266939.75, "sentence_fisher_curvature/max": 8716288.0, "sentence_fisher_curvature/median": 1261568.0, "sentence_fisher_curvature/min": 2144.0, "sentence_fisher_curvature/p25": 210176.0, "sentence_fisher_curvature/p75": 1765376.0, "sentence_fisher_curvature/p85": 2191360.0, "sentence_fisher_curvature/p90": 2424832.0, "sentence_fisher_curvature/p95": 3166208.0, "sentence_fisher_curvature/p99": 4544935.0, "sentence_fisher_curvature/var": 1508162535424.0, "sentence_fisher_kl_divergence": 1.425489585926698e-06, "sentence_fisher_kl_divergence/max": 9.834766387939453e-06, "sentence_fisher_kl_divergence/median": 1.4156103134155273e-06, "sentence_fisher_kl_divergence/min": 2.4156179279088974e-09, "sentence_fisher_kl_divergence/p25": 2.360902726650238e-07, "sentence_fisher_kl_divergence/p75": 1.9818544387817383e-06, "sentence_fisher_kl_divergence/p85": 2.466142177581787e-06, "sentence_fisher_kl_divergence/p90": 2.726912498474121e-06, "sentence_fisher_kl_divergence/p95": 3.5651028156280518e-06, "sentence_fisher_kl_divergence/p99": 5.106643129693111e-06, "sentence_fisher_kl_divergence/var": 1.913726555696771e-12, "sentence_full_gradient_variance/max_squared_error": 12152.7685546875, "sentence_full_gradient_variance/metric": 12152.7685546875, "sentence_full_gradient_variance/p75": 12152.7685546875, "sentence_full_gradient_variance/p90": 12152.7685546875, "sentence_full_gradient_variance/p95": 12152.7685546875, "sentence_full_gradient_variance/p99": 12152.7685546875, "sentence_full_update_term": 0.014390945434570312, "sentence_full_update_term/max": 0.054443359375, "sentence_full_update_term/median": 0.016845703125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.023651123046875, "sentence_full_update_term/p85": 0.028472900390625, "sentence_full_update_term/p90": 0.0296630859375, "sentence_full_update_term/p95": 0.03564453125, "sentence_full_update_term/p99": 0.05374756082892418, "sentence_full_update_term/var": 0.00018366100266575813, "sentence_hessian_coeff": 427322.6875, "sentence_hessian_coeff/max": 4784128.0, "sentence_hessian_coeff/median": 221184.0, "sentence_hessian_coeff/min": -53504.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 687104.0, "sentence_hessian_coeff/p99": 2130338.0, "sentence_hessian_coeff/var": 419623108608.0, "sentence_hessian_coeff_abs": 431018.6875, "sentence_hessian_coeff_abs/max": 4784128.0, "sentence_hessian_coeff_abs/median": 221184.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 687104.0, "sentence_hessian_coeff_abs/p99": 2130338.0, "sentence_hessian_coeff_abs/var": 416417218560.0, "step": 2, "token_fisher_curvature": 1576625.75, "token_fisher_curvature/max": 126877696.0, "token_fisher_curvature/median": 0.0007476806640625, "token_fisher_curvature/min": 1.7749370367472766e-29, "token_fisher_curvature/p25": 4.284083843231201e-08, "token_fisher_curvature/p75": 2544.0, "token_fisher_curvature/p85": 205824.0, "token_fisher_curvature/p90": 1179648.0, "token_fisher_curvature/p95": 7471104.0, "token_fisher_curvature/p99": 40108032.0, "token_fisher_curvature/var": 54505416687616.0, "token_fisher_kl_divergence": 1.7740637758834055e-06, "token_fisher_kl_divergence/max": 0.0001430511474609375, "token_fisher_kl_divergence/median": 8.396061623727746e-16, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.828087799349512e-20, "token_fisher_kl_divergence/p75": 2.86672729998827e-09, "token_fisher_kl_divergence/p85": 2.3189932107925415e-07, "token_fisher_kl_divergence/p90": 1.3262033462524414e-06, "token_fisher_kl_divergence/p95": 8.404254913330078e-06, "token_fisher_kl_divergence/p99": 4.506111145019531e-05, "token_fisher_kl_divergence/var": 6.900634230699865e-11, "token_full_update_term": 0.0005038469098508358, "token_full_update_term/max": 0.0169677734375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.246830940246582e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.0081566870212555e-07, "token_full_update_term/p85": 8.869171142578125e-05, "token_full_update_term/p90": 0.00091552734375, "token_full_update_term/p95": 0.003997802734375, "token_full_update_term/p99": 0.009521484375, "token_full_update_term/var": 3.142873993056128e-06, "token_hessian_coeff": 414570.1875, "token_hessian_coeff/max": 125829120.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -19005440.0, "token_hessian_coeff/p25": -32.25, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 31981568.0, "token_hessian_coeff/var": 38765489815552.0, "token_hessian_coeff_abs": 1292060.25, "token_hessian_coeff_abs/max": 125829120.0, "token_hessian_coeff_abs/median": 0.02197265625, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 191.0, "token_hessian_coeff_abs/p99": 31981568.0, "token_hessian_coeff_abs/var": 37267909378048.0 }, { "accuracy_reward": 0.6770833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.22094297409057617, "adam_stats/lm_head/lr_effective_max": 2.2347601770889014e-05, "adam_stats/lm_head/lr_effective_mean": -2.4999341138864928e-11, "adam_stats/lm_head/lr_effective_min": -2.2355210603564046e-05, "adam_stats/lm_head/lr_effective_std": 9.779835181689123e-07, "adam_stats/lr_effective_max": 2.2349069695337676e-05, "adam_stats/lr_effective_mean": -2.6395000074508346e-10, "adam_stats/lr_effective_min": -2.2355210603564046e-05, "adam_stats/m_t_max": 0.01018679328262806, "adam_stats/m_t_mean": 9.684799490150553e-11, "adam_stats/m_t_min": -0.011322279460728168, "adam_stats/v_t_max": 1.5342115148087032e-05, "adam_stats/v_t_mean": 1.308063249730318e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6770833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.22094297409057617, "all_logprobs": -0.14896607398986816, "all_logprobs/max": 0.0, "all_logprobs/median": -2.0503997802734375e-05, "all_logprobs/min": -12.125, "all_logprobs/p1": -2.5625, "all_logprobs/p10": -0.388671875, "all_logprobs/p25": -0.01104736328125, "all_logprobs/p5": -0.9765625, "all_logprobs/p75": -5.960464477539062e-07, "all_logprobs/var": 0.2582499086856842, "clip_ratio": 0.0, "completion_length": 618.0, "completion_length/correct": 536.984619140625, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 452.0, "completion_length/correct/min": 16.0, "completion_length/correct/p25": 309.0, "completion_length/correct/p75": 790.0, "completion_length/correct/var": 87893.109375, "completion_length/incorrect": 787.8709716796875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 31.0, "completion_length/incorrect/p25": 546.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 119453.0390625, "completion_length/max": 1024.0, "completion_length/median": 574.0, "completion_length/min": 16.0, "completion_length/p25": 334.5, "completion_length/p75": 1024.0, "completion_length/var": 110841.2421875, "curvature_clip_ratio_token_fisher": 0.07338862121105194, "curvature_clip_ratio_token_hessian": 0.03305353224277496, "curvature_clip_ratio_total_fisher": 0.07338862121105194, "curvature_clip_ratio_total_full": 0.07338862121105194, "curvature_clip_ratio_total_hessian": 0.03305353224277496, "epoch": 0.0048, "feature_vector_variance/max_squared_error": 91614.9921875, "feature_vector_variance/metric": 24356.921875, "generated_tokens/total": 174352.0, "global_fisher_curvature": 372736.0, "global_fisher_curvature/max": 372736.0, "global_fisher_curvature/median": 372736.0, "global_fisher_curvature/min": 372736.0, "global_fisher_curvature/p25": 372736.0, "global_fisher_curvature/p75": 372736.0, "global_fisher_curvature/p85": 372736.0, "global_fisher_curvature/p90": 372736.0, "global_fisher_curvature/p95": 372736.0, "global_fisher_curvature/p99": 372736.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.6763806343078613e-06, "global_fisher_kl_divergence/max": 1.6763806343078613e-06, "global_fisher_kl_divergence/median": 1.6763806343078613e-06, "global_fisher_kl_divergence/min": 1.6763806343078613e-06, "global_fisher_kl_divergence/p25": 1.6763806343078613e-06, "global_fisher_kl_divergence/p75": 1.6763806343078613e-06, "global_fisher_kl_divergence/p85": 1.6763806343078613e-06, "global_fisher_kl_divergence/p90": 1.6763806343078613e-06, "global_fisher_kl_divergence/p95": 1.6763806343078613e-06, "global_fisher_kl_divergence/p99": 1.6763806343078613e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.05224609375, "global_full_update_term/max": 0.05224609375, "global_full_update_term/median": 0.05224609375, "global_full_update_term/min": 0.05224609375, "global_full_update_term/p25": 0.05224609375, "global_full_update_term/p75": 0.05224609375, "global_full_update_term/p85": 0.05224609375, "global_full_update_term/p90": 0.05224609375, "global_full_update_term/p95": 0.05224609375, "global_full_update_term/p99": 0.05224609375, "global_full_update_term/var": NaN, "global_hessian_coeff": 49152.0, "global_hessian_coeff/max": 49152.0, "global_hessian_coeff/median": 49152.0, "global_hessian_coeff/min": 49152.0, "global_hessian_coeff/p25": 49152.0, "global_hessian_coeff/p75": 49152.0, "global_hessian_coeff/p99": 49152.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 49152.0, "global_hessian_coeff_abs/max": 49152.0, "global_hessian_coeff_abs/median": 49152.0, "global_hessian_coeff_abs/min": 49152.0, "global_hessian_coeff_abs/p25": 49152.0, "global_hessian_coeff_abs/p75": 49152.0, "global_hessian_coeff_abs/p99": 49152.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 1.133514165878296, "learning_rate": 4.5e-06, "loss": -0.6771, "masked_global_fisher_curvature": 51456.0, "masked_global_fisher_curvature/max": 51456.0, "masked_global_fisher_curvature/median": 51456.0, "masked_global_fisher_curvature/min": 51456.0, "masked_global_fisher_curvature/p25": 51456.0, "masked_global_fisher_curvature/p75": 51456.0, "masked_global_fisher_curvature/p85": 51456.0, "masked_global_fisher_curvature/p90": 51456.0, "masked_global_fisher_curvature/p95": 51456.0, "masked_global_fisher_curvature/p99": 51456.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/max": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/median": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/min": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p25": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p75": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p85": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p90": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p95": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p99": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0084228515625, "masked_global_full_update_term/max": 0.0084228515625, "masked_global_full_update_term/median": 0.0084228515625, "masked_global_full_update_term/min": 0.0084228515625, "masked_global_full_update_term/p25": 0.0084228515625, "masked_global_full_update_term/p75": 0.0084228515625, "masked_global_full_update_term/p85": 0.0084228515625, "masked_global_full_update_term/p90": 0.0084228515625, "masked_global_full_update_term/p95": 0.0084228515625, "masked_global_full_update_term/p99": 0.0084228515625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -61952.0, "masked_global_hessian_coeff/max": -61952.0, "masked_global_hessian_coeff/median": -61952.0, "masked_global_hessian_coeff/min": -61952.0, "masked_global_hessian_coeff/p25": -61952.0, "masked_global_hessian_coeff/p75": -61952.0, "masked_global_hessian_coeff/p99": -61952.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 61952.0, "masked_global_hessian_coeff_abs/max": 61952.0, "masked_global_hessian_coeff_abs/median": 61952.0, "masked_global_hessian_coeff_abs/min": 61952.0, "masked_global_hessian_coeff_abs/p25": 61952.0, "masked_global_hessian_coeff_abs/p75": 61952.0, "masked_global_hessian_coeff_abs/p99": 61952.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 16.613933563232422, "masked_per_sentence_gradient_norm/max": 46.25, "masked_per_sentence_gradient_norm/median": 18.5, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 25.5625, "masked_per_sentence_gradient_norm/var": 177.83848571777344, "masked_per_token_gradient_norm": 1.728881597518921, "masked_per_token_gradient_norm/max": 94.0, "masked_per_token_gradient_norm/median": 3.427267074584961e-06, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 0.00098419189453125, "masked_per_token_gradient_norm/var": 54.537940979003906, "masked_sentence_fisher_curvature": 58129.0, "masked_sentence_fisher_curvature/max": 164864.0, "masked_sentence_fisher_curvature/median": 52736.0, "masked_sentence_fisher_curvature/min": 5728.0, "masked_sentence_fisher_curvature/p25": 40192.0, "masked_sentence_fisher_curvature/p75": 70656.0, "masked_sentence_fisher_curvature/p85": 82304.0, "masked_sentence_fisher_curvature/p90": 90368.0, "masked_sentence_fisher_curvature/p95": 103936.0, "masked_sentence_fisher_curvature/p99": 162918.40625, "masked_sentence_fisher_curvature/var": 797826560.0, "masked_sentence_fisher_kl_divergence": 2.615524863358587e-07, "masked_sentence_fisher_kl_divergence/max": 7.413327693939209e-07, "masked_sentence_fisher_kl_divergence/median": 2.3748725652694702e-07, "masked_sentence_fisher_kl_divergence/min": 2.5727786123752594e-08, "masked_sentence_fisher_kl_divergence/p25": 1.8067657947540283e-07, "masked_sentence_fisher_kl_divergence/p75": 3.1851232051849365e-07, "masked_sentence_fisher_kl_divergence/p85": 3.702007234096527e-07, "masked_sentence_fisher_kl_divergence/p90": 4.069879651069641e-07, "masked_sentence_fisher_kl_divergence/p95": 4.670582711696625e-07, "masked_sentence_fisher_kl_divergence/p99": 7.342547405642108e-07, "masked_sentence_fisher_kl_divergence/var": 1.615151731164475e-14, "masked_sentence_full_gradient_variance/max_squared_error": 440.5087585449219, "masked_sentence_full_gradient_variance/metric": 440.5087585449219, "masked_sentence_full_gradient_variance/p75": 440.5087585449219, "masked_sentence_full_gradient_variance/p90": 440.5087585449219, "masked_sentence_full_gradient_variance/p95": 440.5087585449219, "masked_sentence_full_gradient_variance/p99": 440.5087585449219, "masked_sentence_full_update_term": 0.004615148063749075, "masked_sentence_full_update_term/max": 0.0142822265625, "masked_sentence_full_update_term/median": 0.0048828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0073394775390625, "masked_sentence_full_update_term/p85": 0.0091400146484375, "masked_sentence_full_update_term/p90": 0.0098876953125, "masked_sentence_full_update_term/p95": 0.010528564453125, "masked_sentence_full_update_term/p99": 0.012542730197310448, "masked_sentence_full_update_term/var": 1.4838998140476178e-05, "masked_sentence_hessian_coeff": -181888.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -197632.0, "masked_sentence_hessian_coeff/min": -1019904.0, "masked_sentence_hessian_coeff/p25": -279552.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 31147520000.0, "masked_sentence_hessian_coeff_abs": 181888.0, "masked_sentence_hessian_coeff_abs/max": 1019904.0, "masked_sentence_hessian_coeff_abs/median": 195584.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 279552.0, "masked_sentence_hessian_coeff_abs/p99": 984883.3125, "masked_sentence_hessian_coeff_abs/var": 31147520000.0, "masked_token_fisher_curvature": 64051.85546875, "masked_token_fisher_curvature/max": 2211840.0, "masked_token_fisher_curvature/median": 1.6450881958007812e-05, "masked_token_fisher_curvature/min": 1.137373342260413e-24, "masked_token_fisher_curvature/p25": 1.100124791264534e-08, "masked_token_fisher_curvature/p75": 6.25, "masked_token_fisher_curvature/p85": 5536.0, "masked_token_fisher_curvature/p90": 56064.0, "masked_token_fisher_curvature/p95": 395264.0, "masked_token_fisher_curvature/p99": 1622016.0, "masked_token_fisher_curvature/var": 70033997824.0, "masked_token_fisher_kl_divergence": 2.8832320708715997e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 7.41594285980085e-17, "masked_token_fisher_kl_divergence/min": 5.1251553695851735e-36, "masked_token_fisher_kl_divergence/p25": 4.955142741437657e-20, "masked_token_fisher_kl_divergence/p75": 2.808064891723916e-11, "masked_token_fisher_kl_divergence/p85": 2.491287887096405e-08, "masked_token_fisher_kl_divergence/p90": 2.514570951461792e-07, "masked_token_fisher_kl_divergence/p95": 1.780688762664795e-06, "masked_token_fisher_kl_divergence/p99": 7.3015689849853516e-06, "masked_token_fisher_kl_divergence/var": 1.4192430867684958e-12, "masked_token_full_update_term": 0.00013811199460178614, "masked_token_full_update_term/max": 0.00445556640625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.8477439880371094e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.3096723705530167e-08, "masked_token_full_update_term/p85": 1.8700957298278809e-06, "masked_token_full_update_term/p90": 8.440017700195312e-05, "masked_token_full_update_term/p95": 0.000885009765625, "masked_token_full_update_term/p99": 0.0034942626953125, "masked_token_full_update_term/var": 3.2676641126272443e-07, "masked_token_hessian_coeff": -204204.875, "masked_token_hessian_coeff/max": 5984.0, "masked_token_hessian_coeff/median": -1.71661376953125e-05, "masked_token_hessian_coeff/min": -11468800.0, "masked_token_hessian_coeff/p25": -10.5, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 2.9927978515625, "masked_token_hessian_coeff/var": 655533342720.0, "masked_token_hessian_coeff_abs": 204207.484375, "masked_token_hessian_coeff_abs/max": 11468800.0, "masked_token_hessian_coeff_abs/median": 0.00921630859375, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 12.8125, "masked_token_hessian_coeff_abs/p99": 4423680.0, "masked_token_hessian_coeff_abs/var": 655532359680.0, "mean_logprobs": -0.166015625, "mean_logprobs/var": 0.01458740234375, "num_completions/total": 288, "per_sentence_gradient_norm": 90.40104675292969, "per_sentence_gradient_norm/max": 370.0, "per_sentence_gradient_norm/median": 88.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 134.0, "per_sentence_gradient_norm/var": 7332.69140625, "per_token_feature_norm": 160.5414581298828, "per_token_feature_norm/max": 334.0, "per_token_feature_norm/median": 150.0, "per_token_feature_norm/min": 63.25, "per_token_feature_norm/p25": 123.0, "per_token_feature_norm/p75": 191.0, "per_token_feature_norm/var": 2344.118896484375, "per_token_gradient_norm": 10.631272315979004, "per_token_gradient_norm/max": 412.0, "per_token_gradient_norm/median": 8.165836334228516e-06, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 0.005218505859375, "per_token_gradient_norm/var": 1499.9722900390625, "per_token_policy_error_norm": 0.07611149549484253, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.06280320882797241, "policy_entropy": 0.16807840764522552, "policy_entropy/max": 3.765625, "policy_entropy/median": 0.0002574920654296875, "policy_entropy/min": 1.8616219676914625e-12, "policy_entropy/p25": 1.0132789611816406e-05, "policy_entropy/p75": 0.06396484375, "policy_entropy/var": 0.14907778799533844, "policy_loss": -0.6770833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.22094297409057617, "policy_sharpness": 7.296797275543213, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 3.43359375, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 13.803716659545898, "reward": 0.6770833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.22094297409057617, "rewards/accuracy_reward": 0.6770833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.22094297409057617, "sentence_fisher_curvature": 1019639.375, "sentence_fisher_curvature/max": 12058624.0, "sentence_fisher_curvature/median": 913408.0, "sentence_fisher_curvature/min": 12992.0, "sentence_fisher_curvature/p25": 219904.0, "sentence_fisher_curvature/p75": 1339392.0, "sentence_fisher_curvature/p85": 1538048.0, "sentence_fisher_curvature/p90": 1634304.0, "sentence_fisher_curvature/p95": 1755136.0, "sentence_fisher_curvature/p99": 6922256.5, "sentence_fisher_curvature/var": 2021904482304.0, "sentence_fisher_kl_divergence": 4.590379830915481e-06, "sentence_fisher_kl_divergence/max": 5.435943603515625e-05, "sentence_fisher_kl_divergence/median": 4.112720489501953e-06, "sentence_fisher_kl_divergence/min": 5.844049155712128e-08, "sentence_fisher_kl_divergence/p25": 9.890645742416382e-07, "sentence_fisher_kl_divergence/p75": 6.034970283508301e-06, "sentence_fisher_kl_divergence/p85": 6.92903995513916e-06, "sentence_fisher_kl_divergence/p90": 7.3462724685668945e-06, "sentence_fisher_kl_divergence/p95": 7.912516593933105e-06, "sentence_fisher_kl_divergence/p99": 3.1143499654717743e-05, "sentence_fisher_kl_divergence/var": 4.104516379555001e-11, "sentence_full_gradient_variance/max_squared_error": 15221.658203125, "sentence_full_gradient_variance/metric": 15221.658203125, "sentence_full_gradient_variance/p75": 15221.658203125, "sentence_full_gradient_variance/p90": 15221.658203125, "sentence_full_gradient_variance/p95": 15221.658203125, "sentence_full_gradient_variance/p99": 15221.658203125, "sentence_full_update_term": 0.026844661682844162, "sentence_full_update_term/max": 0.083984375, "sentence_full_update_term/median": 0.0267333984375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.03857421875, "sentence_full_update_term/p85": 0.05218505859375, "sentence_full_update_term/p90": 0.060546875, "sentence_full_update_term/p95": 0.0728759765625, "sentence_full_update_term/p99": 0.08120118081569672, "sentence_full_update_term/var": 0.0005545857129618526, "sentence_hessian_coeff": 350702.34375, "sentence_hessian_coeff/max": 6651904.0, "sentence_hessian_coeff/median": 178176.0, "sentence_hessian_coeff/min": -198656.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 440320.0, "sentence_hessian_coeff/p99": 3772425.25, "sentence_hessian_coeff/var": 641977548800.0, "sentence_hessian_coeff_abs": 360465.6875, "sentence_hessian_coeff_abs/max": 6651904.0, "sentence_hessian_coeff_abs/median": 183296.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 440320.0, "sentence_hessian_coeff_abs/p99": 3772425.25, "sentence_hessian_coeff_abs/var": 634961068032.0, "step": 3, "token_fisher_curvature": 1113496.375, "token_fisher_curvature/max": 110100480.0, "token_fisher_curvature/median": 7.200241088867188e-05, "token_fisher_curvature/min": 1.137373342260413e-24, "token_fisher_curvature/p25": 1.862645149230957e-08, "token_fisher_curvature/p75": 608.0, "token_fisher_curvature/p85": 114104.0, "token_fisher_curvature/p90": 839680.0, "token_fisher_curvature/p95": 5373952.0, "token_fisher_curvature/p99": 28467712.0, "token_fisher_curvature/var": 27570718375936.0, "token_fisher_kl_divergence": 5.011833309254143e-06, "token_fisher_kl_divergence/max": 0.00049591064453125, "token_fisher_kl_divergence/median": 3.2439329000766293e-16, "token_fisher_kl_divergence/min": 5.1251553695851735e-36, "token_fisher_kl_divergence/p25": 8.385626177817573e-20, "token_fisher_kl_divergence/p75": 2.735760062932968e-09, "token_fisher_kl_divergence/p85": 5.137408152222633e-07, "token_fisher_kl_divergence/p90": 3.7848949432373047e-06, "token_fisher_kl_divergence/p95": 2.4199485778808594e-05, "token_fisher_kl_divergence/p99": 0.000128231942653656, "token_fisher_kl_divergence/var": 5.585357176762784e-10, "token_full_update_term": 0.000842912879306823, "token_full_update_term/max": 0.031982421875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.8477439880371094e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.1362135410308838e-07, "token_full_update_term/p85": 0.00014495849609375, "token_full_update_term/p90": 0.00148773193359375, "token_full_update_term/p95": 0.006683349609375, "token_full_update_term/p99": 0.01611328125, "token_full_update_term/var": 8.88154227141058e-06, "token_hessian_coeff": 281482.03125, "token_hessian_coeff/max": 108003328.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -14745600.0, "token_hessian_coeff/p25": -16.125, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 21460480.0, "token_hessian_coeff/var": 19289566347264.0, "token_hessian_coeff_abs": 900471.0, "token_hessian_coeff_abs/max": 108003328.0, "token_hessian_coeff_abs/median": 0.02685546875, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 95.125, "token_hessian_coeff_abs/p99": 21460480.0, "token_hessian_coeff_abs/var": 18557937123328.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166667461395264, "adam_stats/lm_head/lr_effective_max": 3.273080073995516e-05, "adam_stats/lm_head/lr_effective_mean": -7.38041398828937e-12, "adam_stats/lm_head/lr_effective_min": -3.2633070077281445e-05, "adam_stats/lm_head/lr_effective_std": 1.2708222811852465e-06, "adam_stats/lr_effective_max": 3.273080073995516e-05, "adam_stats/lr_effective_mean": -3.3210645344894374e-10, "adam_stats/lr_effective_min": -3.274960181443021e-05, "adam_stats/m_t_max": 0.010016502812504768, "adam_stats/m_t_mean": 8.317567468107967e-11, "adam_stats/m_t_min": -0.0099962642416358, "adam_stats/v_t_max": 1.534144212200772e-05, "adam_stats/v_t_mean": 1.3437110582201206e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166667461395264, "all_logprobs": -0.1397826075553894, "all_logprobs/max": 0.0, "all_logprobs/median": -8.225440979003906e-06, "all_logprobs/min": -15.625, "all_logprobs/p1": -2.5437498092651367, "all_logprobs/p10": -0.3515625, "all_logprobs/p25": -0.006805419921875, "all_logprobs/p5": -0.8828125, "all_logprobs/p75": -2.384185791015625e-07, "all_logprobs/var": 0.2449142187833786, "clip_ratio": 0.0, "completion_length": 706.46875, "completion_length/correct": 632.27587890625, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 597.0, "completion_length/correct/min": 212.0, "completion_length/correct/p25": 402.0, "completion_length/correct/p75": 802.0, "completion_length/correct/var": 64575.88671875, "completion_length/incorrect": 819.7105102539062, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 13.0, "completion_length/incorrect/p25": 734.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 107071.8359375, "completion_length/max": 1024.0, "completion_length/median": 721.0, "completion_length/min": 13.0, "completion_length/p25": 478.25, "completion_length/p75": 1024.0, "completion_length/var": 88937.3671875, "curvature_clip_ratio_token_fisher": 0.0783090814948082, "curvature_clip_ratio_token_hessian": 0.037790656089782715, "curvature_clip_ratio_total_fisher": 0.0783090814948082, "curvature_clip_ratio_total_full": 0.0783090814948082, "curvature_clip_ratio_total_hessian": 0.037790656089782715, "epoch": 0.0064, "feature_vector_variance/max_squared_error": 95890.9609375, "feature_vector_variance/metric": 24016.69921875, "generated_tokens/total": 242173.0, "global_fisher_curvature": 272384.0, "global_fisher_curvature/max": 272384.0, "global_fisher_curvature/median": 272384.0, "global_fisher_curvature/min": 272384.0, "global_fisher_curvature/p25": 272384.0, "global_fisher_curvature/p75": 272384.0, "global_fisher_curvature/p85": 272384.0, "global_fisher_curvature/p90": 272384.0, "global_fisher_curvature/p95": 272384.0, "global_fisher_curvature/p99": 272384.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.7567148208618164e-06, "global_fisher_kl_divergence/max": 2.7567148208618164e-06, "global_fisher_kl_divergence/median": 2.7567148208618164e-06, "global_fisher_kl_divergence/min": 2.7567148208618164e-06, "global_fisher_kl_divergence/p25": 2.7567148208618164e-06, "global_fisher_kl_divergence/p75": 2.7567148208618164e-06, "global_fisher_kl_divergence/p85": 2.7567148208618164e-06, "global_fisher_kl_divergence/p90": 2.7567148208618164e-06, "global_fisher_kl_divergence/p95": 2.7567148208618164e-06, "global_fisher_kl_divergence/p99": 2.7567148208618164e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.04931640625, "global_full_update_term/max": 0.04931640625, "global_full_update_term/median": 0.04931640625, "global_full_update_term/min": 0.04931640625, "global_full_update_term/p25": 0.04931640625, "global_full_update_term/p75": 0.04931640625, "global_full_update_term/p85": 0.04931640625, "global_full_update_term/p90": 0.04931640625, "global_full_update_term/p95": 0.04931640625, "global_full_update_term/p99": 0.04931640625, "global_full_update_term/var": NaN, "global_hessian_coeff": 35072.0, "global_hessian_coeff/max": 35072.0, "global_hessian_coeff/median": 35072.0, "global_hessian_coeff/min": 35072.0, "global_hessian_coeff/p25": 35072.0, "global_hessian_coeff/p75": 35072.0, "global_hessian_coeff/p99": 35072.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 35072.0, "global_hessian_coeff_abs/max": 35072.0, "global_hessian_coeff_abs/median": 35072.0, "global_hessian_coeff_abs/min": 35072.0, "global_hessian_coeff_abs/p25": 35072.0, "global_hessian_coeff_abs/p75": 35072.0, "global_hessian_coeff_abs/p99": 35072.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.2898753583431244, "learning_rate": 6e-06, "loss": -0.6042, "masked_global_fisher_curvature": 23680.0, "masked_global_fisher_curvature/max": 23680.0, "masked_global_fisher_curvature/median": 23680.0, "masked_global_fisher_curvature/min": 23680.0, "masked_global_fisher_curvature/p25": 23680.0, "masked_global_fisher_curvature/p75": 23680.0, "masked_global_fisher_curvature/p85": 23680.0, "masked_global_fisher_curvature/p90": 23680.0, "masked_global_fisher_curvature/p95": 23680.0, "masked_global_fisher_curvature/p99": 23680.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/max": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/median": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/min": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/p25": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/p75": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/p85": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/p90": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/p95": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/p99": 2.4028122425079346e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.004608154296875, "masked_global_full_update_term/max": 0.004608154296875, "masked_global_full_update_term/median": 0.004608154296875, "masked_global_full_update_term/min": 0.004608154296875, "masked_global_full_update_term/p25": 0.004608154296875, "masked_global_full_update_term/p75": 0.004608154296875, "masked_global_full_update_term/p85": 0.004608154296875, "masked_global_full_update_term/p90": 0.004608154296875, "masked_global_full_update_term/p95": 0.004608154296875, "masked_global_full_update_term/p99": 0.004608154296875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -23552.0, "masked_global_hessian_coeff/max": -23552.0, "masked_global_hessian_coeff/median": -23552.0, "masked_global_hessian_coeff/min": -23552.0, "masked_global_hessian_coeff/p25": -23552.0, "masked_global_hessian_coeff/p75": -23552.0, "masked_global_hessian_coeff/p99": -23552.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 23552.0, "masked_global_hessian_coeff_abs/max": 23552.0, "masked_global_hessian_coeff_abs/median": 23552.0, "masked_global_hessian_coeff_abs/min": 23552.0, "masked_global_hessian_coeff_abs/p25": 23552.0, "masked_global_hessian_coeff_abs/p75": 23552.0, "masked_global_hessian_coeff_abs/p99": 23552.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 10.6923828125, "masked_per_sentence_gradient_norm/max": 41.0, "masked_per_sentence_gradient_norm/median": 10.25, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 17.65625, "masked_per_sentence_gradient_norm/var": 109.32778930664062, "masked_per_token_gradient_norm": 0.9315317273139954, "masked_per_token_gradient_norm/max": 61.25, "masked_per_token_gradient_norm/median": 1.0710209608078003e-07, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 0.000179290771484375, "masked_per_token_gradient_norm/var": 21.0537166595459, "masked_sentence_fisher_curvature": 27021.66796875, "masked_sentence_fisher_curvature/max": 199680.0, "masked_sentence_fisher_curvature/median": 21376.0, "masked_sentence_fisher_curvature/min": 6048.0, "masked_sentence_fisher_curvature/p25": 15680.0, "masked_sentence_fisher_curvature/p75": 29056.0, "masked_sentence_fisher_curvature/p85": 37056.0, "masked_sentence_fisher_curvature/p90": 40576.0, "masked_sentence_fisher_curvature/p95": 47872.0, "masked_sentence_fisher_curvature/p99": 124288.2421875, "masked_sentence_fisher_curvature/var": 643151872.0, "masked_sentence_fisher_kl_divergence": 2.737215254455805e-07, "masked_sentence_fisher_kl_divergence/max": 2.0265579223632812e-06, "masked_sentence_fisher_kl_divergence/median": 2.1606683731079102e-07, "masked_sentence_fisher_kl_divergence/min": 6.146728992462158e-08, "masked_sentence_fisher_kl_divergence/p25": 1.5855766832828522e-07, "masked_sentence_fisher_kl_divergence/p75": 2.942979335784912e-07, "masked_sentence_fisher_kl_divergence/p85": 3.748573362827301e-07, "masked_sentence_fisher_kl_divergence/p90": 4.10713255405426e-07, "masked_sentence_fisher_kl_divergence/p95": 4.842877388000488e-07, "masked_sentence_fisher_kl_divergence/p99": 1.2621308087545913e-06, "masked_sentence_fisher_kl_divergence/var": 6.613612923370843e-14, "masked_sentence_full_gradient_variance/max_squared_error": 217.7034912109375, "masked_sentence_full_gradient_variance/metric": 217.7034912109375, "masked_sentence_full_gradient_variance/p75": 217.7034912109375, "masked_sentence_full_gradient_variance/p90": 217.7034912109375, "masked_sentence_full_gradient_variance/p95": 217.7034912109375, "masked_sentence_full_gradient_variance/p99": 217.7034912109375, "masked_sentence_full_update_term": 0.0036683082580566406, "masked_sentence_full_update_term/max": 0.010986328125, "masked_sentence_full_update_term/median": 0.0037078857421875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0062103271484375, "masked_sentence_full_update_term/p85": 0.0079345703125, "masked_sentence_full_update_term/p90": 0.00885009765625, "masked_sentence_full_update_term/p95": 0.009765625, "masked_sentence_full_update_term/p99": 0.010812378488481045, "masked_sentence_full_update_term/var": 1.2477362361096311e-05, "masked_sentence_hessian_coeff": -78805.3359375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -87040.0, "masked_sentence_hessian_coeff/min": -239616.0, "masked_sentence_hessian_coeff/p25": -133376.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 5360765440.0, "masked_sentence_hessian_coeff_abs": 78805.3359375, "masked_sentence_hessian_coeff_abs/max": 239616.0, "masked_sentence_hessian_coeff_abs/median": 83456.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 133376.0, "masked_sentence_hessian_coeff_abs/p99": 228915.234375, "masked_sentence_hessian_coeff_abs/var": 5360765440.0, "masked_token_fisher_curvature": 27227.9609375, "masked_token_fisher_curvature/max": 983040.0, "masked_token_fisher_curvature/median": 1.475214958190918e-06, "masked_token_fisher_curvature/min": 9.781875224740546e-28, "masked_token_fisher_curvature/p25": 1.229636836796999e-09, "masked_token_fisher_curvature/p75": 0.58203125, "masked_token_fisher_curvature/p85": 1560.0, "masked_token_fisher_curvature/p90": 20480.0, "masked_token_fisher_curvature/p95": 162816.0, "masked_token_fisher_curvature/p99": 704512.0, "masked_token_fisher_curvature/var": 13171709952.0, "masked_token_fisher_kl_divergence": 2.757111872142559e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.496198998029996e-17, "masked_token_fisher_kl_divergence/min": 9.918233585063051e-39, "masked_token_fisher_kl_divergence/p25": 1.2440796412797536e-20, "masked_token_fisher_kl_divergence/p75": 5.8832938520936295e-12, "masked_token_fisher_kl_divergence/p85": 1.5832483768463135e-08, "masked_token_fisher_kl_divergence/p90": 2.076849341392517e-07, "masked_token_fisher_kl_divergence/p95": 1.646578311920166e-06, "masked_token_fisher_kl_divergence/p99": 7.12275505065918e-06, "masked_token_fisher_kl_divergence/var": 1.3506296775434024e-12, "masked_token_full_update_term": 0.000102467522083316, "masked_token_full_update_term/max": 0.004425048828125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.0505318641662598e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.431544660590589e-09, "masked_token_full_update_term/p85": 1.8035279936157167e-07, "masked_token_full_update_term/p90": 6.22868537902832e-06, "masked_token_full_update_term/p95": 0.00045013427734375, "masked_token_full_update_term/p99": 0.0030975341796875, "masked_token_full_update_term/var": 2.409648800494324e-07, "masked_token_hessian_coeff": -102404.0078125, "masked_token_hessian_coeff/max": 3712.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -6914048.0, "masked_token_hessian_coeff/p25": -0.88671875, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 1.7471923828125, "masked_token_hessian_coeff/var": 225010712576.0, "masked_token_hessian_coeff_abs": 102406.0546875, "masked_token_hessian_coeff_abs/max": 6914048.0, "masked_token_hessian_coeff_abs/median": 0.0001926422119140625, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.296875, "masked_token_hessian_coeff_abs/p99": 2752512.0, "masked_token_hessian_coeff_abs/var": 225010302976.0, "mean_logprobs": -0.1826171875, "mean_logprobs/var": 0.06298828125, "num_completions/total": 384, "per_sentence_gradient_norm": 80.42448425292969, "per_sentence_gradient_norm/max": 370.0, "per_sentence_gradient_norm/median": 73.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 122.75, "per_sentence_gradient_norm/var": 7309.4189453125, "per_token_feature_norm": 157.44090270996094, "per_token_feature_norm/max": 332.0, "per_token_feature_norm/median": 147.0, "per_token_feature_norm/min": 59.75, "per_token_feature_norm/p25": 121.5, "per_token_feature_norm/p75": 186.0, "per_token_feature_norm/var": 2138.04541015625, "per_token_gradient_norm": 8.527828216552734, "per_token_gradient_norm/max": 406.0, "per_token_gradient_norm/median": 5.513429641723633e-07, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 0.000743865966796875, "per_token_gradient_norm/var": 1195.7529296875, "per_token_policy_error_norm": 0.07273751497268677, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.061565447598695755, "policy_entropy": 0.15185335278511047, "policy_entropy/max": 3.734375, "policy_entropy/median": 0.000110626220703125, "policy_entropy/min": 1.4299672557172016e-13, "policy_entropy/p25": 4.26173210144043e-06, "policy_entropy/p75": 0.044189453125, "policy_entropy/var": 0.12720026075839996, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166667461395264, "policy_sharpness": 7.519021034240723, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 4.011474609375, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 12.989287376403809, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166667461395264, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166667461395264, "sentence_fisher_curvature": 604850.6875, "sentence_fisher_curvature/max": 1720320.0, "sentence_fisher_curvature/median": 602112.0, "sentence_fisher_curvature/min": 45568.0, "sentence_fisher_curvature/p25": 138240.0, "sentence_fisher_curvature/p75": 968704.0, "sentence_fisher_curvature/p85": 1060864.0, "sentence_fisher_curvature/p90": 1187840.0, "sentence_fisher_curvature/p95": 1255424.0, "sentence_fisher_curvature/p99": 1634713.875, "sentence_fisher_curvature/var": 188721201152.0, "sentence_fisher_kl_divergence": 6.12426083534956e-06, "sentence_fisher_kl_divergence/max": 1.7404556274414062e-05, "sentence_fisher_kl_divergence/median": 6.109476089477539e-06, "sentence_fisher_kl_divergence/min": 4.6193599700927734e-07, "sentence_fisher_kl_divergence/p25": 1.4007091522216797e-06, "sentence_fisher_kl_divergence/p75": 9.804964065551758e-06, "sentence_fisher_kl_divergence/p85": 1.074373722076416e-05, "sentence_fisher_kl_divergence/p90": 1.2040138244628906e-05, "sentence_fisher_kl_divergence/p95": 1.271069049835205e-05, "sentence_fisher_kl_divergence/p99": 1.649856858421117e-05, "sentence_fisher_kl_divergence/var": 1.9327921207956678e-11, "sentence_full_gradient_variance/max_squared_error": 13519.375, "sentence_full_gradient_variance/metric": 13519.375, "sentence_full_gradient_variance/p75": 13519.375, "sentence_full_gradient_variance/p90": 13519.375, "sentence_full_gradient_variance/p95": 13519.375, "sentence_full_gradient_variance/p99": 13519.375, "sentence_full_update_term": 0.0319671630859375, "sentence_full_update_term/max": 0.11328125, "sentence_full_update_term/median": 0.031494140625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.05914306640625, "sentence_full_update_term/p85": 0.0679931640625, "sentence_full_update_term/p90": 0.071533203125, "sentence_full_update_term/p95": 0.080322265625, "sentence_full_update_term/p99": 0.08823250234127045, "sentence_full_update_term/var": 0.0009218906634487212, "sentence_hessian_coeff": 185216.671875, "sentence_hessian_coeff/max": 1044480.0, "sentence_hessian_coeff/median": 97792.0, "sentence_hessian_coeff/min": -89600.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 329216.0, "sentence_hessian_coeff/p99": 713729.0625, "sentence_hessian_coeff/var": 48413585408.0, "sentence_hessian_coeff_abs": 187215.34375, "sentence_hessian_coeff_abs/max": 1044480.0, "sentence_hessian_coeff_abs/median": 97792.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 329216.0, "sentence_hessian_coeff_abs/p99": 713729.0625, "sentence_hessian_coeff_abs/var": 47661383680.0, "step": 4, "token_fisher_curvature": 767528.0, "token_fisher_curvature/max": 95944704.0, "token_fisher_curvature/median": 6.556510925292969e-06, "token_fisher_curvature/min": 9.781875224740546e-28, "token_fisher_curvature/p25": 2.1827872842550278e-09, "token_fisher_curvature/p75": 198.0, "token_fisher_curvature/p85": 54784.0, "token_fisher_curvature/p90": 436224.0, "token_fisher_curvature/p95": 3063808.0, "token_fisher_curvature/p99": 21233664.0, "token_fisher_curvature/var": 15770028343296.0, "token_fisher_kl_divergence": 7.772055141685996e-06, "token_fisher_kl_divergence/max": 0.000972747802734375, "token_fisher_kl_divergence/median": 6.635317295611287e-17, "token_fisher_kl_divergence/min": 9.918233585063051e-39, "token_fisher_kl_divergence/p25": 2.2128735747018596e-20, "token_fisher_kl_divergence/p75": 2.0081643015146255e-09, "token_fisher_kl_divergence/p85": 5.550682544708252e-07, "token_fisher_kl_divergence/p90": 4.410743713378906e-06, "token_fisher_kl_divergence/p95": 3.0994415283203125e-05, "token_fisher_kl_divergence/p99": 0.00021457672119140625, "token_fisher_kl_divergence/var": 1.6171398664610592e-09, "token_full_update_term": 0.0009372858330607414, "token_full_update_term/max": 0.044921875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.0505318641662598e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.0419171303510666e-08, "token_full_update_term/p85": 9.894371032714844e-06, "token_full_update_term/p90": 0.00077056884765625, "token_full_update_term/p95": 0.006866455078125, "token_full_update_term/p99": 0.0208740234375, "token_full_update_term/var": 1.3735356333199888e-05, "token_hessian_coeff": 188844.84375, "token_hessian_coeff/max": 95420416.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -12320768.0, "token_hessian_coeff/p25": -1.484375, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 15335424.0, "token_hessian_coeff/var": 10993865326592.0, "token_hessian_coeff_abs": 606598.625, "token_hessian_coeff_abs/max": 95420416.0, "token_hessian_coeff_abs/median": 0.00122833251953125, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 6.78125, "token_hessian_coeff_abs/p99": 15335424.0, "token_hessian_coeff_abs/var": 10661559009280.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166667461395264, "adam_stats/lm_head/lr_effective_max": 4.3027142965001985e-05, "adam_stats/lm_head/lr_effective_mean": -7.580794325612317e-11, "adam_stats/lm_head/lr_effective_min": -4.326616908656433e-05, "adam_stats/lm_head/lr_effective_std": 1.5547150269412668e-06, "adam_stats/lr_effective_max": 4.375269782030955e-05, "adam_stats/lr_effective_mean": -3.521727354183213e-10, "adam_stats/lr_effective_min": -4.3753829231718555e-05, "adam_stats/m_t_max": 0.008605916984379292, "adam_stats/m_t_mean": 4.7043261647283785e-11, "adam_stats/m_t_min": -0.008358820341527462, "adam_stats/v_t_max": 1.5819679902051575e-05, "adam_stats/v_t_mean": 1.398914512874827e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166667461395264, "all_logprobs": -0.14800159633159637, "all_logprobs/max": 0.0, "all_logprobs/median": -8.58306884765625e-06, "all_logprobs/min": -11.5, "all_logprobs/p1": -2.5625, "all_logprobs/p10": -0.390625, "all_logprobs/p25": -0.01416015625, "all_logprobs/p5": -0.921875, "all_logprobs/p75": -1.1920928955078125e-07, "all_logprobs/var": 0.24746602773666382, "clip_ratio": 0.0, "completion_length": 644.9896240234375, "completion_length/correct": 587.6896362304688, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 563.0, "completion_length/correct/min": 206.0, "completion_length/correct/p25": 405.0, "completion_length/correct/p75": 731.5, "completion_length/correct/var": 52895.30859375, "completion_length/incorrect": 732.4473876953125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 947.0, "completion_length/incorrect/min": 10.0, "completion_length/incorrect/p25": 466.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 125947.3359375, "completion_length/max": 1024.0, "completion_length/median": 596.0, "completion_length/min": 10.0, "completion_length/p25": 411.0, "completion_length/p75": 996.25, "completion_length/var": 85854.4296875, "curvature_clip_ratio_token_fisher": 0.09539882838726044, "curvature_clip_ratio_token_hessian": 0.048757247626781464, "curvature_clip_ratio_total_fisher": 0.09539882838726044, "curvature_clip_ratio_total_full": 0.09539882838726044, "curvature_clip_ratio_total_hessian": 0.048757247626781464, "epoch": 0.008, "feature_vector_variance/max_squared_error": 92143.765625, "feature_vector_variance/metric": 24862.53125, "generated_tokens/total": 304092.0, "global_fisher_curvature": 240640.0, "global_fisher_curvature/max": 240640.0, "global_fisher_curvature/median": 240640.0, "global_fisher_curvature/min": 240640.0, "global_fisher_curvature/p25": 240640.0, "global_fisher_curvature/p75": 240640.0, "global_fisher_curvature/p85": 240640.0, "global_fisher_curvature/p90": 240640.0, "global_fisher_curvature/p95": 240640.0, "global_fisher_curvature/p99": 240640.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.32133674621582e-06, "global_fisher_kl_divergence/max": 4.32133674621582e-06, "global_fisher_kl_divergence/median": 4.32133674621582e-06, "global_fisher_kl_divergence/min": 4.32133674621582e-06, "global_fisher_kl_divergence/p25": 4.32133674621582e-06, "global_fisher_kl_divergence/p75": 4.32133674621582e-06, "global_fisher_kl_divergence/p85": 4.32133674621582e-06, "global_fisher_kl_divergence/p90": 4.32133674621582e-06, "global_fisher_kl_divergence/p95": 4.32133674621582e-06, "global_fisher_kl_divergence/p99": 4.32133674621582e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.060791015625, "global_full_update_term/max": 0.060791015625, "global_full_update_term/median": 0.060791015625, "global_full_update_term/min": 0.060791015625, "global_full_update_term/p25": 0.060791015625, "global_full_update_term/p75": 0.060791015625, "global_full_update_term/p85": 0.060791015625, "global_full_update_term/p90": 0.060791015625, "global_full_update_term/p95": 0.060791015625, "global_full_update_term/p99": 0.060791015625, "global_full_update_term/var": NaN, "global_hessian_coeff": 30080.0, "global_hessian_coeff/max": 30080.0, "global_hessian_coeff/median": 30080.0, "global_hessian_coeff/min": 30080.0, "global_hessian_coeff/p25": 30080.0, "global_hessian_coeff/p75": 30080.0, "global_hessian_coeff/p99": 30080.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 30080.0, "global_hessian_coeff_abs/max": 30080.0, "global_hessian_coeff_abs/median": 30080.0, "global_hessian_coeff_abs/min": 30080.0, "global_hessian_coeff_abs/p25": 30080.0, "global_hessian_coeff_abs/p75": 30080.0, "global_hessian_coeff_abs/p99": 30080.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.34652185440063477, "learning_rate": 7.5e-06, "loss": -0.6042, "masked_global_fisher_curvature": 16512.0, "masked_global_fisher_curvature/max": 16512.0, "masked_global_fisher_curvature/median": 16512.0, "masked_global_fisher_curvature/min": 16512.0, "masked_global_fisher_curvature/p25": 16512.0, "masked_global_fisher_curvature/p75": 16512.0, "masked_global_fisher_curvature/p85": 16512.0, "masked_global_fisher_curvature/p90": 16512.0, "masked_global_fisher_curvature/p95": 16512.0, "masked_global_fisher_curvature/p99": 16512.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/max": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/median": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/min": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/p25": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/p75": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/p85": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/p90": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/p95": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/p99": 2.980232238769531e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.006011962890625, "masked_global_full_update_term/max": 0.006011962890625, "masked_global_full_update_term/median": 0.006011962890625, "masked_global_full_update_term/min": 0.006011962890625, "masked_global_full_update_term/p25": 0.006011962890625, "masked_global_full_update_term/p75": 0.006011962890625, "masked_global_full_update_term/p85": 0.006011962890625, "masked_global_full_update_term/p90": 0.006011962890625, "masked_global_full_update_term/p95": 0.006011962890625, "masked_global_full_update_term/p99": 0.006011962890625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -17408.0, "masked_global_hessian_coeff/max": -17408.0, "masked_global_hessian_coeff/median": -17408.0, "masked_global_hessian_coeff/min": -17408.0, "masked_global_hessian_coeff/p25": -17408.0, "masked_global_hessian_coeff/p75": -17408.0, "masked_global_hessian_coeff/p99": -17408.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 17408.0, "masked_global_hessian_coeff_abs/max": 17408.0, "masked_global_hessian_coeff_abs/median": 17408.0, "masked_global_hessian_coeff_abs/min": 17408.0, "masked_global_hessian_coeff_abs/p25": 17408.0, "masked_global_hessian_coeff_abs/p75": 17408.0, "masked_global_hessian_coeff_abs/p99": 17408.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 8.682291984558105, "masked_per_sentence_gradient_norm/max": 29.0, "masked_per_sentence_gradient_norm/median": 8.9375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 14.328125, "masked_per_sentence_gradient_norm/var": 71.96261596679688, "masked_per_token_gradient_norm": 0.7592794895172119, "masked_per_token_gradient_norm/max": 46.5, "masked_per_token_gradient_norm/median": 2.514570951461792e-07, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 0.0001659393310546875, "masked_per_token_gradient_norm/var": 12.953351974487305, "masked_sentence_fisher_curvature": 16606.0, "masked_sentence_fisher_curvature/max": 101376.0, "masked_sentence_fisher_curvature/median": 11520.0, "masked_sentence_fisher_curvature/min": 4704.0, "masked_sentence_fisher_curvature/p25": 8736.0, "masked_sentence_fisher_curvature/p75": 16704.0, "masked_sentence_fisher_curvature/p85": 27200.0, "masked_sentence_fisher_curvature/p90": 34688.0, "masked_sentence_fisher_curvature/p95": 38656.0, "masked_sentence_fisher_curvature/p99": 100403.203125, "masked_sentence_fisher_curvature/var": 241698672.0, "masked_sentence_fisher_kl_divergence": 2.9890119890296774e-07, "masked_sentence_fisher_kl_divergence/max": 1.8253922462463379e-06, "masked_sentence_fisher_kl_divergence/median": 2.076849341392517e-07, "masked_sentence_fisher_kl_divergence/min": 8.475035429000854e-08, "masked_sentence_fisher_kl_divergence/p25": 1.5692785382270813e-07, "masked_sentence_fisher_kl_divergence/p75": 3.0081719160079956e-07, "masked_sentence_fisher_kl_divergence/p85": 4.898756742477417e-07, "masked_sentence_fisher_kl_divergence/p90": 6.239861249923706e-07, "masked_sentence_fisher_kl_divergence/p95": 6.966292858123779e-07, "masked_sentence_fisher_kl_divergence/p99": 1.8041581597572076e-06, "masked_sentence_fisher_kl_divergence/var": 7.82111327741683e-14, "masked_sentence_full_gradient_variance/max_squared_error": 142.75144958496094, "masked_sentence_full_gradient_variance/metric": 142.75144958496094, "masked_sentence_full_gradient_variance/p75": 142.75144958496094, "masked_sentence_full_gradient_variance/p90": 142.75144958496094, "masked_sentence_full_gradient_variance/p95": 142.75144958496094, "masked_sentence_full_gradient_variance/p99": 142.75144958496094, "masked_sentence_full_update_term": 0.0038390159606933594, "masked_sentence_full_update_term/max": 0.0115966796875, "masked_sentence_full_update_term/median": 0.0042724609375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00635528564453125, "masked_sentence_full_update_term/p85": 0.0073394775390625, "masked_sentence_full_update_term/p90": 0.008880615234375, "masked_sentence_full_update_term/p95": 0.0102691650390625, "masked_sentence_full_update_term/p99": 0.0115966796875, "masked_sentence_full_update_term/var": 1.320671526627848e-05, "masked_sentence_hessian_coeff": -61320.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -76800.0, "masked_sentence_hessian_coeff/min": -257024.0, "masked_sentence_hessian_coeff/p25": -103936.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 3132712960.0, "masked_sentence_hessian_coeff_abs": 61320.0, "masked_sentence_hessian_coeff_abs/max": 257024.0, "masked_sentence_hessian_coeff_abs/median": 75264.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 103936.0, "masked_sentence_hessian_coeff_abs/p99": 180173.046875, "masked_sentence_hessian_coeff_abs/var": 3132712960.0, "masked_token_fisher_curvature": 16929.103515625, "masked_token_fisher_curvature/max": 552960.0, "masked_token_fisher_curvature/median": 7.040798664093018e-07, "masked_token_fisher_curvature/min": 6.113672015462841e-29, "masked_token_fisher_curvature/p25": 4.911271389573812e-10, "masked_token_fisher_curvature/p75": 1.765625, "masked_token_fisher_curvature/p85": 1808.0, "masked_token_fisher_curvature/p90": 16896.0, "masked_token_fisher_curvature/p95": 111616.0, "masked_token_fisher_curvature/p99": 415072.0, "masked_token_fisher_curvature/var": 4589030400.0, "masked_token_fisher_kl_divergence": 3.0483224122690444e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.2685165418080402e-17, "masked_token_fisher_kl_divergence/min": 1.1020259538958945e-39, "masked_token_fisher_kl_divergence/p25": 8.84090638696676e-21, "masked_token_fisher_kl_divergence/p75": 3.183231456205249e-11, "masked_token_fisher_kl_divergence/p85": 3.259629011154175e-08, "masked_token_fisher_kl_divergence/p90": 3.03611159324646e-07, "masked_token_fisher_kl_divergence/p95": 2.0116567611694336e-06, "masked_token_fisher_kl_divergence/p99": 7.467344403266907e-06, "masked_token_fisher_kl_divergence/var": 1.4881553021123262e-12, "masked_token_full_update_term": 0.0001078791101463139, "masked_token_full_update_term/max": 0.004425048828125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.291534423828125e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.1350493878126144e-09, "masked_token_full_update_term/p85": 1.5925616025924683e-07, "masked_token_full_update_term/p90": 9.834766387939453e-06, "masked_token_full_update_term/p95": 0.0005340576171875, "masked_token_full_update_term/p99": 0.003141641616821289, "masked_token_full_update_term/var": 2.50071337859481e-07, "masked_token_hessian_coeff": -81979.3828125, "masked_token_hessian_coeff/max": 5792.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -5177344.0, "masked_token_hessian_coeff/p25": -0.498046875, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 1.4609375, "masked_token_hessian_coeff/var": 139442896896.0, "masked_token_hessian_coeff_abs": 81980.8046875, "masked_token_hessian_coeff_abs/max": 5177344.0, "masked_token_hessian_coeff_abs/median": 0.0004062652587890625, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.7890625, "masked_token_hessian_coeff_abs/p99": 2179072.0, "masked_token_hessian_coeff_abs/var": 139442667520.0, "mean_logprobs": -0.16796875, "mean_logprobs/var": 0.0235595703125, "num_completions/total": 480, "per_sentence_gradient_norm": 76.83854675292969, "per_sentence_gradient_norm/max": 308.0, "per_sentence_gradient_norm/median": 81.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 122.75, "per_sentence_gradient_norm/var": 5912.73291015625, "per_token_feature_norm": 160.04248046875, "per_token_feature_norm/max": 328.0, "per_token_feature_norm/median": 150.0, "per_token_feature_norm/min": 62.25, "per_token_feature_norm/p25": 123.0, "per_token_feature_norm/p75": 190.0, "per_token_feature_norm/var": 2228.767578125, "per_token_gradient_norm": 9.033638954162598, "per_token_gradient_norm/max": 408.0, "per_token_gradient_norm/median": 1.0505318641662598e-06, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 0.00090789794921875, "per_token_gradient_norm/var": 1230.943115234375, "per_token_policy_error_norm": 0.07718577235937119, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0647139623761177, "policy_entropy": 0.1640373319387436, "policy_entropy/max": 3.71875, "policy_entropy/median": 0.00011491775512695312, "policy_entropy/min": 3.7969627442180354e-14, "policy_entropy/p25": 3.2782554626464844e-06, "policy_entropy/p75": 0.080078125, "policy_entropy/var": 0.13270173966884613, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166667461395264, "policy_sharpness": 7.298753261566162, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 3.31951904296875, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 13.797625541687012, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166667461395264, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166667461395264, "sentence_fisher_curvature": 544840.6875, "sentence_fisher_curvature/max": 2621440.0, "sentence_fisher_curvature/median": 581632.0, "sentence_fisher_curvature/min": 11648.0, "sentence_fisher_curvature/p25": 130048.0, "sentence_fisher_curvature/p75": 828416.0, "sentence_fisher_curvature/p85": 958464.0, "sentence_fisher_curvature/p90": 989184.0, "sentence_fisher_curvature/p95": 1183744.0, "sentence_fisher_curvature/p99": 1306218.625, "sentence_fisher_curvature/var": 183257284608.0, "sentence_fisher_kl_divergence": 9.808408321987372e-06, "sentence_fisher_kl_divergence/max": 4.7206878662109375e-05, "sentence_fisher_kl_divergence/median": 1.049041748046875e-05, "sentence_fisher_kl_divergence/min": 2.0954757928848267e-07, "sentence_fisher_kl_divergence/p25": 2.346932888031006e-06, "sentence_fisher_kl_divergence/p75": 1.4916062355041504e-05, "sentence_fisher_kl_divergence/p85": 1.722574234008789e-05, "sentence_fisher_kl_divergence/p90": 1.7821788787841797e-05, "sentence_fisher_kl_divergence/p95": 2.130866050720215e-05, "sentence_fisher_kl_divergence/p99": 2.3537950255558826e-05, "sentence_fisher_kl_divergence/var": 5.940806874216165e-11, "sentence_full_gradient_variance/max_squared_error": 11540.3037109375, "sentence_full_gradient_variance/metric": 11540.3037109375, "sentence_full_gradient_variance/p75": 11540.3037109375, "sentence_full_gradient_variance/p90": 11540.3037109375, "sentence_full_gradient_variance/p95": 11540.3037109375, "sentence_full_gradient_variance/p99": 11540.3037109375, "sentence_full_update_term": 0.03913625329732895, "sentence_full_update_term/max": 0.1279296875, "sentence_full_update_term/median": 0.045654296875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0670166015625, "sentence_full_update_term/p85": 0.076904296875, "sentence_full_update_term/p90": 0.083251953125, "sentence_full_update_term/p95": 0.089599609375, "sentence_full_update_term/p99": 0.11679691076278687, "sentence_full_update_term/var": 0.0012705640401691198, "sentence_hessian_coeff": 157914.0, "sentence_hessian_coeff/max": 1187840.0, "sentence_hessian_coeff/median": 89088.0, "sentence_hessian_coeff/min": -86528.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 256768.0, "sentence_hessian_coeff/p99": 685876.8125, "sentence_hessian_coeff/var": 42496606208.0, "sentence_hessian_coeff_abs": 161627.34375, "sentence_hessian_coeff_abs/max": 1187840.0, "sentence_hessian_coeff_abs/median": 89088.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 256768.0, "sentence_hessian_coeff_abs/p99": 685876.8125, "sentence_hessian_coeff_abs/var": 41297551360.0, "step": 5, "token_fisher_curvature": 728024.0625, "token_fisher_curvature/max": 80216064.0, "token_fisher_curvature/median": 5.513429641723633e-06, "token_fisher_curvature/min": 6.113672015462841e-29, "token_fisher_curvature/p25": 1.0040821507573128e-09, "token_fisher_curvature/p75": 586.0, "token_fisher_curvature/p85": 78336.0, "token_fisher_curvature/p90": 473088.0, "token_fisher_curvature/p95": 3047424.0, "token_fisher_curvature/p99": 19791872.0, "token_fisher_curvature/var": 13278067556352.0, "token_fisher_kl_divergence": 1.3107679478707723e-05, "token_fisher_kl_divergence/max": 0.00144195556640625, "token_fisher_kl_divergence/median": 9.93129189996722e-17, "token_fisher_kl_divergence/min": 1.1020259538958945e-39, "token_fisher_kl_divergence/p25": 1.810532924756067e-20, "token_fisher_kl_divergence/p75": 1.0564690455794334e-08, "token_fisher_kl_divergence/p85": 1.4081597328186035e-06, "token_fisher_kl_divergence/p90": 8.52346420288086e-06, "token_fisher_kl_divergence/p95": 5.4836273193359375e-05, "token_fisher_kl_divergence/p99": 0.0003566741943359375, "token_fisher_kl_divergence/var": 4.304668621557539e-09, "token_full_update_term": 0.0012714894255623221, "token_full_update_term/max": 0.054931640625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.291534423828125e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.100124791264534e-08, "token_full_update_term/p85": 9.312666952610016e-05, "token_full_update_term/p90": 0.00153350830078125, "token_full_update_term/p95": 0.00970458984375, "token_full_update_term/p99": 0.027099609375, "token_full_update_term/var": 2.3269185476237908e-05, "token_hessian_coeff": 171610.484375, "token_hessian_coeff/max": 78118912.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -10354688.0, "token_hessian_coeff/p25": -1.0703125, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 14733824.0, "token_hessian_coeff/var": 9346950889472.0, "token_hessian_coeff_abs": 587226.1875, "token_hessian_coeff_abs/max": 78118912.0, "token_hessian_coeff_abs/median": 0.00188446044921875, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 5.4375, "token_hessian_coeff_abs/p99": 14733824.0, "token_hessian_coeff_abs/var": 9031561248768.0 }, { "accuracy_reward": 0.625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2368421107530594, "adam_stats/lm_head/lr_effective_max": 5.434885679278523e-05, "adam_stats/lm_head/lr_effective_mean": -7.398642809519629e-11, "adam_stats/lm_head/lr_effective_min": -5.429330121842213e-05, "adam_stats/lm_head/lr_effective_std": 1.803916234166536e-06, "adam_stats/lr_effective_max": 5.507927562575787e-05, "adam_stats/lr_effective_mean": -3.581885343884039e-10, "adam_stats/lr_effective_min": -5.512959978659637e-05, "adam_stats/m_t_max": 0.009059252217411995, "adam_stats/m_t_mean": 4.110942836144105e-11, "adam_stats/m_t_min": -0.006717273965477943, "adam_stats/v_t_max": 1.6047997632995248e-05, "adam_stats/v_t_mean": 1.481897395592957e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2368421107530594, "all_logprobs": -0.13870376348495483, "all_logprobs/max": 0.0, "all_logprobs/median": -4.291534423828125e-06, "all_logprobs/min": -11.625, "all_logprobs/p1": -2.625, "all_logprobs/p10": -0.3203125, "all_logprobs/p25": -0.00701904296875, "all_logprobs/p5": -0.85546875, "all_logprobs/p75": -1.1920928955078125e-07, "all_logprobs/var": 0.2491973638534546, "clip_ratio": 0.0, "completion_length": 650.28125, "completion_length/correct": 570.6500244140625, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 530.0, "completion_length/correct/min": 145.0, "completion_length/correct/p25": 348.5, "completion_length/correct/p75": 778.25, "completion_length/correct/var": 71363.3203125, "completion_length/incorrect": 783.0, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 56.0, "completion_length/incorrect/p25": 611.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 112566.859375, "completion_length/max": 1024.0, "completion_length/median": 648.0, "completion_length/min": 56.0, "completion_length/p25": 362.0, "completion_length/p75": 1024.0, "completion_length/var": 96472.1875, "curvature_clip_ratio_token_fisher": 0.093645378947258, "curvature_clip_ratio_token_hessian": 0.04829641059041023, "curvature_clip_ratio_total_fisher": 0.093645378947258, "curvature_clip_ratio_total_full": 0.093645378947258, "curvature_clip_ratio_total_hessian": 0.04829641059041023, "epoch": 0.0096, "feature_vector_variance/max_squared_error": 93370.921875, "feature_vector_variance/metric": 24965.291015625, "generated_tokens/total": 366519.0, "global_fisher_curvature": 185344.0, "global_fisher_curvature/max": 185344.0, "global_fisher_curvature/median": 185344.0, "global_fisher_curvature/min": 185344.0, "global_fisher_curvature/p25": 185344.0, "global_fisher_curvature/p75": 185344.0, "global_fisher_curvature/p85": 185344.0, "global_fisher_curvature/p90": 185344.0, "global_fisher_curvature/p95": 185344.0, "global_fisher_curvature/p99": 185344.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.21540641784668e-06, "global_fisher_kl_divergence/max": 5.21540641784668e-06, "global_fisher_kl_divergence/median": 5.21540641784668e-06, "global_fisher_kl_divergence/min": 5.21540641784668e-06, "global_fisher_kl_divergence/p25": 5.21540641784668e-06, "global_fisher_kl_divergence/p75": 5.21540641784668e-06, "global_fisher_kl_divergence/p85": 5.21540641784668e-06, "global_fisher_kl_divergence/p90": 5.21540641784668e-06, "global_fisher_kl_divergence/p95": 5.21540641784668e-06, "global_fisher_kl_divergence/p99": 5.21540641784668e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.1025390625, "global_full_update_term/max": 0.1025390625, "global_full_update_term/median": 0.1025390625, "global_full_update_term/min": 0.1025390625, "global_full_update_term/p25": 0.1025390625, "global_full_update_term/p75": 0.1025390625, "global_full_update_term/p85": 0.1025390625, "global_full_update_term/p90": 0.1025390625, "global_full_update_term/p95": 0.1025390625, "global_full_update_term/p99": 0.1025390625, "global_full_update_term/var": NaN, "global_hessian_coeff": 29568.0, "global_hessian_coeff/max": 29568.0, "global_hessian_coeff/median": 29568.0, "global_hessian_coeff/min": 29568.0, "global_hessian_coeff/p25": 29568.0, "global_hessian_coeff/p75": 29568.0, "global_hessian_coeff/p99": 29568.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 29568.0, "global_hessian_coeff_abs/max": 29568.0, "global_hessian_coeff_abs/median": 29568.0, "global_hessian_coeff_abs/min": 29568.0, "global_hessian_coeff_abs/p25": 29568.0, "global_hessian_coeff_abs/p75": 29568.0, "global_hessian_coeff_abs/p99": 29568.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.41212695837020874, "learning_rate": 9e-06, "loss": -0.625, "masked_global_fisher_curvature": 6272.0, "masked_global_fisher_curvature/max": 6272.0, "masked_global_fisher_curvature/median": 6272.0, "masked_global_fisher_curvature/min": 6272.0, "masked_global_fisher_curvature/p25": 6272.0, "masked_global_fisher_curvature/p75": 6272.0, "masked_global_fisher_curvature/p85": 6272.0, "masked_global_fisher_curvature/p90": 6272.0, "masked_global_fisher_curvature/p95": 6272.0, "masked_global_fisher_curvature/p99": 6272.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/max": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/median": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/min": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/p25": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/p75": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/p85": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/p90": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/p95": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/p99": 1.7601996660232544e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0042724609375, "masked_global_full_update_term/max": 0.0042724609375, "masked_global_full_update_term/median": 0.0042724609375, "masked_global_full_update_term/min": 0.0042724609375, "masked_global_full_update_term/p25": 0.0042724609375, "masked_global_full_update_term/p75": 0.0042724609375, "masked_global_full_update_term/p85": 0.0042724609375, "masked_global_full_update_term/p90": 0.0042724609375, "masked_global_full_update_term/p95": 0.0042724609375, "masked_global_full_update_term/p99": 0.0042724609375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -13120.0, "masked_global_hessian_coeff/max": -13120.0, "masked_global_hessian_coeff/median": -13120.0, "masked_global_hessian_coeff/min": -13120.0, "masked_global_hessian_coeff/p25": -13120.0, "masked_global_hessian_coeff/p75": -13120.0, "masked_global_hessian_coeff/p99": -13120.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 13120.0, "masked_global_hessian_coeff_abs/max": 13120.0, "masked_global_hessian_coeff_abs/median": 13120.0, "masked_global_hessian_coeff_abs/min": 13120.0, "masked_global_hessian_coeff_abs/p25": 13120.0, "masked_global_hessian_coeff_abs/p75": 13120.0, "masked_global_hessian_coeff_abs/p99": 13120.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 6.7530927658081055, "masked_per_sentence_gradient_norm/max": 28.75, "masked_per_sentence_gradient_norm/median": 6.4375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 10.4375, "masked_per_sentence_gradient_norm/var": 44.170780181884766, "masked_per_token_gradient_norm": 0.5670995116233826, "masked_per_token_gradient_norm/max": 42.25, "masked_per_token_gradient_norm/median": 8.288770914077759e-08, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.821487426757812e-05, "masked_per_token_gradient_norm/var": 7.849676132202148, "masked_sentence_fisher_curvature": 7756.0732421875, "masked_sentence_fisher_curvature/max": 20992.0, "masked_sentence_fisher_curvature/median": 6912.0, "masked_sentence_fisher_curvature/min": 195.0, "masked_sentence_fisher_curvature/p25": 5072.0, "masked_sentence_fisher_curvature/p75": 9472.0, "masked_sentence_fisher_curvature/p85": 12208.0, "masked_sentence_fisher_curvature/p90": 13120.0, "masked_sentence_fisher_curvature/p95": 14208.0, "masked_sentence_fisher_curvature/p99": 20262.40234375, "masked_sentence_fisher_curvature/var": 15290519.0, "masked_sentence_fisher_kl_divergence": 2.180919835836903e-07, "masked_sentence_fisher_kl_divergence/max": 5.885958671569824e-07, "masked_sentence_fisher_kl_divergence/median": 1.94646418094635e-07, "masked_sentence_fisher_kl_divergence/min": 5.471520125865936e-09, "masked_sentence_fisher_kl_divergence/p25": 1.4295801520347595e-07, "masked_sentence_fisher_kl_divergence/p75": 2.6635825634002686e-07, "masked_sentence_fisher_kl_divergence/p85": 3.427267074584961e-07, "masked_sentence_fisher_kl_divergence/p90": 3.688037395477295e-07, "masked_sentence_fisher_kl_divergence/p95": 4.00003045797348e-07, "masked_sentence_fisher_kl_divergence/p99": 5.709007950827072e-07, "masked_sentence_fisher_kl_divergence/var": 1.208507645626862e-14, "masked_sentence_full_gradient_variance/max_squared_error": 87.57273864746094, "masked_sentence_full_gradient_variance/metric": 87.57273864746094, "masked_sentence_full_gradient_variance/p75": 87.57273864746094, "masked_sentence_full_gradient_variance/p90": 87.57273864746094, "masked_sentence_full_gradient_variance/p95": 87.57273864746094, "masked_sentence_full_gradient_variance/p99": 87.57273864746094, "masked_sentence_full_update_term": 0.003435293911024928, "masked_sentence_full_update_term/max": 0.01104736328125, "masked_sentence_full_update_term/median": 0.003875732421875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00534820556640625, "masked_sentence_full_update_term/p85": 0.00661468505859375, "masked_sentence_full_update_term/p90": 0.00762939453125, "masked_sentence_full_update_term/p95": 0.0098419189453125, "masked_sentence_full_update_term/p99": 0.01081543043255806, "masked_sentence_full_update_term/var": 1.0165311323362403e-05, "masked_sentence_hessian_coeff": -46820.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -59136.0, "masked_sentence_hessian_coeff/min": -145408.0, "masked_sentence_hessian_coeff/p25": -76160.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 1622523904.0, "masked_sentence_hessian_coeff_abs": 46820.0, "masked_sentence_hessian_coeff_abs/max": 145408.0, "masked_sentence_hessian_coeff_abs/median": 56320.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 76160.0, "masked_sentence_hessian_coeff_abs/p99": 136652.828125, "masked_sentence_hessian_coeff_abs/var": 1622523904.0, "masked_token_fisher_curvature": 9536.2783203125, "masked_token_fisher_curvature/max": 354304.0, "masked_token_fisher_curvature/median": 1.5273690223693848e-07, "masked_token_fisher_curvature/min": 8.589647551967072e-32, "masked_token_fisher_curvature/p25": 1.127773430198431e-10, "masked_token_fisher_curvature/p75": 0.1435546875, "masked_token_fisher_curvature/p85": 524.0, "masked_token_fisher_curvature/p90": 7136.0, "masked_token_fisher_curvature/p95": 55296.0, "masked_token_fisher_curvature/p99": 245760.0, "masked_token_fisher_curvature/var": 1636290048.0, "masked_token_fisher_kl_divergence": 2.6812466558112646e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.2825985813177425e-18, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.1763735522036263e-21, "masked_token_fisher_kl_divergence/p75": 4.035882739117369e-12, "masked_token_fisher_kl_divergence/p85": 1.4726538211107254e-08, "masked_token_fisher_kl_divergence/p90": 2.0023435354232788e-07, "masked_token_fisher_kl_divergence/p95": 1.55717134475708e-06, "masked_token_fisher_kl_divergence/p99": 6.9141387939453125e-06, "masked_token_fisher_kl_divergence/var": 1.2934826820742984e-12, "masked_token_full_update_term": 9.950123057933524e-05, "masked_token_full_update_term/max": 0.00439453125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.4123659133911133e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.493347998708487e-10, "masked_token_full_update_term/p85": 9.406358003616333e-08, "masked_token_full_update_term/p90": 5.930662155151367e-06, "masked_token_full_update_term/p95": 0.0004425048828125, "masked_token_full_update_term/p99": 0.0030364990234375, "masked_token_full_update_term/var": 2.300645434161197e-07, "masked_token_hessian_coeff": -61570.26171875, "masked_token_hessian_coeff/max": 8448.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -4325376.0, "masked_token_hessian_coeff/p25": -0.1982421875, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.946868896484375, "masked_token_hessian_coeff/var": 86529359872.0, "masked_token_hessian_coeff_abs": 61571.4296875, "masked_token_hessian_coeff_abs/max": 4325376.0, "masked_token_hessian_coeff_abs/median": 0.00011873245239257812, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.341796875, "masked_token_hessian_coeff_abs/p99": 1744896.0, "masked_token_hessian_coeff_abs/var": 86529212416.0, "mean_logprobs": -0.13671875, "mean_logprobs/var": 0.00531005859375, "num_completions/total": 576, "per_sentence_gradient_norm": 65.25260925292969, "per_sentence_gradient_norm/max": 260.0, "per_sentence_gradient_norm/median": 70.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 106.0, "per_sentence_gradient_norm/var": 3646.012939453125, "per_token_feature_norm": 158.7433624267578, "per_token_feature_norm/max": 314.0, "per_token_feature_norm/median": 152.0, "per_token_feature_norm/min": 65.0, "per_token_feature_norm/p25": 126.0, "per_token_feature_norm/p75": 187.0, "per_token_feature_norm/var": 1792.5269775390625, "per_token_gradient_norm": 7.7942938804626465, "per_token_gradient_norm/max": 402.0, "per_token_gradient_norm/median": 3.4458935260772705e-07, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 0.0004482269287109375, "per_token_gradient_norm/var": 1040.1470947265625, "per_token_policy_error_norm": 0.0710952877998352, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.06107174605131149, "policy_entropy": 0.14950913190841675, "policy_entropy/max": 3.90625, "policy_entropy/median": 6.031990051269531e-05, "policy_entropy/min": 1.682681771697503e-16, "policy_entropy/p25": 1.817941665649414e-06, "policy_entropy/p75": 0.045166015625, "policy_entropy/var": 0.12919816374778748, "policy_loss": -0.625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2368421107530594, "policy_sharpness": 7.516861915588379, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 3.9375, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 13.066253662109375, "reward": 0.625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2368421107530594, "rewards/accuracy_reward": 0.625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2368421107530594, "sentence_fisher_curvature": 489244.0, "sentence_fisher_curvature/max": 3129344.0, "sentence_fisher_curvature/median": 497664.0, "sentence_fisher_curvature/min": 25728.0, "sentence_fisher_curvature/p25": 80640.0, "sentence_fisher_curvature/p75": 701440.0, "sentence_fisher_curvature/p85": 792576.0, "sentence_fisher_curvature/p90": 1032192.0, "sentence_fisher_curvature/p95": 1116160.0, "sentence_fisher_curvature/p99": 1378309.625, "sentence_fisher_curvature/var": 200864284672.0, "sentence_fisher_kl_divergence": 1.375811825710116e-05, "sentence_fisher_kl_divergence/max": 8.821487426757812e-05, "sentence_fisher_kl_divergence/median": 1.4007091522216797e-05, "sentence_fisher_kl_divergence/min": 7.227063179016113e-07, "sentence_fisher_kl_divergence/p25": 2.2724270820617676e-06, "sentence_fisher_kl_divergence/p75": 1.96993350982666e-05, "sentence_fisher_kl_divergence/p85": 2.22623348236084e-05, "sentence_fisher_kl_divergence/p90": 2.9027462005615234e-05, "sentence_fisher_kl_divergence/p95": 3.129243850708008e-05, "sentence_fisher_kl_divergence/p99": 3.883854515152052e-05, "sentence_fisher_kl_divergence/var": 1.5914833340069379e-10, "sentence_full_gradient_variance/max_squared_error": 7735.935546875, "sentence_full_gradient_variance/metric": 7735.935546875, "sentence_full_gradient_variance/p75": 7735.935546875, "sentence_full_gradient_variance/p90": 7735.935546875, "sentence_full_gradient_variance/p95": 7735.935546875, "sentence_full_gradient_variance/p99": 7735.935546875, "sentence_full_update_term": 0.0432383231818676, "sentence_full_update_term/max": 0.1884765625, "sentence_full_update_term/median": 0.048095703125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0703125, "sentence_full_update_term/p85": 0.077392578125, "sentence_full_update_term/p90": 0.085205078125, "sentence_full_update_term/p95": 0.107666015625, "sentence_full_update_term/p99": 0.1504395753145218, "sentence_full_update_term/var": 0.0016270518535748124, "sentence_hessian_coeff": 158767.046875, "sentence_hessian_coeff/max": 2244608.0, "sentence_hessian_coeff/median": 48896.0, "sentence_hessian_coeff/min": -75776.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 231936.0, "sentence_hessian_coeff/p99": 727044.875, "sentence_hessian_coeff/var": 76863528960.0, "sentence_hessian_coeff_abs": 162373.71875, "sentence_hessian_coeff_abs/max": 2244608.0, "sentence_hessian_coeff_abs/median": 65024.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 231936.0, "sentence_hessian_coeff_abs/p99": 727044.875, "sentence_hessian_coeff_abs/var": 75693072384.0, "step": 6, "token_fisher_curvature": 601322.8125, "token_fisher_curvature/max": 78118912.0, "token_fisher_curvature/median": 1.043081283569336e-06, "token_fisher_curvature/min": 8.589647551967072e-32, "token_fisher_curvature/p25": 2.3646862246096134e-10, "token_fisher_curvature/p75": 166.0, "token_fisher_curvature/p85": 34560.0, "token_fisher_curvature/p90": 274432.0, "token_fisher_curvature/p95": 2031616.0, "token_fisher_curvature/p99": 17496064.0, "token_fisher_curvature/var": 10806966091776.0, "token_fisher_kl_divergence": 1.6905965821933933e-05, "token_fisher_kl_divergence/max": 0.002197265625, "token_fisher_kl_divergence/median": 2.927345865710862e-17, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.643914680025918e-21, "token_fisher_kl_divergence/p75": 4.6566128730773926e-09, "token_fisher_kl_divergence/p85": 9.685754776000977e-07, "token_fisher_kl_divergence/p90": 7.68899917602539e-06, "token_fisher_kl_divergence/p95": 5.7220458984375e-05, "token_fisher_kl_divergence/p99": 0.0004906207323074341, "token_fisher_kl_divergence/var": 8.541721108201727e-09, "token_full_update_term": 0.0013701432617381215, "token_full_update_term/max": 0.068359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.4123659133911133e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.558831617236137e-09, "token_full_update_term/p85": 1.633167266845703e-05, "token_full_update_term/p90": 0.001129150390625, "token_full_update_term/p95": 0.0093994140625, "token_full_update_term/p99": 0.03173828125, "token_full_update_term/var": 3.0663515644846484e-05, "token_hessian_coeff": 133886.25, "token_hessian_coeff/max": 76546048.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -9568256.0, "token_hessian_coeff/p25": -0.470703125, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 12320768.0, "token_hessian_coeff/var": 7783619493888.0, "token_hessian_coeff_abs": 504882.40625, "token_hessian_coeff_abs/max": 76546048.0, "token_hessian_coeff_abs/median": 0.0005645751953125, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.34375, "token_hessian_coeff_abs/p99": 12320768.0, "token_hessian_coeff_abs/var": 7546634502144.0 }, { "accuracy_reward": 0.625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2368421107530594, "adam_stats/lm_head/lr_effective_max": 6.543585914187133e-05, "adam_stats/lm_head/lr_effective_mean": -1.4519134505786013e-10, "adam_stats/lm_head/lr_effective_min": -6.574553844984621e-05, "adam_stats/lm_head/lr_effective_std": 2.0371307982713915e-06, "adam_stats/lr_effective_max": 6.6525382862892e-05, "adam_stats/lr_effective_mean": -2.908279728153218e-10, "adam_stats/lr_effective_min": -6.652583397226408e-05, "adam_stats/m_t_max": 0.01821192353963852, "adam_stats/m_t_mean": -7.151883946177051e-11, "adam_stats/m_t_min": -0.011275473982095718, "adam_stats/v_t_max": 2.6020674340543337e-05, "adam_stats/v_t_mean": 1.6506673415281337e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2368421107530594, "all_logprobs": -0.08662707358598709, "all_logprobs/max": 0.0, "all_logprobs/median": -2.384185791015625e-07, "all_logprobs/min": -12.5, "all_logprobs/p1": -1.9140625, "all_logprobs/p10": -0.12890625, "all_logprobs/p25": -0.000335693359375, "all_logprobs/p5": -0.4921875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.14315679669380188, "clip_ratio": 0.0, "completion_length": 713.1666870117188, "completion_length/correct": 583.7000122070312, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 573.0, "completion_length/correct/min": 170.0, "completion_length/correct/p25": 333.0, "completion_length/correct/p75": 820.0, "completion_length/correct/var": 79252.3125, "completion_length/incorrect": 928.9444580078125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 65.0, "completion_length/incorrect/p25": 864.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 38550.16796875, "completion_length/max": 1024.0, "completion_length/median": 739.0, "completion_length/min": 65.0, "completion_length/p25": 432.5, "completion_length/p75": 1024.0, "completion_length/var": 91652.640625, "curvature_clip_ratio_token_fisher": 0.07142439484596252, "curvature_clip_ratio_token_hessian": 0.03933453932404518, "curvature_clip_ratio_total_fisher": 0.07142439484596252, "curvature_clip_ratio_total_full": 0.07142439484596252, "curvature_clip_ratio_total_hessian": 0.03933453932404518, "epoch": 0.0112, "feature_vector_variance/max_squared_error": 116114.3515625, "feature_vector_variance/metric": 25765.302734375, "generated_tokens/total": 434983.0, "global_fisher_curvature": 146432.0, "global_fisher_curvature/max": 146432.0, "global_fisher_curvature/median": 146432.0, "global_fisher_curvature/min": 146432.0, "global_fisher_curvature/p25": 146432.0, "global_fisher_curvature/p75": 146432.0, "global_fisher_curvature/p85": 146432.0, "global_fisher_curvature/p90": 146432.0, "global_fisher_curvature/p95": 146432.0, "global_fisher_curvature/p99": 146432.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.930662155151367e-06, "global_fisher_kl_divergence/max": 5.930662155151367e-06, "global_fisher_kl_divergence/median": 5.930662155151367e-06, "global_fisher_kl_divergence/min": 5.930662155151367e-06, "global_fisher_kl_divergence/p25": 5.930662155151367e-06, "global_fisher_kl_divergence/p75": 5.930662155151367e-06, "global_fisher_kl_divergence/p85": 5.930662155151367e-06, "global_fisher_kl_divergence/p90": 5.930662155151367e-06, "global_fisher_kl_divergence/p95": 5.930662155151367e-06, "global_fisher_kl_divergence/p99": 5.930662155151367e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.11767578125, "global_full_update_term/max": 0.11767578125, "global_full_update_term/median": 0.11767578125, "global_full_update_term/min": 0.11767578125, "global_full_update_term/p25": 0.11767578125, "global_full_update_term/p75": 0.11767578125, "global_full_update_term/p85": 0.11767578125, "global_full_update_term/p90": 0.11767578125, "global_full_update_term/p95": 0.11767578125, "global_full_update_term/p99": 0.11767578125, "global_full_update_term/var": NaN, "global_hessian_coeff": 28032.0, "global_hessian_coeff/max": 28032.0, "global_hessian_coeff/median": 28032.0, "global_hessian_coeff/min": 28032.0, "global_hessian_coeff/p25": 28032.0, "global_hessian_coeff/p75": 28032.0, "global_hessian_coeff/p99": 28032.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 28032.0, "global_hessian_coeff_abs/max": 28032.0, "global_hessian_coeff_abs/median": 28032.0, "global_hessian_coeff_abs/min": 28032.0, "global_hessian_coeff_abs/p25": 28032.0, "global_hessian_coeff_abs/p75": 28032.0, "global_hessian_coeff_abs/p99": 28032.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.5744803547859192, "learning_rate": 1.05e-05, "loss": -0.625, "masked_global_fisher_curvature": 3744.0, "masked_global_fisher_curvature/max": 3744.0, "masked_global_fisher_curvature/median": 3744.0, "masked_global_fisher_curvature/min": 3744.0, "masked_global_fisher_curvature/p25": 3744.0, "masked_global_fisher_curvature/p75": 3744.0, "masked_global_fisher_curvature/p85": 3744.0, "masked_global_fisher_curvature/p90": 3744.0, "masked_global_fisher_curvature/p95": 3744.0, "masked_global_fisher_curvature/p99": 3744.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/max": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/median": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/min": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/p25": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/p75": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/p85": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/p90": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/p95": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/p99": 1.51805579662323e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00665283203125, "masked_global_full_update_term/max": 0.00665283203125, "masked_global_full_update_term/median": 0.00665283203125, "masked_global_full_update_term/min": 0.00665283203125, "masked_global_full_update_term/p25": 0.00665283203125, "masked_global_full_update_term/p75": 0.00665283203125, "masked_global_full_update_term/p85": 0.00665283203125, "masked_global_full_update_term/p90": 0.00665283203125, "masked_global_full_update_term/p95": 0.00665283203125, "masked_global_full_update_term/p99": 0.00665283203125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -7648.0, "masked_global_hessian_coeff/max": -7648.0, "masked_global_hessian_coeff/median": -7648.0, "masked_global_hessian_coeff/min": -7648.0, "masked_global_hessian_coeff/p25": -7648.0, "masked_global_hessian_coeff/p75": -7648.0, "masked_global_hessian_coeff/p99": -7648.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 7648.0, "masked_global_hessian_coeff_abs/max": 7648.0, "masked_global_hessian_coeff_abs/median": 7648.0, "masked_global_hessian_coeff_abs/min": 7648.0, "masked_global_hessian_coeff_abs/p25": 7648.0, "masked_global_hessian_coeff_abs/p75": 7648.0, "masked_global_hessian_coeff_abs/p99": 7648.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 5.238118648529053, "masked_per_sentence_gradient_norm/max": 28.125, "masked_per_sentence_gradient_norm/median": 4.84375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 8.421875, "masked_per_sentence_gradient_norm/var": 28.728900909423828, "masked_per_token_gradient_norm": 0.3270646333694458, "masked_per_token_gradient_norm/max": 29.875, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.59634780883789e-06, "masked_per_token_gradient_norm/var": 3.642202377319336, "masked_sentence_fisher_curvature": 4270.6669921875, "masked_sentence_fisher_curvature/max": 46336.0, "masked_sentence_fisher_curvature/median": 3376.0, "masked_sentence_fisher_curvature/min": 816.0, "masked_sentence_fisher_curvature/p25": 2648.0, "masked_sentence_fisher_curvature/p75": 4960.0, "masked_sentence_fisher_curvature/p85": 5568.0, "masked_sentence_fisher_curvature/p90": 6032.0, "masked_sentence_fisher_curvature/p95": 7088.0, "masked_sentence_fisher_curvature/p99": 15388.8994140625, "masked_sentence_fisher_curvature/var": 22434798.0, "masked_sentence_fisher_kl_divergence": 1.7301985621998028e-07, "masked_sentence_fisher_kl_divergence/max": 1.8775463104248047e-06, "masked_sentence_fisher_kl_divergence/median": 1.3690441846847534e-07, "masked_sentence_fisher_kl_divergence/min": 3.306195139884949e-08, "masked_sentence_fisher_kl_divergence/p25": 1.0721851140260696e-07, "masked_sentence_fisher_kl_divergence/p75": 2.0116567611694336e-07, "masked_sentence_fisher_kl_divergence/p85": 2.2584572434425354e-07, "masked_sentence_fisher_kl_divergence/p90": 2.4493783712387085e-07, "masked_sentence_fisher_kl_divergence/p95": 2.868473529815674e-07, "masked_sentence_fisher_kl_divergence/p99": 6.247352075661183e-07, "masked_sentence_fisher_kl_divergence/var": 3.684518442843669e-14, "masked_sentence_full_gradient_variance/max_squared_error": 54.586273193359375, "masked_sentence_full_gradient_variance/metric": 54.586273193359375, "masked_sentence_full_gradient_variance/p75": 54.586273193359375, "masked_sentence_full_gradient_variance/p90": 54.586273193359375, "masked_sentence_full_gradient_variance/p95": 54.586273193359375, "masked_sentence_full_gradient_variance/p99": 54.586273193359375, "masked_sentence_full_update_term": 0.0032181739807128906, "masked_sentence_full_update_term/max": 0.01214599609375, "masked_sentence_full_update_term/median": 0.00274658203125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0052947998046875, "masked_sentence_full_update_term/p85": 0.00677490234375, "masked_sentence_full_update_term/p90": 0.007720947265625, "masked_sentence_full_update_term/p95": 0.0089569091796875, "masked_sentence_full_update_term/p99": 0.010870365425944328, "masked_sentence_full_update_term/var": 1.0083650522574317e-05, "masked_sentence_hessian_coeff": -31289.333984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -38144.0, "masked_sentence_hessian_coeff/min": -177152.0, "masked_sentence_hessian_coeff/p25": -52480.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 861794880.0, "masked_sentence_hessian_coeff_abs": 31289.333984375, "masked_sentence_hessian_coeff_abs/max": 177152.0, "masked_sentence_hessian_coeff_abs/median": 36864.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 52480.0, "masked_sentence_hessian_coeff_abs/p99": 78413.1171875, "masked_sentence_hessian_coeff_abs/var": 861794880.0, "masked_token_fisher_curvature": 4870.51123046875, "masked_token_fisher_curvature/max": 245760.0, "masked_token_fisher_curvature/median": 9.89530235528946e-10, "masked_token_fisher_curvature/min": 4.089454932665725e-27, "masked_token_fisher_curvature/p25": 1.1866063687193673e-12, "masked_token_fisher_curvature/p75": 9.775161743164062e-05, "masked_token_fisher_curvature/p85": 4.9375, "masked_token_fisher_curvature/p90": 652.0, "masked_token_fisher_curvature/p95": 17920.0, "masked_token_fisher_curvature/p99": 155648.0, "masked_token_fisher_curvature/var": 608383680.0, "masked_token_fisher_kl_divergence": 1.9727148981019127e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.002230675776569e-20, "masked_token_fisher_kl_divergence/min": 1.6530389308438418e-37, "masked_token_fisher_kl_divergence/p25": 4.7976475528075605e-23, "masked_token_fisher_kl_divergence/p75": 3.969047313034935e-15, "masked_token_fisher_kl_divergence/p85": 2.000888343900442e-10, "masked_token_fisher_kl_divergence/p90": 2.6426278054714203e-08, "masked_token_fisher_kl_divergence/p95": 7.264316082000732e-07, "masked_token_fisher_kl_divergence/p99": 6.318092346191406e-06, "masked_token_fisher_kl_divergence/var": 9.981030758832166e-13, "masked_token_full_update_term": 7.132449536584318e-05, "masked_token_full_update_term/max": 0.00439453125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -5.245208740234375e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.9331204132176936e-11, "masked_token_full_update_term/p85": 2.9249349609017372e-09, "masked_token_full_update_term/p90": 1.8998980522155762e-07, "masked_token_full_update_term/p95": 0.00015038996934890747, "masked_token_full_update_term/p99": 0.002598106861114502, "masked_token_full_update_term/var": 1.678626659895599e-07, "masked_token_hessian_coeff": -39823.296875, "masked_token_hessian_coeff/max": 2464.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -3637248.0, "masked_token_hessian_coeff/p25": -0.01177978515625, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.28763580322265625, "masked_token_hessian_coeff/var": 52294836224.0, "masked_token_hessian_coeff_abs": 39823.8828125, "masked_token_hessian_coeff_abs/max": 3637248.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.023681640625, "masked_token_hessian_coeff_abs/p99": 1359872.0, "masked_token_hessian_coeff_abs/var": 52294782976.0, "mean_logprobs": -0.091796875, "mean_logprobs/var": 0.0125732421875, "num_completions/total": 672, "per_sentence_gradient_norm": 71.7421875, "per_sentence_gradient_norm/max": 260.0, "per_sentence_gradient_norm/median": 64.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 118.375, "per_sentence_gradient_norm/var": 5141.421875, "per_token_feature_norm": 163.52976989746094, "per_token_feature_norm/max": 320.0, "per_token_feature_norm/median": 162.0, "per_token_feature_norm/min": 65.5, "per_token_feature_norm/p25": 133.0, "per_token_feature_norm/p75": 191.0, "per_token_feature_norm/var": 1502.8323974609375, "per_token_gradient_norm": 5.505533218383789, "per_token_gradient_norm/max": 392.0, "per_token_gradient_norm/median": 1.7389538697898388e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.765655517578125e-05, "per_token_gradient_norm/var": 711.0484008789062, "per_token_policy_error_norm": 0.04671410471200943, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.04074104130268097, "policy_entropy": 0.09451044350862503, "policy_entropy/max": 3.796875, "policy_entropy/median": 5.066394805908203e-06, "policy_entropy/min": 4.085620730620576e-14, "policy_entropy/p25": 1.9744038581848145e-07, "policy_entropy/p75": 0.0030364990234375, "policy_entropy/var": 0.0696253776550293, "policy_loss": -0.625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2368421107530594, "policy_sharpness": 8.235678672790527, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 8.125, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 10.043190956115723, "reward": 0.625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2368421107530594, "rewards/accuracy_reward": 0.625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2368421107530594, "sentence_fisher_curvature": 380870.6875, "sentence_fisher_curvature/max": 1761280.0, "sentence_fisher_curvature/median": 413696.0, "sentence_fisher_curvature/min": 9536.0, "sentence_fisher_curvature/p25": 47296.0, "sentence_fisher_curvature/p75": 609280.0, "sentence_fisher_curvature/p85": 692224.0, "sentence_fisher_curvature/p90": 743424.0, "sentence_fisher_curvature/p95": 892928.0, "sentence_fisher_curvature/p99": 1123125.25, "sentence_fisher_curvature/var": 107291082752.0, "sentence_fisher_kl_divergence": 1.5427165635628626e-05, "sentence_fisher_kl_divergence/max": 7.152557373046875e-05, "sentence_fisher_kl_divergence/median": 1.6808509826660156e-05, "sentence_fisher_kl_divergence/min": 3.855675458908081e-07, "sentence_fisher_kl_divergence/p25": 1.914799213409424e-06, "sentence_fisher_kl_divergence/p75": 2.467632293701172e-05, "sentence_fisher_kl_divergence/p85": 2.8014183044433594e-05, "sentence_fisher_kl_divergence/p90": 3.0100345611572266e-05, "sentence_fisher_kl_divergence/p95": 3.62396240234375e-05, "sentence_fisher_kl_divergence/p99": 4.547842763713561e-05, "sentence_fisher_kl_divergence/var": 1.7629901705173978e-10, "sentence_full_gradient_variance/max_squared_error": 10071.806640625, "sentence_full_gradient_variance/metric": 10071.806640625, "sentence_full_gradient_variance/p75": 10071.806640625, "sentence_full_gradient_variance/p90": 10071.806640625, "sentence_full_gradient_variance/p95": 10071.806640625, "sentence_full_gradient_variance/p99": 10071.806640625, "sentence_full_update_term": 0.05313364788889885, "sentence_full_update_term/max": 0.1787109375, "sentence_full_update_term/median": 0.048583984375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.09033203125, "sentence_full_update_term/p85": 0.1131591796875, "sentence_full_update_term/p90": 0.122802734375, "sentence_full_update_term/p95": 0.1484375, "sentence_full_update_term/p99": 0.17778320610523224, "sentence_full_update_term/var": 0.0027160823810845613, "sentence_hessian_coeff": 95900.671875, "sentence_hessian_coeff/max": 876544.0, "sentence_hessian_coeff/median": 20864.0, "sentence_hessian_coeff/min": -147456.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 172544.0, "sentence_hessian_coeff/p99": 526337.125, "sentence_hessian_coeff/var": 24664080384.0, "sentence_hessian_coeff_abs": 111579.3359375, "sentence_hessian_coeff_abs/max": 876544.0, "sentence_hessian_coeff_abs/median": 53760.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 172544.0, "sentence_hessian_coeff_abs/p99": 526337.125, "sentence_hessian_coeff_abs/var": 21376827392.0, "step": 7, "token_fisher_curvature": 437362.46875, "token_fisher_curvature/max": 73400320.0, "token_fisher_curvature/median": 3.2159732654690742e-09, "token_fisher_curvature/min": 4.089454932665725e-27, "token_fisher_curvature/p25": 2.0889956431346945e-12, "token_fisher_curvature/p75": 0.026611328125, "token_fisher_curvature/p85": 2024.0, "token_fisher_curvature/p90": 51456.0, "token_fisher_curvature/p95": 839680.0, "token_fisher_curvature/p99": 14531584.0, "token_fisher_curvature/var": 7715641884672.0, "token_fisher_kl_divergence": 1.77150377567159e-05, "token_fisher_kl_divergence/max": 0.0029754638671875, "token_fisher_kl_divergence/median": 1.3044307387716225e-19, "token_fisher_kl_divergence/min": 1.6530389308438418e-37, "token_fisher_kl_divergence/p25": 8.478601278668534e-23, "token_fisher_kl_divergence/p75": 1.0800249583553523e-12, "token_fisher_kl_divergence/p85": 8.195638656616211e-08, "token_fisher_kl_divergence/p90": 2.086162567138672e-06, "token_fisher_kl_divergence/p95": 3.409385681152344e-05, "token_fisher_kl_divergence/p99": 0.0005888640880584717, "token_fisher_kl_divergence/var": 1.2659406856130317e-08, "token_full_update_term": 0.0012095811543986201, "token_full_update_term/max": 0.07958984375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -5.245208740234375e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.5370460459962487e-10, "token_full_update_term/p85": 1.5459954738616943e-07, "token_full_update_term/p90": 0.0001697540283203125, "token_full_update_term/p95": 0.005859375, "token_full_update_term/p99": 0.03466796875, "token_full_update_term/var": 3.300748721812852e-05, "token_hessian_coeff": 73946.78125, "token_hessian_coeff/max": 72876032.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -9043968.0, "token_hessian_coeff/p25": -0.023193359375, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 8040192.0, "token_hessian_coeff/var": 5254649741312.0, "token_hessian_coeff_abs": 373407.5, "token_hessian_coeff_abs/max": 72876032.0, "token_hessian_coeff_abs/median": 1.4603137969970703e-06, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.08642578125, "token_hessian_coeff_abs/p99": 8257536.0, "token_hessian_coeff_abs/var": 5120682622976.0 }, { "accuracy_reward": 0.7291666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.19956141710281372, "adam_stats/lm_head/lr_effective_max": 7.546547567471862e-05, "adam_stats/lm_head/lr_effective_mean": -1.6367791844107415e-10, "adam_stats/lm_head/lr_effective_min": -7.519872451666743e-05, "adam_stats/lm_head/lr_effective_std": 2.250467787234811e-06, "adam_stats/lr_effective_max": 7.792942051310092e-05, "adam_stats/lr_effective_mean": -2.125425807575354e-10, "adam_stats/lr_effective_min": -7.790880772517994e-05, "adam_stats/m_t_max": 0.018710067495703697, "adam_stats/m_t_mean": -8.639632065321479e-11, "adam_stats/m_t_min": -0.012015602551400661, "adam_stats/v_t_max": 2.653257797646802e-05, "adam_stats/v_t_mean": 1.7189733794373274e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7291666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.19956141710281372, "all_logprobs": -0.06173933669924736, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.8125, "all_logprobs/p1": -1.515625, "all_logprobs/p10": -0.048583984375, "all_logprobs/p25": -3.147125244140625e-05, "all_logprobs/p5": -0.3125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.09609781950712204, "clip_ratio": 0.0, "completion_length": 732.3854370117188, "completion_length/correct": 655.3142700195312, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 693.0, "completion_length/correct/min": 81.0, "completion_length/correct/p25": 369.25, "completion_length/correct/p75": 877.75, "completion_length/correct/var": 76626.859375, "completion_length/incorrect": 939.8846435546875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 47.0, "completion_length/incorrect/p25": 1024.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 50170.18359375, "completion_length/max": 1024.0, "completion_length/median": 784.0, "completion_length/min": 47.0, "completion_length/p25": 432.0, "completion_length/p75": 1024.0, "completion_length/var": 85018.5234375, "curvature_clip_ratio_token_fisher": 0.06632152199745178, "curvature_clip_ratio_token_hessian": 0.042867910116910934, "curvature_clip_ratio_total_fisher": 0.06632152199745178, "curvature_clip_ratio_total_full": 0.06632152199745178, "curvature_clip_ratio_total_hessian": 0.042867910116910934, "epoch": 0.0128, "feature_vector_variance/max_squared_error": 135175.0625, "feature_vector_variance/metric": 26642.2734375, "generated_tokens/total": 505292.0, "global_fisher_curvature": 152576.0, "global_fisher_curvature/max": 152576.0, "global_fisher_curvature/median": 152576.0, "global_fisher_curvature/min": 152576.0, "global_fisher_curvature/p25": 152576.0, "global_fisher_curvature/p75": 152576.0, "global_fisher_curvature/p85": 152576.0, "global_fisher_curvature/p90": 152576.0, "global_fisher_curvature/p95": 152576.0, "global_fisher_curvature/p99": 152576.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 8.404254913330078e-06, "global_fisher_kl_divergence/max": 8.404254913330078e-06, "global_fisher_kl_divergence/median": 8.404254913330078e-06, "global_fisher_kl_divergence/min": 8.404254913330078e-06, "global_fisher_kl_divergence/p25": 8.404254913330078e-06, "global_fisher_kl_divergence/p75": 8.404254913330078e-06, "global_fisher_kl_divergence/p85": 8.404254913330078e-06, "global_fisher_kl_divergence/p90": 8.404254913330078e-06, "global_fisher_kl_divergence/p95": 8.404254913330078e-06, "global_fisher_kl_divergence/p99": 8.404254913330078e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.12353515625, "global_full_update_term/max": 0.12353515625, "global_full_update_term/median": 0.12353515625, "global_full_update_term/min": 0.12353515625, "global_full_update_term/p25": 0.12353515625, "global_full_update_term/p75": 0.12353515625, "global_full_update_term/p85": 0.12353515625, "global_full_update_term/p90": 0.12353515625, "global_full_update_term/p95": 0.12353515625, "global_full_update_term/p99": 0.12353515625, "global_full_update_term/var": NaN, "global_hessian_coeff": 34048.0, "global_hessian_coeff/max": 34048.0, "global_hessian_coeff/median": 34048.0, "global_hessian_coeff/min": 34048.0, "global_hessian_coeff/p25": 34048.0, "global_hessian_coeff/p75": 34048.0, "global_hessian_coeff/p99": 34048.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 34048.0, "global_hessian_coeff_abs/max": 34048.0, "global_hessian_coeff_abs/median": 34048.0, "global_hessian_coeff_abs/min": 34048.0, "global_hessian_coeff_abs/p25": 34048.0, "global_hessian_coeff_abs/p75": 34048.0, "global_hessian_coeff_abs/p99": 34048.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.37228286266326904, "learning_rate": 1.2e-05, "loss": -0.7292, "masked_global_fisher_curvature": 1568.0, "masked_global_fisher_curvature/max": 1568.0, "masked_global_fisher_curvature/median": 1568.0, "masked_global_fisher_curvature/min": 1568.0, "masked_global_fisher_curvature/p25": 1568.0, "masked_global_fisher_curvature/p75": 1568.0, "masked_global_fisher_curvature/p85": 1568.0, "masked_global_fisher_curvature/p90": 1568.0, "masked_global_fisher_curvature/p95": 1568.0, "masked_global_fisher_curvature/p99": 1568.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/max": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/median": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/min": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/p25": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/p75": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/p85": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/p90": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/p95": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/p99": 8.66129994392395e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.005584716796875, "masked_global_full_update_term/max": 0.005584716796875, "masked_global_full_update_term/median": 0.005584716796875, "masked_global_full_update_term/min": 0.005584716796875, "masked_global_full_update_term/p25": 0.005584716796875, "masked_global_full_update_term/p75": 0.005584716796875, "masked_global_full_update_term/p85": 0.005584716796875, "masked_global_full_update_term/p90": 0.005584716796875, "masked_global_full_update_term/p95": 0.005584716796875, "masked_global_full_update_term/p99": 0.005584716796875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -7328.0, "masked_global_hessian_coeff/max": -7328.0, "masked_global_hessian_coeff/median": -7328.0, "masked_global_hessian_coeff/min": -7328.0, "masked_global_hessian_coeff/p25": -7328.0, "masked_global_hessian_coeff/p75": -7328.0, "masked_global_hessian_coeff/p99": -7328.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 7328.0, "masked_global_hessian_coeff_abs/max": 7328.0, "masked_global_hessian_coeff_abs/median": 7328.0, "masked_global_hessian_coeff_abs/min": 7328.0, "masked_global_hessian_coeff_abs/p25": 7328.0, "masked_global_hessian_coeff_abs/p75": 7328.0, "masked_global_hessian_coeff_abs/p99": 7328.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 4.695556640625, "masked_per_sentence_gradient_norm/max": 15.8125, "masked_per_sentence_gradient_norm/median": 4.15625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 7.453125, "masked_per_sentence_gradient_norm/var": 15.98851490020752, "masked_per_token_gradient_norm": 0.2952186167240143, "masked_per_token_gradient_norm/max": 32.5, "masked_per_token_gradient_norm/median": 6.705522537231445e-08, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.519918441772461e-05, "masked_per_token_gradient_norm/var": 2.605372667312622, "masked_sentence_fisher_curvature": 2114.229248046875, "masked_sentence_fisher_curvature/max": 6432.0, "masked_sentence_fisher_curvature/median": 1776.0, "masked_sentence_fisher_curvature/min": 454.0, "masked_sentence_fisher_curvature/p25": 1318.0, "masked_sentence_fisher_curvature/p75": 2772.0, "masked_sentence_fisher_curvature/p85": 3488.0, "masked_sentence_fisher_curvature/p90": 3736.0, "masked_sentence_fisher_curvature/p95": 4448.0, "masked_sentence_fisher_curvature/p99": 5276.8037109375, "masked_sentence_fisher_curvature/var": 1391842.0, "masked_sentence_fisher_kl_divergence": 1.1654750409206827e-07, "masked_sentence_fisher_kl_divergence/max": 3.5390257835388184e-07, "masked_sentence_fisher_kl_divergence/median": 9.778887033462524e-08, "masked_sentence_fisher_kl_divergence/min": 2.5029294192790985e-08, "masked_sentence_fisher_kl_divergence/p25": 7.252674549818039e-08, "masked_sentence_fisher_kl_divergence/p75": 1.5273690223693848e-07, "masked_sentence_fisher_kl_divergence/p85": 1.9208528101444244e-07, "masked_sentence_fisher_kl_divergence/p90": 2.062879502773285e-07, "masked_sentence_fisher_kl_divergence/p95": 2.454034984111786e-07, "masked_sentence_fisher_kl_divergence/p99": 2.90200318886491e-07, "masked_sentence_fisher_kl_divergence/var": 4.223475681599392e-15, "masked_sentence_full_gradient_variance/max_squared_error": 36.96787643432617, "masked_sentence_full_gradient_variance/metric": 36.96787643432617, "masked_sentence_full_gradient_variance/p75": 36.96787643432617, "masked_sentence_full_gradient_variance/p90": 36.96787643432617, "masked_sentence_full_gradient_variance/p95": 36.96787643432617, "masked_sentence_full_gradient_variance/p99": 36.96787643432617, "masked_sentence_full_update_term": 0.003295819042250514, "masked_sentence_full_update_term/max": 0.01043701171875, "masked_sentence_full_update_term/median": 0.0031280517578125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.005279541015625, "masked_sentence_full_update_term/p85": 0.00623321533203125, "masked_sentence_full_update_term/p90": 0.006591796875, "masked_sentence_full_update_term/p95": 0.007659912109375, "masked_sentence_full_update_term/p99": 0.009799196384847164, "masked_sentence_full_update_term/var": 7.1806489359005354e-06, "masked_sentence_hessian_coeff": -28533.333984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -29440.0, "masked_sentence_hessian_coeff/min": -92160.0, "masked_sentence_hessian_coeff/p25": -43456.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 549806656.0, "masked_sentence_hessian_coeff_abs": 28533.333984375, "masked_sentence_hessian_coeff_abs/max": 92160.0, "masked_sentence_hessian_coeff_abs/median": 29184.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 43456.0, "masked_sentence_hessian_coeff_abs/p99": 90214.40625, "masked_sentence_hessian_coeff_abs/var": 549806656.0, "masked_token_fisher_curvature": 2570.09423828125, "masked_token_fisher_curvature/max": 180224.0, "masked_token_fisher_curvature/median": 2.9103830456733704e-11, "masked_token_fisher_curvature/min": 1.0008672734991587e-29, "masked_token_fisher_curvature/p25": 1.8540724511240114e-14, "masked_token_fisher_curvature/p75": 1.4379620552062988e-06, "masked_token_fisher_curvature/p85": 0.03564453125, "masked_token_fisher_curvature/p90": 30.4375, "masked_token_fisher_curvature/p95": 5056.0, "masked_token_fisher_curvature/p99": 90112.0, "masked_token_fisher_curvature/var": 228268928.0, "masked_token_fisher_kl_divergence": 1.4168652739954268e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.6014216659026616e-21, "masked_token_fisher_kl_divergence/min": 5.510129769479473e-40, "masked_token_fisher_kl_divergence/p25": 1.0210510686201435e-24, "masked_token_fisher_kl_divergence/p75": 7.936359902593892e-17, "masked_token_fisher_kl_divergence/p85": 1.9610979506978765e-12, "masked_token_fisher_kl_divergence/p90": 1.6771082300692797e-09, "masked_token_fisher_kl_divergence/p95": 2.7939677238464355e-07, "masked_token_fisher_kl_divergence/p99": 4.976987838745117e-06, "masked_token_fisher_kl_divergence/var": 6.937816206947778e-13, "masked_token_full_update_term": 7.645998994121328e-05, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.738569259643555e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 6.230038707144558e-11, "masked_token_full_update_term/p85": 9.487848728895187e-09, "masked_token_full_update_term/p90": 7.729977369308472e-07, "masked_token_full_update_term/p95": 0.000209808349609375, "masked_token_full_update_term/p99": 0.002593994140625, "masked_token_full_update_term/var": 1.7225562487510615e-07, "masked_token_hessian_coeff": -39677.27734375, "masked_token_hessian_coeff/max": 1192.0, "masked_token_hessian_coeff/median": -5.774199962615967e-07, "masked_token_hessian_coeff/min": -3014656.0, "masked_token_hessian_coeff/p25": -0.0166015625, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.29599761962890625, "masked_token_hessian_coeff/var": 46952984576.0, "masked_token_hessian_coeff_abs": 39677.6640625, "masked_token_hessian_coeff_abs/max": 3014656.0, "masked_token_hessian_coeff_abs/median": 8.678436279296875e-05, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.035400390625, "masked_token_hessian_coeff_abs/p99": 1269760.0, "masked_token_hessian_coeff_abs/var": 46952951808.0, "mean_logprobs": -0.06640625, "mean_logprobs/var": 0.001861572265625, "num_completions/total": 768, "per_sentence_gradient_norm": 70.93880462646484, "per_sentence_gradient_norm/max": 334.0, "per_sentence_gradient_norm/median": 65.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 108.25, "per_sentence_gradient_norm/var": 4215.27392578125, "per_token_feature_norm": 172.6943817138672, "per_token_feature_norm/max": 326.0, "per_token_feature_norm/median": 175.0, "per_token_feature_norm/min": 65.5, "per_token_feature_norm/p25": 145.0, "per_token_feature_norm/p75": 200.0, "per_token_feature_norm/var": 1377.701416015625, "per_token_gradient_norm": 5.582922458648682, "per_token_gradient_norm/max": 408.0, "per_token_gradient_norm/median": 1.2479722499847412e-07, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.2928924560546875e-05, "per_token_gradient_norm/var": 719.6566162109375, "per_token_policy_error_norm": 0.03405911102890968, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.029980776831507683, "policy_entropy": 0.06825251877307892, "policy_entropy/max": 3.765625, "policy_entropy/median": 9.462237358093262e-07, "policy_entropy/min": 1.2559397966072083e-15, "policy_entropy/p25": 2.561137080192566e-08, "policy_entropy/p75": 0.00037384033203125, "policy_entropy/var": 0.04926850274205208, "policy_loss": -0.7291666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.19956141710281372, "policy_sharpness": 8.603001594543457, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 8.212301254272461, "reward": 0.7291666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.19956141710281372, "rewards/accuracy_reward": 0.7291666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.19956141710281372, "sentence_fisher_curvature": 412947.0, "sentence_fisher_curvature/max": 1605632.0, "sentence_fisher_curvature/median": 368640.0, "sentence_fisher_curvature/min": 7328.0, "sentence_fisher_curvature/p25": 42240.0, "sentence_fisher_curvature/p75": 594944.0, "sentence_fisher_curvature/p85": 714752.0, "sentence_fisher_curvature/p90": 821248.0, "sentence_fisher_curvature/p95": 1057792.0, "sentence_fisher_curvature/p99": 1465549.25, "sentence_fisher_curvature/var": 126233182208.0, "sentence_fisher_kl_divergence": 2.276565646752715e-05, "sentence_fisher_kl_divergence/max": 8.869171142578125e-05, "sentence_fisher_kl_divergence/median": 2.0265579223632812e-05, "sentence_fisher_kl_divergence/min": 4.041939973831177e-07, "sentence_fisher_kl_divergence/p25": 2.3283064365386963e-06, "sentence_fisher_kl_divergence/p75": 3.272294998168945e-05, "sentence_fisher_kl_divergence/p85": 3.9458274841308594e-05, "sentence_fisher_kl_divergence/p90": 4.5180320739746094e-05, "sentence_fisher_kl_divergence/p95": 5.829334259033203e-05, "sentence_fisher_kl_divergence/p99": 8.099081605905667e-05, "sentence_fisher_kl_divergence/var": 3.840274764854712e-10, "sentence_full_gradient_variance/max_squared_error": 9064.6787109375, "sentence_full_gradient_variance/metric": 9064.6787109375, "sentence_full_gradient_variance/p75": 9064.6787109375, "sentence_full_gradient_variance/p90": 9064.6787109375, "sentence_full_gradient_variance/p95": 9064.6787109375, "sentence_full_gradient_variance/p99": 9064.6787109375, "sentence_full_update_term": 0.060403190553188324, "sentence_full_update_term/max": 0.2138671875, "sentence_full_update_term/median": 0.05615234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0950927734375, "sentence_full_update_term/p85": 0.1126708984375, "sentence_full_update_term/p90": 0.119384765625, "sentence_full_update_term/p95": 0.16064453125, "sentence_full_update_term/p99": 0.2045898735523224, "sentence_full_update_term/var": 0.0026994014624506235, "sentence_hessian_coeff": 107627.3359375, "sentence_hessian_coeff/max": 794624.0, "sentence_hessian_coeff/median": 36096.0, "sentence_hessian_coeff/min": -101888.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 185088.0, "sentence_hessian_coeff/p99": 627302.9375, "sentence_hessian_coeff/var": 26130227200.0, "sentence_hessian_coeff_abs": 119890.0, "sentence_hessian_coeff_abs/max": 794624.0, "sentence_hessian_coeff_abs/median": 58880.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 185088.0, "sentence_hessian_coeff_abs/p99": 627302.9375, "sentence_hessian_coeff_abs/var": 23310891008.0, "step": 8, "token_fisher_curvature": 466811.6875, "token_fisher_curvature/max": 68157440.0, "token_fisher_curvature/median": 8.86757334228605e-11, "token_fisher_curvature/min": 1.0008672734991587e-29, "token_fisher_curvature/p25": 3.3084646133829665e-14, "token_fisher_curvature/p75": 0.000156402587890625, "token_fisher_curvature/p85": 181.0, "token_fisher_curvature/p90": 14272.0, "token_fisher_curvature/p95": 638976.0, "token_fisher_curvature/p99": 15859712.0, "token_fisher_curvature/var": 9338554941440.0, "token_fisher_kl_divergence": 2.5736399038578384e-05, "token_fisher_kl_divergence/max": 0.003753662109375, "token_fisher_kl_divergence/median": 4.896909226313924e-21, "token_fisher_kl_divergence/min": 5.510129769479473e-40, "token_fisher_kl_divergence/p25": 1.822382287030889e-24, "token_fisher_kl_divergence/p75": 8.604228440844963e-15, "token_fisher_kl_divergence/p85": 9.953510016202927e-09, "token_fisher_kl_divergence/p90": 7.860362529754639e-07, "token_fisher_kl_divergence/p95": 3.528594970703125e-05, "token_fisher_kl_divergence/p99": 0.000873565673828125, "token_fisher_kl_divergence/var": 2.838633683666103e-08, "token_full_update_term": 0.0014775557210668921, "token_full_update_term/max": 0.0908203125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.738569259643555e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.147295840084553e-10, "token_full_update_term/p85": 8.754432201385498e-07, "token_full_update_term/p90": 0.0002899169921875, "token_full_update_term/p95": 0.00667119026184082, "token_full_update_term/p99": 0.042217254638671875, "token_full_update_term/var": 4.962654202245176e-05, "token_hessian_coeff": 71163.109375, "token_hessian_coeff/max": 66846720.0, "token_hessian_coeff/median": -1.2367963790893555e-06, "token_hessian_coeff/min": -8650752.0, "token_hessian_coeff/p25": -0.039306640625, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 8388608.0, "token_hessian_coeff/var": 6319401598976.0, "token_hessian_coeff_abs": 411142.8125, "token_hessian_coeff_abs/max": 66846720.0, "token_hessian_coeff_abs/median": 0.00016498565673828125, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.15625, "token_hessian_coeff_abs/p99": 8454144.0, "token_hessian_coeff_abs/var": 6155425284096.0 }, { "accuracy_reward": 0.4479166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 0.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24989037215709686, "adam_stats/lm_head/lr_effective_max": 8.056629303609952e-05, "adam_stats/lm_head/lr_effective_mean": -1.1098184871105587e-10, "adam_stats/lm_head/lr_effective_min": -8.023638656595722e-05, "adam_stats/lm_head/lr_effective_std": 2.3595512175234035e-06, "adam_stats/lr_effective_max": 8.744129445403814e-05, "adam_stats/lr_effective_mean": -2.58149807086383e-10, "adam_stats/lr_effective_min": -8.755554154049605e-05, "adam_stats/m_t_max": 0.017602000385522842, "adam_stats/m_t_mean": -9.18516027126337e-11, "adam_stats/m_t_min": -0.011201615445315838, "adam_stats/v_t_max": 2.6564253857941367e-05, "adam_stats/v_t_mean": 1.7300937158598595e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.4479166865348816, "advantages/max": 1.0, "advantages/median": 0.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24989037215709686, "all_logprobs": -0.07466791570186615, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.6875, "all_logprobs/p1": -1.775937557220459, "all_logprobs/p10": -0.07861328125, "all_logprobs/p25": -7.581710815429688e-05, "all_logprobs/p5": -0.38671875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.12839266657829285, "clip_ratio": 0.0, "completion_length": 790.3021240234375, "completion_length/correct": 668.1162719726562, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 664.0, "completion_length/correct/min": 253.0, "completion_length/correct/p25": 348.5, "completion_length/correct/p75": 974.5, "completion_length/correct/var": 83024.6328125, "completion_length/incorrect": 889.4339599609375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 293.0, "completion_length/incorrect/p25": 831.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 52470.9453125, "completion_length/max": 1024.0, "completion_length/median": 921.0, "completion_length/min": 253.0, "completion_length/p25": 561.0, "completion_length/p75": 1024.0, "completion_length/var": 77666.5703125, "curvature_clip_ratio_token_fisher": 0.05185253545641899, "curvature_clip_ratio_token_hessian": 0.023804189637303352, "curvature_clip_ratio_total_fisher": 0.05185253545641899, "curvature_clip_ratio_total_full": 0.05185253545641899, "curvature_clip_ratio_total_hessian": 0.023804189637303352, "epoch": 0.0144, "feature_vector_variance/max_squared_error": 145840.09375, "feature_vector_variance/metric": 30129.021484375, "generated_tokens/total": 581161.0, "global_fisher_curvature": 113152.0, "global_fisher_curvature/max": 113152.0, "global_fisher_curvature/median": 113152.0, "global_fisher_curvature/min": 113152.0, "global_fisher_curvature/p25": 113152.0, "global_fisher_curvature/p75": 113152.0, "global_fisher_curvature/p85": 113152.0, "global_fisher_curvature/p90": 113152.0, "global_fisher_curvature/p95": 113152.0, "global_fisher_curvature/p99": 113152.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 8.165836334228516e-06, "global_fisher_kl_divergence/max": 8.165836334228516e-06, "global_fisher_kl_divergence/median": 8.165836334228516e-06, "global_fisher_kl_divergence/min": 8.165836334228516e-06, "global_fisher_kl_divergence/p25": 8.165836334228516e-06, "global_fisher_kl_divergence/p75": 8.165836334228516e-06, "global_fisher_kl_divergence/p85": 8.165836334228516e-06, "global_fisher_kl_divergence/p90": 8.165836334228516e-06, "global_fisher_kl_divergence/p95": 8.165836334228516e-06, "global_fisher_kl_divergence/p99": 8.165836334228516e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.185546875, "global_full_update_term/max": 0.185546875, "global_full_update_term/median": 0.185546875, "global_full_update_term/min": 0.185546875, "global_full_update_term/p25": 0.185546875, "global_full_update_term/p75": 0.185546875, "global_full_update_term/p85": 0.185546875, "global_full_update_term/p90": 0.185546875, "global_full_update_term/p95": 0.185546875, "global_full_update_term/p99": 0.185546875, "global_full_update_term/var": NaN, "global_hessian_coeff": 35072.0, "global_hessian_coeff/max": 35072.0, "global_hessian_coeff/median": 35072.0, "global_hessian_coeff/min": 35072.0, "global_hessian_coeff/p25": 35072.0, "global_hessian_coeff/p75": 35072.0, "global_hessian_coeff/p99": 35072.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 35072.0, "global_hessian_coeff_abs/max": 35072.0, "global_hessian_coeff_abs/median": 35072.0, "global_hessian_coeff_abs/min": 35072.0, "global_hessian_coeff_abs/p25": 35072.0, "global_hessian_coeff_abs/p75": 35072.0, "global_hessian_coeff_abs/p99": 35072.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.158815398812294, "learning_rate": 1.3500000000000001e-05, "loss": -0.4479, "masked_global_fisher_curvature": 3216.0, "masked_global_fisher_curvature/max": 3216.0, "masked_global_fisher_curvature/median": 3216.0, "masked_global_fisher_curvature/min": 3216.0, "masked_global_fisher_curvature/p25": 3216.0, "masked_global_fisher_curvature/p75": 3216.0, "masked_global_fisher_curvature/p85": 3216.0, "masked_global_fisher_curvature/p90": 3216.0, "masked_global_fisher_curvature/p95": 3216.0, "masked_global_fisher_curvature/p99": 3216.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/max": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/median": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/min": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p25": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p75": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p85": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p90": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p95": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/p99": 2.3189932107925415e-07, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.004119873046875, "masked_global_full_update_term/max": 0.004119873046875, "masked_global_full_update_term/median": 0.004119873046875, "masked_global_full_update_term/min": 0.004119873046875, "masked_global_full_update_term/p25": 0.004119873046875, "masked_global_full_update_term/p75": 0.004119873046875, "masked_global_full_update_term/p85": 0.004119873046875, "masked_global_full_update_term/p90": 0.004119873046875, "masked_global_full_update_term/p95": 0.004119873046875, "masked_global_full_update_term/p99": 0.004119873046875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2640.0, "masked_global_hessian_coeff/max": -2640.0, "masked_global_hessian_coeff/median": -2640.0, "masked_global_hessian_coeff/min": -2640.0, "masked_global_hessian_coeff/p25": -2640.0, "masked_global_hessian_coeff/p75": -2640.0, "masked_global_hessian_coeff/p99": -2640.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2640.0, "masked_global_hessian_coeff_abs/max": 2640.0, "masked_global_hessian_coeff_abs/median": 2640.0, "masked_global_hessian_coeff_abs/min": 2640.0, "masked_global_hessian_coeff_abs/p25": 2640.0, "masked_global_hessian_coeff_abs/p75": 2640.0, "masked_global_hessian_coeff_abs/p99": 2640.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.4208579063415527, "masked_per_sentence_gradient_norm/max": 14.75, "masked_per_sentence_gradient_norm/median": 0.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.390625, "masked_per_sentence_gradient_norm/var": 12.260400772094727, "masked_per_token_gradient_norm": 0.12837859988212585, "masked_per_token_gradient_norm/max": 22.625, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 5.681067705154419e-08, "masked_per_token_gradient_norm/var": 1.023850440979004, "masked_sentence_fisher_curvature": 1954.9896240234375, "masked_sentence_fisher_curvature/max": 8128.0, "masked_sentence_fisher_curvature/median": 1704.0, "masked_sentence_fisher_curvature/min": 157.0, "masked_sentence_fisher_curvature/p25": 888.0, "masked_sentence_fisher_curvature/p75": 2476.0, "masked_sentence_fisher_curvature/p85": 3288.0, "masked_sentence_fisher_curvature/p90": 3584.0, "masked_sentence_fisher_curvature/p95": 4016.0, "masked_sentence_fisher_curvature/p99": 6334.40576171875, "masked_sentence_fisher_curvature/var": 1864593.875, "masked_sentence_fisher_kl_divergence": 1.40758857014589e-07, "masked_sentence_fisher_kl_divergence/max": 5.848705768585205e-07, "masked_sentence_fisher_kl_divergence/median": 1.2293457984924316e-07, "masked_sentence_fisher_kl_divergence/min": 1.1292286217212677e-08, "masked_sentence_fisher_kl_divergence/p25": 6.379559636116028e-08, "masked_sentence_fisher_kl_divergence/p75": 1.778826117515564e-07, "masked_sentence_fisher_kl_divergence/p85": 2.367887645959854e-07, "masked_sentence_fisher_kl_divergence/p90": 2.5797635316848755e-07, "masked_sentence_fisher_kl_divergence/p95": 2.896413207054138e-07, "masked_sentence_fisher_kl_divergence/p99": 4.556965507163113e-07, "masked_sentence_fisher_kl_divergence/var": 9.658637076443745e-15, "masked_sentence_full_gradient_variance/max_squared_error": 17.647537231445312, "masked_sentence_full_gradient_variance/metric": 17.647537231445312, "masked_sentence_full_gradient_variance/p75": 17.647537231445312, "masked_sentence_full_gradient_variance/p90": 17.647537231445312, "masked_sentence_full_gradient_variance/p95": 17.647537231445312, "masked_sentence_full_gradient_variance/p99": 17.647537231445312, "masked_sentence_full_update_term": 0.0016794006805866957, "masked_sentence_full_update_term/max": 0.0081787109375, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002841949462890625, "masked_sentence_full_update_term/p85": 0.00473785400390625, "masked_sentence_full_update_term/p90": 0.00567626953125, "masked_sentence_full_update_term/p95": 0.0064697265625, "masked_sentence_full_update_term/p99": 0.0081787109375, "masked_sentence_full_update_term/var": 5.63524281460559e-06, "masked_sentence_hessian_coeff": -12201.6669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -69120.0, "masked_sentence_hessian_coeff/p25": -23552.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 280933760.0, "masked_sentence_hessian_coeff_abs": 12201.6669921875, "masked_sentence_hessian_coeff_abs/max": 69120.0, "masked_sentence_hessian_coeff_abs/median": 0.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 23552.0, "masked_sentence_hessian_coeff_abs/p99": 66201.609375, "masked_sentence_hessian_coeff_abs/var": 280933760.0, "masked_token_fisher_curvature": 2233.387939453125, "masked_token_fisher_curvature/max": 138240.0, "masked_token_fisher_curvature/median": 8.981260180007666e-12, "masked_token_fisher_curvature/min": 1.0607661021820322e-34, "masked_token_fisher_curvature/p25": 2.67841304690819e-15, "masked_token_fisher_curvature/p75": 6.198883056640625e-06, "masked_token_fisher_curvature/p85": 0.5035247802734375, "masked_token_fisher_curvature/p90": 155.0, "masked_token_fisher_curvature/p95": 5696.0, "masked_token_fisher_curvature/p99": 76288.0, "masked_token_fisher_curvature/var": 152609984.0, "masked_token_fisher_kl_divergence": 1.608561461807767e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 6.452008777913616e-22, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.9306266250016955e-25, "masked_token_fisher_kl_divergence/p75": 4.475586568020162e-16, "masked_token_fisher_kl_divergence/p85": 3.6335379149932123e-11, "masked_token_fisher_kl_divergence/p90": 1.1175870895385742e-08, "masked_token_fisher_kl_divergence/p95": 4.0978193283081055e-07, "masked_token_fisher_kl_divergence/p99": 5.4836273193359375e-06, "masked_token_fisher_kl_divergence/var": 7.917673678151105e-13, "masked_token_full_update_term": 3.731408651219681e-05, "masked_token_full_update_term/max": 0.00433349609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.887580871582031e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 0.0, "masked_token_full_update_term/p85": 4.263256414560601e-12, "masked_token_full_update_term/p90": 2.510205376893282e-10, "masked_token_full_update_term/p95": 8.344650268554688e-07, "masked_token_full_update_term/p99": 0.0014241337776184082, "masked_token_full_update_term/var": 8.536976281448005e-08, "masked_token_hessian_coeff": -19304.2578125, "masked_token_hessian_coeff/max": 468.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -2621440.0, "masked_token_hessian_coeff/p25": -4.06801700592041e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.0294189453125, "masked_token_hessian_coeff/var": 23387457536.0, "masked_token_hessian_coeff_abs": 19304.357421875, "masked_token_hessian_coeff_abs/max": 2621440.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 6.389617919921875e-05, "masked_token_hessian_coeff_abs/p99": 733184.0, "masked_token_hessian_coeff_abs/var": 23387453440.0, "mean_logprobs": -0.07275390625, "mean_logprobs/var": 0.00188446044921875, "num_completions/total": 864, "per_sentence_gradient_norm": 42.505210876464844, "per_sentence_gradient_norm/max": 378.0, "per_sentence_gradient_norm/median": 0.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 60.125, "per_sentence_gradient_norm/var": 4748.66162109375, "per_token_feature_norm": 184.5764617919922, "per_token_feature_norm/max": 334.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 65.0, "per_token_feature_norm/p25": 161.0, "per_token_feature_norm/p75": 208.0, "per_token_feature_norm/var": 1272.052734375, "per_token_gradient_norm": 3.2624733448028564, "per_token_gradient_norm/max": 420.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.257285475730896e-07, "per_token_gradient_norm/var": 494.634521484375, "per_token_policy_error_norm": 0.04007040336728096, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.03647702932357788, "policy_entropy": 0.08031972497701645, "policy_entropy/max": 3.75, "policy_entropy/median": 5.364418029785156e-07, "policy_entropy/min": 3.144186300207963e-17, "policy_entropy/p25": 1.0943040251731873e-08, "policy_entropy/p75": 0.00083160400390625, "policy_entropy/var": 0.06500303745269775, "policy_loss": -0.4479166865348816, "policy_loss/max": 0.0, "policy_loss/median": 0.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24989037215709686, "policy_sharpness": 8.454666137695312, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 9.026004791259766, "reward": 0.4479166865348816, "reward/max": 1.0, "reward/median": 0.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24989037215709686, "rewards/accuracy_reward": 0.4479166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 0.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24989037215709686, "sentence_fisher_curvature": 263978.0, "sentence_fisher_curvature/max": 1343488.0, "sentence_fisher_curvature/median": 39936.0, "sentence_fisher_curvature/min": 9280.0, "sentence_fisher_curvature/p25": 22144.0, "sentence_fisher_curvature/p75": 468992.0, "sentence_fisher_curvature/p85": 621568.0, "sentence_fisher_curvature/p90": 688128.0, "sentence_fisher_curvature/p95": 922624.0, "sentence_fisher_curvature/p99": 1250099.5, "sentence_fisher_curvature/var": 110238572544.0, "sentence_fisher_kl_divergence": 1.9008022718480788e-05, "sentence_fisher_kl_divergence/max": 9.679794311523438e-05, "sentence_fisher_kl_divergence/median": 2.8759241104125977e-06, "sentence_fisher_kl_divergence/min": 6.668269634246826e-07, "sentence_fisher_kl_divergence/p25": 1.5944242477416992e-06, "sentence_fisher_kl_divergence/p75": 3.3795833587646484e-05, "sentence_fisher_kl_divergence/p85": 4.476308822631836e-05, "sentence_fisher_kl_divergence/p90": 4.9591064453125e-05, "sentence_fisher_kl_divergence/p95": 6.651878356933594e-05, "sentence_fisher_kl_divergence/p99": 9.000303543871269e-05, "sentence_fisher_kl_divergence/var": 5.718596152171074e-10, "sentence_full_gradient_variance/max_squared_error": 6410.3896484375, "sentence_full_gradient_variance/metric": 6410.3896484375, "sentence_full_gradient_variance/p75": 6410.3896484375, "sentence_full_gradient_variance/p90": 6410.3896484375, "sentence_full_gradient_variance/p95": 6410.3896484375, "sentence_full_gradient_variance/p99": 6410.3896484375, "sentence_full_update_term": 0.041235607117414474, "sentence_full_update_term/max": 0.3046875, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0562744140625, "sentence_full_update_term/p85": 0.0968017578125, "sentence_full_update_term/p90": 0.14013671875, "sentence_full_update_term/p95": 0.1884765625, "sentence_full_update_term/p99": 0.262011855840683, "sentence_full_update_term/var": 0.0044460417702794075, "sentence_hessian_coeff": 77457.3359375, "sentence_hessian_coeff/max": 634880.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -136192.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 131200.0, "sentence_hessian_coeff/p99": 496742.84375, "sentence_hessian_coeff/var": 21490122752.0, "sentence_hessian_coeff_abs": 86568.0, "sentence_hessian_coeff_abs/max": 634880.0, "sentence_hessian_coeff_abs/median": 0.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 133888.0, "sentence_hessian_coeff_abs/p99": 496742.84375, "sentence_hessian_coeff_abs/var": 19980013568.0, "step": 9, "token_fisher_curvature": 320747.625, "token_fisher_curvature/max": 68157440.0, "token_fisher_curvature/median": 2.1941559680271894e-11, "token_fisher_curvature/min": 1.0607661021820322e-34, "token_fisher_curvature/p25": 4.3021142204224816e-15, "token_fisher_curvature/p75": 0.00041961669921875, "token_fisher_curvature/p85": 106.201171875, "token_fisher_curvature/p90": 5472.0, "token_fisher_curvature/p95": 156264.0, "token_fisher_curvature/p99": 9982464.0, "token_fisher_curvature/var": 7961573851136.0, "token_fisher_kl_divergence": 2.3100126782082953e-05, "token_fisher_kl_divergence/max": 0.004913330078125, "token_fisher_kl_divergence/median": 1.581569331201389e-21, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.101927297073854e-25, "token_fisher_kl_divergence/p75": 3.019806626980426e-14, "token_fisher_kl_divergence/p85": 7.660219125682488e-09, "token_fisher_kl_divergence/p90": 3.948807716369629e-07, "token_fisher_kl_divergence/p95": 1.1241529136896133e-05, "token_fisher_kl_divergence/p99": 0.0007183849811553955, "token_fisher_kl_divergence/var": 4.1298754638319224e-08, "token_full_update_term": 0.000982654164545238, "token_full_update_term/max": 0.10302734375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.887580871582031e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.452099717193846e-16, "token_full_update_term/p85": 2.9331204132176936e-11, "token_full_update_term/p90": 9.19681042432785e-09, "token_full_update_term/p95": 0.0004215240478515625, "token_full_update_term/p99": 0.03791999816894531, "token_full_update_term/var": 4.463018922251649e-05, "token_hessian_coeff": 70520.03125, "token_hessian_coeff/max": 66846720.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -8650752.0, "token_hessian_coeff/p25": -8.285045623779297e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 65536.0, "token_hessian_coeff/var": 5369511280640.0, "token_hessian_coeff_abs": 277739.34375, "token_hessian_coeff_abs/max": 66846720.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00014495849609375, "token_hessian_coeff_abs/p99": 6127616.0, "token_hessian_coeff_abs/var": 5297344086016.0 }, { "accuracy_reward": 0.5416666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2508772015571594, "adam_stats/lm_head/lr_effective_max": 8.785472164163366e-05, "adam_stats/lm_head/lr_effective_mean": -1.1262257099131645e-10, "adam_stats/lm_head/lr_effective_min": -8.587617776356637e-05, "adam_stats/lm_head/lr_effective_std": 2.483756588844699e-06, "adam_stats/lr_effective_max": 9.841598512139171e-05, "adam_stats/lr_effective_mean": -3.3848199243458055e-10, "adam_stats/lr_effective_min": -9.839264384936541e-05, "adam_stats/m_t_max": 0.015912754461169243, "adam_stats/m_t_mean": -8.484210556325422e-11, "adam_stats/m_t_min": -0.010256145149469376, "adam_stats/v_t_max": 2.6538193196756765e-05, "adam_stats/v_t_mean": 1.7296897421303914e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5416666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2508772015571594, "all_logprobs": -0.05705731362104416, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.9375, "all_logprobs/p1": -1.5546875, "all_logprobs/p10": -0.016357421875, "all_logprobs/p25": -1.5497207641601562e-06, "all_logprobs/p5": -0.203125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.10517187416553497, "clip_ratio": 0.0, "completion_length": 865.5104370117188, "completion_length/correct": 756.5, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 719.0, "completion_length/correct/min": 316.0, "completion_length/correct/p25": 559.0, "completion_length/correct/p75": 1024.0, "completion_length/correct/var": 59982.5703125, "completion_length/incorrect": 994.3409423828125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 615.0, "completion_length/incorrect/p25": 1024.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 9318.8349609375, "completion_length/max": 1024.0, "completion_length/median": 1024.0, "completion_length/min": 316.0, "completion_length/p25": 674.75, "completion_length/p75": 1024.0, "completion_length/var": 50610.8671875, "curvature_clip_ratio_token_fisher": 0.04463887959718704, "curvature_clip_ratio_token_hessian": 0.024467740207910538, "curvature_clip_ratio_total_fisher": 0.04463887959718704, "curvature_clip_ratio_total_full": 0.04463887959718704, "curvature_clip_ratio_total_hessian": 0.024467740207910538, "epoch": 0.016, "feature_vector_variance/max_squared_error": 160543.09375, "feature_vector_variance/metric": 32080.42578125, "generated_tokens/total": 664250.0, "global_fisher_curvature": 111104.0, "global_fisher_curvature/max": 111104.0, "global_fisher_curvature/median": 111104.0, "global_fisher_curvature/min": 111104.0, "global_fisher_curvature/p25": 111104.0, "global_fisher_curvature/p75": 111104.0, "global_fisher_curvature/p85": 111104.0, "global_fisher_curvature/p90": 111104.0, "global_fisher_curvature/p95": 111104.0, "global_fisher_curvature/p99": 111104.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.0132789611816406e-05, "global_fisher_kl_divergence/max": 1.0132789611816406e-05, "global_fisher_kl_divergence/median": 1.0132789611816406e-05, "global_fisher_kl_divergence/min": 1.0132789611816406e-05, "global_fisher_kl_divergence/p25": 1.0132789611816406e-05, "global_fisher_kl_divergence/p75": 1.0132789611816406e-05, "global_fisher_kl_divergence/p85": 1.0132789611816406e-05, "global_fisher_kl_divergence/p90": 1.0132789611816406e-05, "global_fisher_kl_divergence/p95": 1.0132789611816406e-05, "global_fisher_kl_divergence/p99": 1.0132789611816406e-05, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.212890625, "global_full_update_term/max": 0.212890625, "global_full_update_term/median": 0.212890625, "global_full_update_term/min": 0.212890625, "global_full_update_term/p25": 0.212890625, "global_full_update_term/p75": 0.212890625, "global_full_update_term/p85": 0.212890625, "global_full_update_term/p90": 0.212890625, "global_full_update_term/p95": 0.212890625, "global_full_update_term/p99": 0.212890625, "global_full_update_term/var": NaN, "global_hessian_coeff": 36864.0, "global_hessian_coeff/max": 36864.0, "global_hessian_coeff/median": 36864.0, "global_hessian_coeff/min": 36864.0, "global_hessian_coeff/p25": 36864.0, "global_hessian_coeff/p75": 36864.0, "global_hessian_coeff/p99": 36864.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 36864.0, "global_hessian_coeff_abs/max": 36864.0, "global_hessian_coeff_abs/median": 36864.0, "global_hessian_coeff_abs/min": 36864.0, "global_hessian_coeff_abs/p25": 36864.0, "global_hessian_coeff_abs/p75": 36864.0, "global_hessian_coeff_abs/p99": 36864.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.055179156363010406, "learning_rate": 1.5e-05, "loss": -0.5417, "masked_global_fisher_curvature": 892.0, "masked_global_fisher_curvature/max": 892.0, "masked_global_fisher_curvature/median": 892.0, "masked_global_fisher_curvature/min": 892.0, "masked_global_fisher_curvature/p25": 892.0, "masked_global_fisher_curvature/p75": 892.0, "masked_global_fisher_curvature/p85": 892.0, "masked_global_fisher_curvature/p90": 892.0, "masked_global_fisher_curvature/p95": 892.0, "masked_global_fisher_curvature/p99": 892.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/max": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/median": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/min": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/p25": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/p75": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/p85": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/p90": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/p95": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/p99": 8.149072527885437e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.008544921875, "masked_global_full_update_term/max": 0.008544921875, "masked_global_full_update_term/median": 0.008544921875, "masked_global_full_update_term/min": 0.008544921875, "masked_global_full_update_term/p25": 0.008544921875, "masked_global_full_update_term/p75": 0.008544921875, "masked_global_full_update_term/p85": 0.008544921875, "masked_global_full_update_term/p90": 0.008544921875, "masked_global_full_update_term/p95": 0.008544921875, "masked_global_full_update_term/p99": 0.008544921875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2368.0, "masked_global_hessian_coeff/max": -2368.0, "masked_global_hessian_coeff/median": -2368.0, "masked_global_hessian_coeff/min": -2368.0, "masked_global_hessian_coeff/p25": -2368.0, "masked_global_hessian_coeff/p75": -2368.0, "masked_global_hessian_coeff/p99": -2368.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2368.0, "masked_global_hessian_coeff_abs/max": 2368.0, "masked_global_hessian_coeff_abs/median": 2368.0, "masked_global_hessian_coeff_abs/min": 2368.0, "masked_global_hessian_coeff_abs/p25": 2368.0, "masked_global_hessian_coeff_abs/p75": 2368.0, "masked_global_hessian_coeff_abs/p99": 2368.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.458740234375, "masked_per_sentence_gradient_norm/max": 12.9375, "masked_per_sentence_gradient_norm/median": 1.703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.94921875, "masked_per_sentence_gradient_norm/var": 8.687018394470215, "masked_per_token_gradient_norm": 0.10699301213026047, "masked_per_token_gradient_norm/max": 22.75, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.6100467443466187e-08, "masked_per_token_gradient_norm/var": 0.7806638479232788, "masked_sentence_fisher_curvature": 917.40625, "masked_sentence_fisher_curvature/max": 3104.0, "masked_sentence_fisher_curvature/median": 596.0, "masked_sentence_fisher_curvature/min": 70.5, "masked_sentence_fisher_curvature/p25": 445.5, "masked_sentence_fisher_curvature/p75": 1382.0, "masked_sentence_fisher_curvature/p85": 1624.0, "masked_sentence_fisher_curvature/p90": 1956.0, "masked_sentence_fisher_curvature/p95": 2216.0, "masked_sentence_fisher_curvature/p99": 2800.0009765625, "masked_sentence_fisher_curvature/var": 466959.40625, "masked_sentence_fisher_kl_divergence": 8.36222824318611e-08, "masked_sentence_fisher_kl_divergence/max": 2.8312206268310547e-07, "masked_sentence_fisher_kl_divergence/median": 5.4249539971351624e-08, "masked_sentence_fisher_kl_divergence/min": 6.4319465309381485e-09, "masked_sentence_fisher_kl_divergence/p25": 4.068715497851372e-08, "masked_sentence_fisher_kl_divergence/p75": 1.2619420886039734e-07, "masked_sentence_fisher_kl_divergence/p85": 1.4831312000751495e-07, "masked_sentence_fisher_kl_divergence/p90": 1.7834827303886414e-07, "masked_sentence_fisher_kl_divergence/p95": 2.0186416804790497e-07, "masked_sentence_fisher_kl_divergence/p99": 2.548099473642651e-07, "masked_sentence_fisher_kl_divergence/var": 3.8795438825974145e-15, "masked_sentence_full_gradient_variance/max_squared_error": 14.343104362487793, "masked_sentence_full_gradient_variance/metric": 14.343104362487793, "masked_sentence_full_gradient_variance/p75": 14.343104362487793, "masked_sentence_full_gradient_variance/p90": 14.343104362487793, "masked_sentence_full_gradient_variance/p95": 14.343104362487793, "masked_sentence_full_gradient_variance/p99": 14.343104362487793, "masked_sentence_full_update_term": 0.0019363164901733398, "masked_sentence_full_update_term/max": 0.0115966796875, "masked_sentence_full_update_term/median": 0.001190185546875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.003131866455078125, "masked_sentence_full_update_term/p85": 0.004180908203125, "masked_sentence_full_update_term/p90": 0.0046234130859375, "masked_sentence_full_update_term/p95": 0.00736236572265625, "masked_sentence_full_update_term/p99": 0.010958864353597164, "masked_sentence_full_update_term/var": 6.508076239697402e-06, "masked_sentence_hessian_coeff": -9711.333984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6368.0, "masked_sentence_hessian_coeff/min": -43264.0, "masked_sentence_hessian_coeff/p25": -17984.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 121463984.0, "masked_sentence_hessian_coeff_abs": 9711.333984375, "masked_sentence_hessian_coeff_abs/max": 43264.0, "masked_sentence_hessian_coeff_abs/median": 5952.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 17984.0, "masked_sentence_hessian_coeff_abs/p99": 40832.0078125, "masked_sentence_hessian_coeff_abs/var": 121463984.0, "masked_token_fisher_curvature": 1207.742919921875, "masked_token_fisher_curvature/max": 109056.0, "masked_token_fisher_curvature/median": 4.8627768478581856e-14, "masked_token_fisher_curvature/min": 2.228737289159057e-35, "masked_token_fisher_curvature/p25": 1.3552527156068805e-17, "masked_token_fisher_curvature/p75": 6.810296326875687e-09, "masked_token_fisher_curvature/p85": 0.0003719329833984375, "masked_token_fisher_curvature/p90": 1.25, "masked_token_fisher_curvature/p95": 1056.0, "masked_token_fisher_curvature/p99": 44544.0, "masked_token_fisher_curvature/var": 65126648.0, "masked_token_fisher_kl_divergence": 1.1005488431692356e-07, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.420246398330242e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.2369338993865465e-27, "masked_token_fisher_kl_divergence/p75": 6.2002811739014785e-19, "masked_token_fisher_kl_divergence/p85": 3.397282455352979e-14, "masked_token_fisher_kl_divergence/p90": 1.1368683772161603e-10, "masked_token_fisher_kl_divergence/p95": 9.639188647270203e-08, "masked_token_fisher_kl_divergence/p99": 4.0531158447265625e-06, "masked_token_fisher_kl_divergence/var": 5.407966283989263e-13, "masked_token_full_update_term": 3.437412669882178e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -6.407499313354492e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.373101715136272e-15, "masked_token_full_update_term/p85": 3.595346242946107e-12, "masked_token_full_update_term/p90": 2.6921043172478676e-10, "masked_token_full_update_term/p95": 5.662441253662109e-07, "masked_token_full_update_term/p99": 0.00131988525390625, "masked_token_full_update_term/var": 7.849197913856187e-08, "masked_token_hessian_coeff": -17055.568359375, "masked_token_hessian_coeff/max": 5984.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -2473984.0, "masked_token_hessian_coeff/p25": -4.6193599700927734e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.02978515625, "masked_token_hessian_coeff/var": 19980795904.0, "masked_token_hessian_coeff_abs": 17056.34375, "masked_token_hessian_coeff_abs/max": 2473984.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.750490188598633e-05, "masked_token_hessian_coeff_abs/p99": 651264.0, "masked_token_hessian_coeff_abs/var": 19980769280.0, "mean_logprobs": -0.052001953125, "mean_logprobs/var": 0.00164794921875, "num_completions/total": 960, "per_sentence_gradient_norm": 72.88671875, "per_sentence_gradient_norm/max": 346.0, "per_sentence_gradient_norm/median": 54.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 130.25, "per_sentence_gradient_norm/var": 6949.2900390625, "per_token_feature_norm": 198.73377990722656, "per_token_feature_norm/max": 332.0, "per_token_feature_norm/median": 199.0, "per_token_feature_norm/min": 65.5, "per_token_feature_norm/p25": 183.0, "per_token_feature_norm/p75": 218.0, "per_token_feature_norm/var": 983.3921508789062, "per_token_gradient_norm": 3.4301671981811523, "per_token_gradient_norm/max": 420.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.546056389808655e-08, "per_token_gradient_norm/var": 555.306884765625, "per_token_policy_error_norm": 0.02943897247314453, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.026540584862232208, "policy_entropy": 0.06290333718061447, "policy_entropy/max": 3.703125, "policy_entropy/median": 4.353933036327362e-08, "policy_entropy/min": 1.3010426069826053e-17, "policy_entropy/p25": 8.440110832452774e-10, "policy_entropy/p75": 2.3484230041503906e-05, "policy_entropy/var": 0.060564324259757996, "policy_loss": -0.5416666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2508772015571594, "policy_sharpness": 8.865724563598633, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 7.227980613708496, "reward": 0.5416666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2508772015571594, "rewards/accuracy_reward": 0.5416666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2508772015571594, "sentence_fisher_curvature": 266536.71875, "sentence_fisher_curvature/max": 1482752.0, "sentence_fisher_curvature/median": 117248.0, "sentence_fisher_curvature/min": 980.0, "sentence_fisher_curvature/p25": 14288.0, "sentence_fisher_curvature/p75": 437248.0, "sentence_fisher_curvature/p85": 664576.0, "sentence_fisher_curvature/p90": 819200.0, "sentence_fisher_curvature/p95": 918528.0, "sentence_fisher_curvature/p99": 1140327.5, "sentence_fisher_curvature/var": 111103614976.0, "sentence_fisher_kl_divergence": 2.4286709958687425e-05, "sentence_fisher_kl_divergence/max": 0.0001354217529296875, "sentence_fisher_kl_divergence/median": 1.0669231414794922e-05, "sentence_fisher_kl_divergence/min": 8.940696716308594e-08, "sentence_fisher_kl_divergence/p25": 1.300126314163208e-06, "sentence_fisher_kl_divergence/p75": 3.987550735473633e-05, "sentence_fisher_kl_divergence/p85": 6.0498714447021484e-05, "sentence_fisher_kl_divergence/p90": 7.462501525878906e-05, "sentence_fisher_kl_divergence/p95": 8.356571197509766e-05, "sentence_fisher_kl_divergence/p99": 0.00010371218377258629, "sentence_fisher_kl_divergence/var": 9.225392672007615e-10, "sentence_full_gradient_variance/max_squared_error": 12025.375, "sentence_full_gradient_variance/metric": 12025.375, "sentence_full_gradient_variance/p75": 12025.375, "sentence_full_gradient_variance/p90": 12025.375, "sentence_full_gradient_variance/p95": 12025.375, "sentence_full_gradient_variance/p99": 12025.375, "sentence_full_update_term": 0.0707143172621727, "sentence_full_update_term/max": 0.30859375, "sentence_full_update_term/median": 0.0439453125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.11083984375, "sentence_full_update_term/p85": 0.17724609375, "sentence_full_update_term/p90": 0.21337890625, "sentence_full_update_term/p95": 0.248291015625, "sentence_full_update_term/p99": 0.3011718988418579, "sentence_full_update_term/var": 0.007687929552048445, "sentence_hessian_coeff": 81938.171875, "sentence_hessian_coeff/max": 860160.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -112128.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 114688.0, "sentence_hessian_coeff/p99": 521626.6875, "sentence_hessian_coeff/var": 29349396480.0, "sentence_hessian_coeff_abs": 100806.171875, "sentence_hessian_coeff_abs/max": 860160.0, "sentence_hessian_coeff_abs/median": 10752.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 114688.0, "sentence_hessian_coeff_abs/p99": 521626.6875, "sentence_hessian_coeff_abs/var": 25865082880.0, "step": 10, "token_fisher_curvature": 339172.75, "token_fisher_curvature/max": 66584576.0, "token_fisher_curvature/median": 1.0613732115416497e-13, "token_fisher_curvature/min": 2.228737289159057e-35, "token_fisher_curvature/p25": 2.1467203015212988e-17, "token_fisher_curvature/p75": 1.424923539161682e-07, "token_fisher_curvature/p85": 0.16796875, "token_fisher_curvature/p90": 512.0, "token_fisher_curvature/p95": 65536.0, "token_fisher_curvature/p99": 11599872.0, "token_fisher_curvature/var": 9299576225792.0, "token_fisher_kl_divergence": 3.09054485114757e-05, "token_fisher_kl_divergence/max": 0.006072998046875, "token_fisher_kl_divergence/median": 9.667673409213511e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.9563750449481093e-27, "token_fisher_kl_divergence/p75": 1.3010426069826053e-17, "token_fisher_kl_divergence/p85": 1.5347723092418164e-11, "token_fisher_kl_divergence/p90": 4.6566128730773926e-08, "token_fisher_kl_divergence/p95": 5.9604644775390625e-06, "token_fisher_kl_divergence/p99": 0.00106048583984375, "token_fisher_kl_divergence/var": 7.72091297562838e-08, "token_full_update_term": 0.001139850472100079, "token_full_update_term/max": 0.11572265625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -6.407499313354492e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.7763568394002505e-14, "token_full_update_term/p85": 2.6353141890922416e-11, "token_full_update_term/p90": 1.6065314412117004e-08, "token_full_update_term/p95": 0.00037498772144317627, "token_full_update_term/p99": 0.046142578125, "token_full_update_term/var": 6.17316909483634e-05, "token_hessian_coeff": 88658.3984375, "token_hessian_coeff/max": 65798144.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -8650752.0, "token_hessian_coeff/p25": -8.404254913330078e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 528128.0, "token_hessian_coeff/var": 6507241406464.0, "token_hessian_coeff_abs": 300940.375, "token_hessian_coeff_abs/max": 65798144.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 9.918212890625e-05, "token_hessian_coeff_abs/p99": 6881280.0, "token_hessian_coeff_abs/var": 6424535498752.0 }, { "accuracy_reward": 0.6145833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.23936404287815094, "adam_stats/lm_head/lr_effective_max": 8.811964653432369e-05, "adam_stats/lm_head/lr_effective_mean": -6.94136623180519e-11, "adam_stats/lm_head/lr_effective_min": -8.761223580222577e-05, "adam_stats/lm_head/lr_effective_std": 2.356090817556833e-06, "adam_stats/lr_effective_max": 9.743407281348482e-05, "adam_stats/lr_effective_mean": -3.79826364804714e-10, "adam_stats/lr_effective_min": -9.735803178045899e-05, "adam_stats/m_t_max": 0.014335593208670616, "adam_stats/m_t_mean": -7.898131454409096e-11, "adam_stats/m_t_min": -0.009293091483414173, "adam_stats/v_t_max": 2.6511675969231874e-05, "adam_stats/v_t_mean": 1.7287567861609676e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6145833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.23936404287815094, "all_logprobs": -0.051402099430561066, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -10.375, "all_logprobs/p1": -1.4417190551757812, "all_logprobs/p10": -0.007415771484375, "all_logprobs/p25": -5.960464477539062e-07, "all_logprobs/p5": -0.16015625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.09704061597585678, "clip_ratio": 0.0, "completion_length": 841.1146240234375, "completion_length/correct": 785.1356201171875, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 1015.0, "completion_length/correct/min": 219.0, "completion_length/correct/p25": 452.5, "completion_length/correct/p75": 1024.0, "completion_length/correct/var": 77705.2578125, "completion_length/incorrect": 930.37841796875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 380.0, "completion_length/incorrect/p25": 1024.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 46159.359375, "completion_length/max": 1024.0, "completion_length/median": 1024.0, "completion_length/min": 219.0, "completion_length/p25": 590.0, "completion_length/p75": 1024.0, "completion_length/var": 69982.5625, "curvature_clip_ratio_token_fisher": 0.050367195159196854, "curvature_clip_ratio_token_hessian": 0.033227238804101944, "curvature_clip_ratio_total_fisher": 0.050367195159196854, "curvature_clip_ratio_total_full": 0.050367195159196854, "curvature_clip_ratio_total_hessian": 0.033227238804101944, "epoch": 0.0176, "feature_vector_variance/max_squared_error": 163212.765625, "feature_vector_variance/metric": 32318.716796875, "generated_tokens/total": 744997.0, "global_fisher_curvature": 112128.0, "global_fisher_curvature/max": 112128.0, "global_fisher_curvature/median": 112128.0, "global_fisher_curvature/min": 112128.0, "global_fisher_curvature/p25": 112128.0, "global_fisher_curvature/p75": 112128.0, "global_fisher_curvature/p85": 112128.0, "global_fisher_curvature/p90": 112128.0, "global_fisher_curvature/p95": 112128.0, "global_fisher_curvature/p99": 112128.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.2636184692382812e-05, "global_fisher_kl_divergence/max": 1.2636184692382812e-05, "global_fisher_kl_divergence/median": 1.2636184692382812e-05, "global_fisher_kl_divergence/min": 1.2636184692382812e-05, "global_fisher_kl_divergence/p25": 1.2636184692382812e-05, "global_fisher_kl_divergence/p75": 1.2636184692382812e-05, "global_fisher_kl_divergence/p85": 1.2636184692382812e-05, "global_fisher_kl_divergence/p90": 1.2636184692382812e-05, "global_fisher_kl_divergence/p95": 1.2636184692382812e-05, "global_fisher_kl_divergence/p99": 1.2636184692382812e-05, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.2333984375, "global_full_update_term/max": 0.2333984375, "global_full_update_term/median": 0.2333984375, "global_full_update_term/min": 0.2333984375, "global_full_update_term/p25": 0.2333984375, "global_full_update_term/p75": 0.2333984375, "global_full_update_term/p85": 0.2333984375, "global_full_update_term/p90": 0.2333984375, "global_full_update_term/p95": 0.2333984375, "global_full_update_term/p99": 0.2333984375, "global_full_update_term/var": NaN, "global_hessian_coeff": 37888.0, "global_hessian_coeff/max": 37888.0, "global_hessian_coeff/median": 37888.0, "global_hessian_coeff/min": 37888.0, "global_hessian_coeff/p25": 37888.0, "global_hessian_coeff/p75": 37888.0, "global_hessian_coeff/p99": 37888.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 37888.0, "global_hessian_coeff_abs/max": 37888.0, "global_hessian_coeff_abs/median": 37888.0, "global_hessian_coeff_abs/min": 37888.0, "global_hessian_coeff_abs/p25": 37888.0, "global_hessian_coeff_abs/p75": 37888.0, "global_hessian_coeff_abs/p99": 37888.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04431635141372681, "learning_rate": 1.4995431202643219e-05, "loss": -0.6146, "masked_global_fisher_curvature": 676.0, "masked_global_fisher_curvature/max": 676.0, "masked_global_fisher_curvature/median": 676.0, "masked_global_fisher_curvature/min": 676.0, "masked_global_fisher_curvature/p25": 676.0, "masked_global_fisher_curvature/p75": 676.0, "masked_global_fisher_curvature/p85": 676.0, "masked_global_fisher_curvature/p90": 676.0, "masked_global_fisher_curvature/p95": 676.0, "masked_global_fisher_curvature/p99": 676.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/max": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/median": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/min": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/p25": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/p75": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/p85": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/p90": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/p95": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/p99": 7.59027898311615e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.004486083984375, "masked_global_full_update_term/max": 0.004486083984375, "masked_global_full_update_term/median": 0.004486083984375, "masked_global_full_update_term/min": 0.004486083984375, "masked_global_full_update_term/p25": 0.004486083984375, "masked_global_full_update_term/p75": 0.004486083984375, "masked_global_full_update_term/p85": 0.004486083984375, "masked_global_full_update_term/p90": 0.004486083984375, "masked_global_full_update_term/p95": 0.004486083984375, "masked_global_full_update_term/p99": 0.004486083984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1944.0, "masked_global_hessian_coeff/max": -1944.0, "masked_global_hessian_coeff/median": -1944.0, "masked_global_hessian_coeff/min": -1944.0, "masked_global_hessian_coeff/p25": -1944.0, "masked_global_hessian_coeff/p75": -1944.0, "masked_global_hessian_coeff/p99": -1944.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1944.0, "masked_global_hessian_coeff_abs/max": 1944.0, "masked_global_hessian_coeff_abs/median": 1944.0, "masked_global_hessian_coeff_abs/min": 1944.0, "masked_global_hessian_coeff_abs/p25": 1944.0, "masked_global_hessian_coeff_abs/p75": 1944.0, "masked_global_hessian_coeff_abs/p99": 1944.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.1349692344665527, "masked_per_sentence_gradient_norm/max": 11.0625, "masked_per_sentence_gradient_norm/median": 1.828125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.67578125, "masked_per_sentence_gradient_norm/var": 4.96444034576416, "masked_per_token_gradient_norm": 0.11279872059822083, "masked_per_token_gradient_norm/max": 24.5, "masked_per_token_gradient_norm/median": 8.230927051045e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.7567148208618164e-07, "masked_per_token_gradient_norm/var": 0.6865296959877014, "masked_sentence_fisher_curvature": 618.1146240234375, "masked_sentence_fisher_curvature/max": 3312.0, "masked_sentence_fisher_curvature/median": 464.0, "masked_sentence_fisher_curvature/min": 66.5, "masked_sentence_fisher_curvature/p25": 291.0, "masked_sentence_fisher_curvature/p75": 743.0, "masked_sentence_fisher_curvature/p85": 1072.0, "masked_sentence_fisher_curvature/p90": 1380.0, "masked_sentence_fisher_curvature/p95": 1680.0, "masked_sentence_fisher_curvature/p99": 2719.201904296875, "masked_sentence_fisher_curvature/var": 315061.375, "masked_sentence_fisher_kl_divergence": 6.950480013756533e-08, "masked_sentence_fisher_kl_divergence/max": 3.725290298461914e-07, "masked_sentence_fisher_kl_divergence/median": 5.21540641784668e-08, "masked_sentence_fisher_kl_divergence/min": 7.508788257837296e-09, "masked_sentence_fisher_kl_divergence/p25": 3.271270543336868e-08, "masked_sentence_fisher_kl_divergence/p75": 8.370261639356613e-08, "masked_sentence_fisher_kl_divergence/p85": 1.203734427690506e-07, "masked_sentence_fisher_kl_divergence/p90": 1.5506520867347717e-07, "masked_sentence_fisher_kl_divergence/p95": 1.8882565200328827e-07, "masked_sentence_fisher_kl_divergence/p99": 3.0528775596394553e-07, "masked_sentence_fisher_kl_divergence/var": 3.980989630558273e-15, "masked_sentence_full_gradient_variance/max_squared_error": 9.336054801940918, "masked_sentence_full_gradient_variance/metric": 9.336054801940918, "masked_sentence_full_gradient_variance/p75": 9.336054801940918, "masked_sentence_full_gradient_variance/p90": 9.336054801940918, "masked_sentence_full_gradient_variance/p95": 9.336054801940918, "masked_sentence_full_gradient_variance/p99": 9.336054801940918, "masked_sentence_full_update_term": 0.0018413265934213996, "masked_sentence_full_update_term/max": 0.00775146484375, "masked_sentence_full_update_term/median": 0.0016326904296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00336456298828125, "masked_sentence_full_update_term/p85": 0.00382232666015625, "masked_sentence_full_update_term/p90": 0.0042572021484375, "masked_sentence_full_update_term/p95": 0.00479888916015625, "masked_sentence_full_update_term/p99": 0.006446842569857836, "masked_sentence_full_update_term/var": 3.465641839284217e-06, "masked_sentence_hessian_coeff": -9388.1669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -9280.0, "masked_sentence_hessian_coeff/min": -33024.0, "masked_sentence_hessian_coeff/p25": -16400.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 83884848.0, "masked_sentence_hessian_coeff_abs": 9388.1669921875, "masked_sentence_hessian_coeff_abs/max": 33024.0, "masked_sentence_hessian_coeff_abs/median": 9088.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 16400.0, "masked_sentence_hessian_coeff_abs/p99": 31808.00390625, "masked_sentence_hessian_coeff_abs/var": 83884848.0, "masked_token_fisher_curvature": 845.812744140625, "masked_token_fisher_curvature/max": 88576.0, "masked_token_fisher_curvature/median": 1.7208456881689926e-14, "masked_token_fisher_curvature/min": 9.328723168125674e-34, "masked_token_fisher_curvature/p25": 4.1470733097570545e-18, "masked_token_fisher_curvature/p75": 1.1932570487260818e-09, "masked_token_fisher_curvature/p85": 1.0687392204999924e-05, "masked_token_fisher_curvature/p90": 0.04296875, "masked_token_fisher_curvature/p95": 416.0, "masked_token_fisher_curvature/p99": 32000.0, "masked_token_fisher_curvature/var": 36667000.0, "masked_token_fisher_kl_divergence": 9.512827148228098e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.9387045606711586e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.67005655890839e-28, "masked_token_fisher_kl_divergence/p75": 1.3383120566617945e-19, "masked_token_fisher_kl_divergence/p85": 1.2025428396122972e-15, "masked_token_fisher_kl_divergence/p90": 4.831690603168681e-12, "masked_token_fisher_kl_divergence/p95": 4.6798959374427795e-08, "masked_token_fisher_kl_divergence/p99": 3.606081008911133e-06, "masked_token_fisher_kl_divergence/var": 4.638096547451842e-13, "masked_token_full_update_term": 4.019734842586331e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.589557647705078e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.2612133559741778e-13, "masked_token_full_update_term/p85": 4.0245140553452075e-11, "masked_token_full_update_term/p90": 3.3775222618714906e-09, "masked_token_full_update_term/p95": 4.231929779052734e-06, "masked_token_full_update_term/p99": 0.00160980224609375, "masked_token_full_update_term/var": 8.756895653050378e-08, "masked_token_hessian_coeff": -18339.30078125, "masked_token_hessian_coeff/max": 716.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2179072.0, "masked_token_hessian_coeff/p25": -3.981590270996094e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.052001953125, "masked_token_hessian_coeff/var": 18898800640.0, "masked_token_hessian_coeff_abs": 18339.40234375, "masked_token_hessian_coeff_abs/max": 2179072.0, "masked_token_hessian_coeff_abs/median": 4.400499165058136e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00023555755615234375, "masked_token_hessian_coeff_abs/p99": 724992.0, "masked_token_hessian_coeff_abs/var": 18898796544.0, "mean_logprobs": -0.04833984375, "mean_logprobs/var": 0.00139617919921875, "num_completions/total": 1056, "per_sentence_gradient_norm": 61.86979293823242, "per_sentence_gradient_norm/max": 286.0, "per_sentence_gradient_norm/median": 53.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 99.25, "per_sentence_gradient_norm/var": 4088.934814453125, "per_token_feature_norm": 202.7738494873047, "per_token_feature_norm/max": 332.0, "per_token_feature_norm/median": 202.0, "per_token_feature_norm/min": 66.5, "per_token_feature_norm/p25": 188.0, "per_token_feature_norm/p75": 220.0, "per_token_feature_norm/var": 799.512939453125, "per_token_gradient_norm": 4.246248722076416, "per_token_gradient_norm/max": 386.0, "per_token_gradient_norm/median": 2.2009771782904863e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 7.115304470062256e-07, "per_token_gradient_norm/var": 654.0068359375, "per_token_policy_error_norm": 0.02639574185013771, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.023625263944268227, "policy_entropy": 0.05679871886968613, "policy_entropy/max": 3.65625, "policy_entropy/median": 3.003515303134918e-08, "policy_entropy/min": 5.2583805365546965e-18, "policy_entropy/p25": 5.657057045027614e-10, "policy_entropy/p75": 1.0371208190917969e-05, "policy_entropy/var": 0.05618672072887421, "policy_loss": -0.6145833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.23936404287815094, "policy_sharpness": 8.988770484924316, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 6.536299228668213, "reward": 0.6145833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.23936404287815094, "rewards/accuracy_reward": 0.6145833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.23936404287815094, "sentence_fisher_curvature": 317409.3125, "sentence_fisher_curvature/max": 1245184.0, "sentence_fisher_curvature/median": 232448.0, "sentence_fisher_curvature/min": 652.0, "sentence_fisher_curvature/p25": 12256.0, "sentence_fisher_curvature/p75": 556032.0, "sentence_fisher_curvature/p85": 643072.0, "sentence_fisher_curvature/p90": 739328.0, "sentence_fisher_curvature/p95": 950272.0, "sentence_fisher_curvature/p99": 1136230.75, "sentence_fisher_curvature/var": 105065701376.0, "sentence_fisher_kl_divergence": 3.568628380890004e-05, "sentence_fisher_kl_divergence/max": 0.00014019012451171875, "sentence_fisher_kl_divergence/median": 2.6106834411621094e-05, "sentence_fisher_kl_divergence/min": 7.35744833946228e-08, "sentence_fisher_kl_divergence/p25": 1.3764947652816772e-06, "sentence_fisher_kl_divergence/p75": 6.23464584350586e-05, "sentence_fisher_kl_divergence/p85": 7.2479248046875e-05, "sentence_fisher_kl_divergence/p90": 8.320808410644531e-05, "sentence_fisher_kl_divergence/p95": 0.0001068115234375, "sentence_fisher_kl_divergence/p99": 0.00012750629684887826, "sentence_fisher_kl_divergence/var": 1.3274966681464662e-09, "sentence_full_gradient_variance/max_squared_error": 7745.2138671875, "sentence_full_gradient_variance/metric": 7745.2138671875, "sentence_full_gradient_variance/p75": 7745.2138671875, "sentence_full_gradient_variance/p90": 7745.2138671875, "sentence_full_gradient_variance/p95": 7745.2138671875, "sentence_full_gradient_variance/p99": 7745.2138671875, "sentence_full_update_term": 0.0772705078125, "sentence_full_update_term/max": 0.365234375, "sentence_full_update_term/median": 0.048828125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.1328125, "sentence_full_update_term/p85": 0.177490234375, "sentence_full_update_term/p90": 0.2001953125, "sentence_full_update_term/p95": 0.23291015625, "sentence_full_update_term/p99": 0.26875030994415283, "sentence_full_update_term/var": 0.0070879096165299416, "sentence_hessian_coeff": 91986.75, "sentence_hessian_coeff/max": 663552.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -116224.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 189184.0, "sentence_hessian_coeff/p99": 589619.4375, "sentence_hessian_coeff/var": 22917361664.0, "sentence_hessian_coeff_abs": 106802.921875, "sentence_hessian_coeff_abs/max": 663552.0, "sentence_hessian_coeff_abs/median": 43264.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 189184.0, "sentence_hessian_coeff_abs/p99": 589619.4375, "sentence_hessian_coeff_abs/var": 19941058560.0, "step": 11, "token_fisher_curvature": 414344.59375, "token_fisher_curvature/max": 65536000.0, "token_fisher_curvature/median": 4.085620730620576e-14, "token_fisher_curvature/min": 9.328723168125674e-34, "token_fisher_curvature/p25": 6.938893903907228e-18, "token_fisher_curvature/p75": 2.270098775625229e-08, "token_fisher_curvature/p85": 0.0179443359375, "token_fisher_curvature/p90": 294.0, "token_fisher_curvature/p95": 92160.0, "token_fisher_curvature/p99": 15269888.0, "token_fisher_curvature/var": 11149392216064.0, "token_fisher_kl_divergence": 4.660059857997112e-05, "token_fisher_kl_divergence/max": 0.00738525390625, "token_fisher_kl_divergence/median": 4.6011921573262164e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 7.793945743583597e-28, "token_fisher_kl_divergence/p75": 2.5478751053409354e-18, "token_fisher_kl_divergence/p85": 2.0179413695586845e-12, "token_fisher_kl_divergence/p90": 3.306195139884949e-08, "token_fisher_kl_divergence/p95": 1.0371208190917969e-05, "token_fisher_kl_divergence/p99": 0.00171661376953125, "token_fisher_kl_divergence/var": 1.4103585499469773e-07, "token_full_update_term": 0.0015955254202708602, "token_full_update_term/max": 0.12890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.589557647705078e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.714628852932947e-13, "token_full_update_term/p85": 8.294591680169106e-10, "token_full_update_term/p90": 6.571353878825903e-07, "token_full_update_term/p95": 0.00191497802734375, "token_full_update_term/p99": 0.059326171875, "token_full_update_term/var": 9.373776993015781e-05, "token_hessian_coeff": 84691.359375, "token_hessian_coeff/max": 65011712.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -8585216.0, "token_hessian_coeff/p25": -8.58306884765625e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 4521984.0, "token_hessian_coeff/var": 7625106259968.0, "token_hessian_coeff_abs": 368679.28125, "token_hessian_coeff_abs/max": 65011712.0, "token_hessian_coeff_abs/median": 1.0989606380462646e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00064849853515625, "token_hessian_coeff_abs/p99": 7569408.0, "token_hessian_coeff_abs/var": 7496353185792.0 }, { "accuracy_reward": 0.5833333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24561403691768646, "adam_stats/lm_head/lr_effective_max": 8.755610178923234e-05, "adam_stats/lm_head/lr_effective_mean": -2.8308551683342564e-11, "adam_stats/lm_head/lr_effective_min": -8.906235598260537e-05, "adam_stats/lm_head/lr_effective_std": 2.206931867476669e-06, "adam_stats/lr_effective_max": 9.84534271992743e-05, "adam_stats/lr_effective_mean": -3.115503410811016e-10, "adam_stats/lr_effective_min": -9.737316577229649e-05, "adam_stats/m_t_max": 0.012642634101212025, "adam_stats/m_t_mean": -7.257520973080744e-11, "adam_stats/m_t_min": -0.008076230995357037, "adam_stats/v_t_max": 2.649189264047891e-05, "adam_stats/v_t_mean": 1.7322964894136983e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5833333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24561403691768646, "all_logprobs": -0.05507757514715195, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.625, "all_logprobs/p1": -1.9140625, "all_logprobs/p10": -0.0037384033203125, "all_logprobs/p25": -4.76837158203125e-07, "all_logprobs/p5": -0.140625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.10764182358980179, "clip_ratio": 0.0, "completion_length": 814.2708740234375, "completion_length/correct": 756.8750610351562, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 754.0, "completion_length/correct/min": 262.0, "completion_length/correct/p25": 555.0, "completion_length/correct/p75": 1024.0, "completion_length/correct/var": 46654.25390625, "completion_length/incorrect": 894.625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 321.0, "completion_length/incorrect/p25": 885.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 57593.01171875, "completion_length/max": 1024.0, "completion_length/median": 837.0, "completion_length/min": 262.0, "completion_length/p25": 671.0, "completion_length/p75": 1024.0, "completion_length/var": 55314.34765625, "curvature_clip_ratio_token_fisher": 0.03784060478210449, "curvature_clip_ratio_token_hessian": 0.024011768400669098, "curvature_clip_ratio_total_fisher": 0.03784060478210449, "curvature_clip_ratio_total_full": 0.03784060478210449, "curvature_clip_ratio_total_hessian": 0.024011768400669098, "epoch": 0.0192, "feature_vector_variance/max_squared_error": 155910.390625, "feature_vector_variance/metric": 30266.08984375, "generated_tokens/total": 823167.0, "global_fisher_curvature": 78848.0, "global_fisher_curvature/max": 78848.0, "global_fisher_curvature/median": 78848.0, "global_fisher_curvature/min": 78848.0, "global_fisher_curvature/p25": 78848.0, "global_fisher_curvature/p75": 78848.0, "global_fisher_curvature/p85": 78848.0, "global_fisher_curvature/p90": 78848.0, "global_fisher_curvature/p95": 78848.0, "global_fisher_curvature/p99": 78848.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 8.881092071533203e-06, "global_fisher_kl_divergence/max": 8.881092071533203e-06, "global_fisher_kl_divergence/median": 8.881092071533203e-06, "global_fisher_kl_divergence/min": 8.881092071533203e-06, "global_fisher_kl_divergence/p25": 8.881092071533203e-06, "global_fisher_kl_divergence/p75": 8.881092071533203e-06, "global_fisher_kl_divergence/p85": 8.881092071533203e-06, "global_fisher_kl_divergence/p90": 8.881092071533203e-06, "global_fisher_kl_divergence/p95": 8.881092071533203e-06, "global_fisher_kl_divergence/p99": 8.881092071533203e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.197265625, "global_full_update_term/max": 0.197265625, "global_full_update_term/median": 0.197265625, "global_full_update_term/min": 0.197265625, "global_full_update_term/p25": 0.197265625, "global_full_update_term/p75": 0.197265625, "global_full_update_term/p85": 0.197265625, "global_full_update_term/p90": 0.197265625, "global_full_update_term/p95": 0.197265625, "global_full_update_term/p99": 0.197265625, "global_full_update_term/var": NaN, "global_hessian_coeff": 25984.0, "global_hessian_coeff/max": 25984.0, "global_hessian_coeff/median": 25984.0, "global_hessian_coeff/min": 25984.0, "global_hessian_coeff/p25": 25984.0, "global_hessian_coeff/p75": 25984.0, "global_hessian_coeff/p99": 25984.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 25984.0, "global_hessian_coeff_abs/max": 25984.0, "global_hessian_coeff_abs/median": 25984.0, "global_hessian_coeff_abs/min": 25984.0, "global_hessian_coeff_abs/p25": 25984.0, "global_hessian_coeff_abs/p75": 25984.0, "global_hessian_coeff_abs/p99": 25984.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.10472825169563293, "learning_rate": 1.4981730376948682e-05, "loss": -0.5833, "masked_global_fisher_curvature": 680.0, "masked_global_fisher_curvature/max": 680.0, "masked_global_fisher_curvature/median": 680.0, "masked_global_fisher_curvature/min": 680.0, "masked_global_fisher_curvature/p25": 680.0, "masked_global_fisher_curvature/p75": 680.0, "masked_global_fisher_curvature/p85": 680.0, "masked_global_fisher_curvature/p90": 680.0, "masked_global_fisher_curvature/p95": 680.0, "masked_global_fisher_curvature/p99": 680.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/max": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/median": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/min": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/p25": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/p75": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/p85": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/p90": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/p95": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/p99": 7.636845111846924e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00567626953125, "masked_global_full_update_term/max": 0.00567626953125, "masked_global_full_update_term/median": 0.00567626953125, "masked_global_full_update_term/min": 0.00567626953125, "masked_global_full_update_term/p25": 0.00567626953125, "masked_global_full_update_term/p75": 0.00567626953125, "masked_global_full_update_term/p85": 0.00567626953125, "masked_global_full_update_term/p90": 0.00567626953125, "masked_global_full_update_term/p95": 0.00567626953125, "masked_global_full_update_term/p99": 0.00567626953125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1520.0, "masked_global_hessian_coeff/max": -1520.0, "masked_global_hessian_coeff/median": -1520.0, "masked_global_hessian_coeff/min": -1520.0, "masked_global_hessian_coeff/p25": -1520.0, "masked_global_hessian_coeff/p75": -1520.0, "masked_global_hessian_coeff/p99": -1520.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1520.0, "masked_global_hessian_coeff_abs/max": 1520.0, "masked_global_hessian_coeff_abs/median": 1520.0, "masked_global_hessian_coeff_abs/min": 1520.0, "masked_global_hessian_coeff_abs/p25": 1520.0, "masked_global_hessian_coeff_abs/p75": 1520.0, "masked_global_hessian_coeff_abs/p99": 1520.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.9474080801010132, "masked_per_sentence_gradient_norm/max": 10.6875, "masked_per_sentence_gradient_norm/median": 0.91796875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.2265625, "masked_per_sentence_gradient_norm/var": 5.855546474456787, "masked_per_token_gradient_norm": 0.08749444037675858, "masked_per_token_gradient_norm/max": 18.375, "masked_per_token_gradient_norm/median": 2.6858515411731787e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.1920928955078125e-07, "masked_per_token_gradient_norm/var": 0.5506075620651245, "masked_sentence_fisher_curvature": 612.3616943359375, "masked_sentence_fisher_curvature/max": 2672.0, "masked_sentence_fisher_curvature/median": 390.0, "masked_sentence_fisher_curvature/min": 4.46875, "masked_sentence_fisher_curvature/p25": 243.25, "masked_sentence_fisher_curvature/p75": 686.0, "masked_sentence_fisher_curvature/p85": 1027.0, "masked_sentence_fisher_curvature/p90": 1620.0, "masked_sentence_fisher_curvature/p95": 1858.0, "masked_sentence_fisher_curvature/p99": 2352.801025390625, "masked_sentence_fisher_curvature/var": 323188.0, "masked_sentence_fisher_kl_divergence": 6.881366232391883e-08, "masked_sentence_fisher_kl_divergence/max": 2.998858690261841e-07, "masked_sentence_fisher_kl_divergence/median": 4.377216100692749e-08, "masked_sentence_fisher_kl_divergence/min": 5.020410753786564e-10, "masked_sentence_fisher_kl_divergence/p25": 2.7386704459786415e-08, "masked_sentence_fisher_kl_divergence/p75": 7.706694304943085e-08, "masked_sentence_fisher_kl_divergence/p85": 1.153675839304924e-07, "masked_sentence_fisher_kl_divergence/p90": 1.8207356333732605e-07, "masked_sentence_fisher_kl_divergence/p95": 2.0884908735752106e-07, "masked_sentence_fisher_kl_divergence/p99": 2.644957248776336e-07, "masked_sentence_fisher_kl_divergence/var": 4.078783819483518e-15, "masked_sentence_full_gradient_variance/max_squared_error": 9.42874526977539, "masked_sentence_full_gradient_variance/metric": 9.42874526977539, "masked_sentence_full_gradient_variance/p75": 9.42874526977539, "masked_sentence_full_gradient_variance/p90": 9.42874526977539, "masked_sentence_full_gradient_variance/p95": 9.42874526977539, "masked_sentence_full_gradient_variance/p99": 9.42874526977539, "masked_sentence_full_update_term": 0.0015836060047149658, "masked_sentence_full_update_term/max": 0.009033203125, "masked_sentence_full_update_term/median": 0.00054931640625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00279998779296875, "masked_sentence_full_update_term/p85": 0.00363922119140625, "masked_sentence_full_update_term/p90": 0.0044097900390625, "masked_sentence_full_update_term/p95": 0.0052947998046875, "masked_sentence_full_update_term/p99": 0.006655891425907612, "masked_sentence_full_update_term/var": 3.8179837247298565e-06, "masked_sentence_hessian_coeff": -6772.9169921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -4512.0, "masked_sentence_hessian_coeff/min": -27648.0, "masked_sentence_hessian_coeff/p25": -11808.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 55398916.0, "masked_sentence_hessian_coeff_abs": 6772.9169921875, "masked_sentence_hessian_coeff_abs/max": 27648.0, "masked_sentence_hessian_coeff_abs/median": 4256.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 11808.0, "masked_sentence_hessian_coeff_abs/p99": 23635.212890625, "masked_sentence_hessian_coeff_abs/var": 55398916.0, "masked_token_fisher_curvature": 834.6217651367188, "masked_token_fisher_curvature/max": 88576.0, "masked_token_fisher_curvature/median": 7.105427357601002e-15, "masked_token_fisher_curvature/min": 1.680956921675871e-36, "masked_token_fisher_curvature/p25": 3.1001405869507392e-19, "masked_token_fisher_curvature/p75": 1.4551915228366852e-09, "masked_token_fisher_curvature/p85": 6.884336471557617e-06, "masked_token_fisher_curvature/p90": 0.01513671875, "masked_token_fisher_curvature/p95": 342.0, "masked_token_fisher_curvature/p99": 31744.0, "masked_token_fisher_curvature/var": 37866880.0, "masked_token_fisher_kl_divergence": 9.381967913668632e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 7.981000441429603e-25, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.490709505602977e-29, "masked_token_fisher_kl_divergence/p75": 1.6347735882007997e-19, "masked_token_fisher_kl_divergence/p85": 7.73686670285656e-16, "masked_token_fisher_kl_divergence/p90": 1.7053025658242404e-12, "masked_token_fisher_kl_divergence/p95": 3.841705620288849e-08, "masked_token_fisher_kl_divergence/p99": 3.5762786865234375e-06, "masked_token_fisher_kl_divergence/var": 4.784703991518591e-13, "masked_token_full_update_term": 3.147108873235993e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.473592758178711e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.1823875212257917e-14, "masked_token_full_update_term/p85": 9.058087613311727e-12, "masked_token_full_update_term/p90": 5.929905455559492e-10, "masked_token_full_update_term/p95": 4.609028110280633e-07, "masked_token_full_update_term/p99": 0.00113677978515625, "masked_token_full_update_term/var": 7.152512893071616e-08, "masked_token_hessian_coeff": -13919.1083984375, "masked_token_hessian_coeff/max": 1928.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -2228224.0, "masked_token_hessian_coeff/p25": -6.5267086029052734e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.055881500244140625, "masked_token_hessian_coeff/var": 14554089472.0, "masked_token_hessian_coeff_abs": 13919.3212890625, "masked_token_hessian_coeff_abs/max": 2228224.0, "masked_token_hessian_coeff_abs/median": 9.458744898438454e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 8.106231689453125e-05, "masked_token_hessian_coeff_abs/p99": 497440.0, "masked_token_hessian_coeff_abs/var": 14554082304.0, "mean_logprobs": -0.04833984375, "mean_logprobs/var": 0.0025177001953125, "num_completions/total": 1152, "per_sentence_gradient_norm": 50.73698043823242, "per_sentence_gradient_norm/max": 223.0, "per_sentence_gradient_norm/median": 22.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 87.125, "per_sentence_gradient_norm/var": 3538.8828125, "per_token_feature_norm": 193.9803009033203, "per_token_feature_norm/max": 334.0, "per_token_feature_norm/median": 194.0, "per_token_feature_norm/min": 59.0, "per_token_feature_norm/p25": 181.0, "per_token_feature_norm/p75": 210.0, "per_token_feature_norm/var": 818.967041015625, "per_token_gradient_norm": 3.068265438079834, "per_token_gradient_norm/max": 380.0, "per_token_gradient_norm/median": 6.252776074688882e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.4400651454925537e-07, "per_token_gradient_norm/var": 480.02191162109375, "per_token_policy_error_norm": 0.027197210118174553, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0247499980032444, "policy_entropy": 0.059223923832178116, "policy_entropy/max": 3.59375, "policy_entropy/median": 1.9674189388751984e-08, "policy_entropy/min": 4.065758146820642e-18, "policy_entropy/p25": 1.7189449863508344e-10, "policy_entropy/p75": 8.761882781982422e-06, "policy_entropy/var": 0.07152112573385239, "policy_loss": -0.5833333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24561403691768646, "policy_sharpness": 9.067142486572266, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 6.1881103515625, "reward": 0.5833333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24561403691768646, "rewards/accuracy_reward": 0.5833333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24561403691768646, "sentence_fisher_curvature": 206228.671875, "sentence_fisher_curvature/max": 1007616.0, "sentence_fisher_curvature/median": 95744.0, "sentence_fisher_curvature/min": 488.0, "sentence_fisher_curvature/p25": 8736.0, "sentence_fisher_curvature/p75": 347648.0, "sentence_fisher_curvature/p85": 482304.0, "sentence_fisher_curvature/p90": 550912.0, "sentence_fisher_curvature/p95": 661504.0, "sentence_fisher_curvature/p99": 859750.875, "sentence_fisher_curvature/var": 57058082816.0, "sentence_fisher_kl_divergence": 2.319070881640073e-05, "sentence_fisher_kl_divergence/max": 0.00011348724365234375, "sentence_fisher_kl_divergence/median": 1.0788440704345703e-05, "sentence_fisher_kl_divergence/min": 5.494803190231323e-08, "sentence_fisher_kl_divergence/p25": 9.806826710700989e-07, "sentence_fisher_kl_divergence/p75": 3.904104232788086e-05, "sentence_fisher_kl_divergence/p85": 5.429983139038086e-05, "sentence_fisher_kl_divergence/p90": 6.175041198730469e-05, "sentence_fisher_kl_divergence/p95": 7.450580596923828e-05, "sentence_fisher_kl_divergence/p99": 9.672647138359025e-05, "sentence_fisher_kl_divergence/var": 7.22135184982875e-10, "sentence_full_gradient_variance/max_squared_error": 5996.76123046875, "sentence_full_gradient_variance/metric": 5996.76123046875, "sentence_full_gradient_variance/p75": 5996.76123046875, "sentence_full_gradient_variance/p90": 5996.76123046875, "sentence_full_gradient_variance/p95": 5996.76123046875, "sentence_full_gradient_variance/p99": 5996.76123046875, "sentence_full_update_term": 0.05648040771484375, "sentence_full_update_term/max": 0.2734375, "sentence_full_update_term/median": 0.0189208984375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0758056640625, "sentence_full_update_term/p85": 0.14306640625, "sentence_full_update_term/p90": 0.173828125, "sentence_full_update_term/p95": 0.242431640625, "sentence_full_update_term/p99": 0.2715820372104645, "sentence_full_update_term/var": 0.005850325804203749, "sentence_hessian_coeff": 62642.8359375, "sentence_hessian_coeff/max": 440320.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -85504.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 117376.0, "sentence_hessian_coeff/p99": 389734.5625, "sentence_hessian_coeff/var": 10829203456.0, "sentence_hessian_coeff_abs": 71138.8359375, "sentence_hessian_coeff_abs/max": 440320.0, "sentence_hessian_coeff_abs/median": 23808.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 117376.0, "sentence_hessian_coeff_abs/p99": 389734.5625, "sentence_hessian_coeff_abs/var": 9680629760.0, "step": 12, "token_fisher_curvature": 306955.8125, "token_fisher_curvature/max": 65536000.0, "token_fisher_curvature/median": 1.554312234475219e-14, "token_fisher_curvature/min": 1.680956921675871e-36, "token_fisher_curvature/p25": 4.692562527788824e-19, "token_fisher_curvature/p75": 1.257285475730896e-08, "token_fisher_curvature/p85": 0.000774383544921875, "token_fisher_curvature/p90": 17.75, "token_fisher_curvature/p95": 22784.0, "token_fisher_curvature/p99": 9175040.0, "token_fisher_curvature/var": 8263549583360.0, "token_fisher_kl_divergence": 3.450462827458978e-05, "token_fisher_kl_divergence/max": 0.007354736328125, "token_fisher_kl_divergence/median": 1.7448341046040428e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.265646542350254e-29, "token_fisher_kl_divergence/p75": 1.4162390878091902e-18, "token_fisher_kl_divergence/p85": 8.704148513061227e-14, "token_fisher_kl_divergence/p90": 1.9936123862862587e-09, "token_fisher_kl_divergence/p95": 2.562999725341797e-06, "token_fisher_kl_divergence/p99": 0.00102996826171875, "token_fisher_kl_divergence/var": 1.0440905384712096e-07, "token_full_update_term": 0.0011694260174408555, "token_full_update_term/max": 0.12890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.473592758178711e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.306866057708248e-14, "token_full_update_term/p85": 6.139089236967266e-11, "token_full_update_term/p90": 1.1757947504520416e-08, "token_full_update_term/p95": 0.00028228759765625, "token_full_update_term/p99": 0.04541015625, "token_full_update_term/var": 6.990237307036296e-05, "token_hessian_coeff": 77637.78125, "token_hessian_coeff/max": 65273856.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -8388608.0, "token_hessian_coeff/p25": -1.1146068572998047e-05, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 11.89453125, "token_hessian_coeff/var": 5612251906048.0, "token_hessian_coeff_abs": 271436.15625, "token_hessian_coeff_abs/max": 65273856.0, "token_hessian_coeff_abs/median": 2.4156179279088974e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.000171661376953125, "token_hessian_coeff_abs/p99": 6684672.0, "token_hessian_coeff_abs/var": 5544600928256.0 }, { "accuracy_reward": 0.59375, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24375000596046448, "adam_stats/lm_head/lr_effective_max": 7.979057409102097e-05, "adam_stats/lm_head/lr_effective_mean": -9.473711745644486e-12, "adam_stats/lm_head/lr_effective_min": -8.639464795123786e-05, "adam_stats/lm_head/lr_effective_std": 2.0754041543114e-06, "adam_stats/lr_effective_max": 9.734580089570954e-05, "adam_stats/lr_effective_mean": -2.903220441830001e-10, "adam_stats/lr_effective_min": -9.654726454755291e-05, "adam_stats/m_t_max": 0.011140333488583565, "adam_stats/m_t_mean": -6.316249606674873e-11, "adam_stats/m_t_min": -0.0071877362206578255, "adam_stats/v_t_max": 2.6471067030797713e-05, "adam_stats/v_t_mean": 1.7313531251034187e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.59375, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24375000596046448, "all_logprobs": -0.04318754002451897, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.875, "all_logprobs/p1": -1.3125, "all_logprobs/p10": -0.002471923828125, "all_logprobs/p25": -9.5367431640625e-07, "all_logprobs/p5": -0.09619140625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.08138498663902283, "clip_ratio": 0.0, "completion_length": 811.09375, "completion_length/correct": 709.7719116210938, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 689.0, "completion_length/correct/min": 253.0, "completion_length/correct/p25": 425.0, "completion_length/correct/p75": 1024.0, "completion_length/correct/var": 72950.7109375, "completion_length/incorrect": 959.1795043945312, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 379.0, "completion_length/incorrect/p25": 1024.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 20514.783203125, "completion_length/max": 1024.0, "completion_length/median": 1024.0, "completion_length/min": 253.0, "completion_length/p25": 617.0, "completion_length/p75": 1024.0, "completion_length/var": 66370.703125, "curvature_clip_ratio_token_fisher": 0.033301226794719696, "curvature_clip_ratio_token_hessian": 0.01971360668540001, "curvature_clip_ratio_total_fisher": 0.033301226794719696, "curvature_clip_ratio_total_full": 0.033301226794719696, "curvature_clip_ratio_total_hessian": 0.01971360668540001, "epoch": 0.0208, "feature_vector_variance/max_squared_error": 152039.71875, "feature_vector_variance/metric": 29826.65234375, "generated_tokens/total": 901032.0, "global_fisher_curvature": 80384.0, "global_fisher_curvature/max": 80384.0, "global_fisher_curvature/median": 80384.0, "global_fisher_curvature/min": 80384.0, "global_fisher_curvature/p25": 80384.0, "global_fisher_curvature/p75": 80384.0, "global_fisher_curvature/p85": 80384.0, "global_fisher_curvature/p90": 80384.0, "global_fisher_curvature/p95": 80384.0, "global_fisher_curvature/p99": 80384.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 9.000301361083984e-06, "global_fisher_kl_divergence/max": 9.000301361083984e-06, "global_fisher_kl_divergence/median": 9.000301361083984e-06, "global_fisher_kl_divergence/min": 9.000301361083984e-06, "global_fisher_kl_divergence/p25": 9.000301361083984e-06, "global_fisher_kl_divergence/p75": 9.000301361083984e-06, "global_fisher_kl_divergence/p85": 9.000301361083984e-06, "global_fisher_kl_divergence/p90": 9.000301361083984e-06, "global_fisher_kl_divergence/p95": 9.000301361083984e-06, "global_fisher_kl_divergence/p99": 9.000301361083984e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.146484375, "global_full_update_term/max": 0.146484375, "global_full_update_term/median": 0.146484375, "global_full_update_term/min": 0.146484375, "global_full_update_term/p25": 0.146484375, "global_full_update_term/p75": 0.146484375, "global_full_update_term/p85": 0.146484375, "global_full_update_term/p90": 0.146484375, "global_full_update_term/p95": 0.146484375, "global_full_update_term/p99": 0.146484375, "global_full_update_term/var": NaN, "global_hessian_coeff": 24320.0, "global_hessian_coeff/max": 24320.0, "global_hessian_coeff/median": 24320.0, "global_hessian_coeff/min": 24320.0, "global_hessian_coeff/p25": 24320.0, "global_hessian_coeff/p75": 24320.0, "global_hessian_coeff/p99": 24320.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 24320.0, "global_hessian_coeff_abs/max": 24320.0, "global_hessian_coeff_abs/median": 24320.0, "global_hessian_coeff_abs/min": 24320.0, "global_hessian_coeff_abs/p25": 24320.0, "global_hessian_coeff_abs/p75": 24320.0, "global_hessian_coeff_abs/p99": 24320.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04284278675913811, "learning_rate": 1.495891421526205e-05, "loss": -0.5938, "masked_global_fisher_curvature": 656.0, "masked_global_fisher_curvature/max": 656.0, "masked_global_fisher_curvature/median": 656.0, "masked_global_fisher_curvature/min": 656.0, "masked_global_fisher_curvature/p25": 656.0, "masked_global_fisher_curvature/p75": 656.0, "masked_global_fisher_curvature/p85": 656.0, "masked_global_fisher_curvature/p90": 656.0, "masked_global_fisher_curvature/p95": 656.0, "masked_global_fisher_curvature/p99": 656.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/max": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/median": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/min": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/p25": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/p75": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/p85": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/p90": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/p95": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/p99": 7.35744833946228e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.004730224609375, "masked_global_full_update_term/max": 0.004730224609375, "masked_global_full_update_term/median": 0.004730224609375, "masked_global_full_update_term/min": 0.004730224609375, "masked_global_full_update_term/p25": 0.004730224609375, "masked_global_full_update_term/p75": 0.004730224609375, "masked_global_full_update_term/p85": 0.004730224609375, "masked_global_full_update_term/p90": 0.004730224609375, "masked_global_full_update_term/p95": 0.004730224609375, "masked_global_full_update_term/p99": 0.004730224609375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1632.0, "masked_global_hessian_coeff/max": -1632.0, "masked_global_hessian_coeff/median": -1632.0, "masked_global_hessian_coeff/min": -1632.0, "masked_global_hessian_coeff/p25": -1632.0, "masked_global_hessian_coeff/p75": -1632.0, "masked_global_hessian_coeff/p99": -1632.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1632.0, "masked_global_hessian_coeff_abs/max": 1632.0, "masked_global_hessian_coeff_abs/median": 1632.0, "masked_global_hessian_coeff_abs/min": 1632.0, "masked_global_hessian_coeff_abs/p25": 1632.0, "masked_global_hessian_coeff_abs/p75": 1632.0, "masked_global_hessian_coeff_abs/p99": 1632.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.3241782188415527, "masked_per_sentence_gradient_norm/max": 9.5, "masked_per_sentence_gradient_norm/median": 1.703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.96875, "masked_per_sentence_gradient_norm/var": 6.6621551513671875, "masked_per_token_gradient_norm": 0.07024990022182465, "masked_per_token_gradient_norm/max": 19.375, "masked_per_token_gradient_norm/median": 8.926193117986259e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.2293457984924316e-07, "masked_per_token_gradient_norm/var": 0.4283917248249054, "masked_sentence_fisher_curvature": 546.9212646484375, "masked_sentence_fisher_curvature/max": 2240.0, "masked_sentence_fisher_curvature/median": 380.0, "masked_sentence_fisher_curvature/min": 9.9375, "masked_sentence_fisher_curvature/p25": 260.5, "masked_sentence_fisher_curvature/p75": 735.0, "masked_sentence_fisher_curvature/p85": 975.0, "masked_sentence_fisher_curvature/p90": 1216.0, "masked_sentence_fisher_curvature/p95": 1486.0, "masked_sentence_fisher_curvature/p99": 1738.401611328125, "masked_sentence_fisher_curvature/var": 191118.921875, "masked_sentence_fisher_kl_divergence": 6.137187114063636e-08, "masked_sentence_fisher_kl_divergence/max": 2.514570951461792e-07, "masked_sentence_fisher_kl_divergence/median": 4.260800778865814e-08, "masked_sentence_fisher_kl_divergence/min": 1.1132215149700642e-09, "masked_sentence_fisher_kl_divergence/p25": 2.9278453439474106e-08, "masked_sentence_fisher_kl_divergence/p75": 8.230563253164291e-08, "masked_sentence_fisher_kl_divergence/p85": 1.0943040251731873e-07, "masked_sentence_fisher_kl_divergence/p90": 1.364387571811676e-07, "masked_sentence_fisher_kl_divergence/p95": 1.6647391021251678e-07, "masked_sentence_fisher_kl_divergence/p99": 1.9483286450849846e-07, "masked_sentence_fisher_kl_divergence/var": 2.4051290896631413e-15, "masked_sentence_full_gradient_variance/max_squared_error": 11.664482116699219, "masked_sentence_full_gradient_variance/metric": 11.664482116699219, "masked_sentence_full_gradient_variance/p75": 11.664482116699219, "masked_sentence_full_gradient_variance/p90": 11.664482116699219, "masked_sentence_full_gradient_variance/p95": 11.664482116699219, "masked_sentence_full_gradient_variance/p99": 11.664482116699219, "masked_sentence_full_update_term": 0.001881331205368042, "masked_sentence_full_update_term/max": 0.00860595703125, "masked_sentence_full_update_term/median": 0.00122833251953125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002971649169921875, "masked_sentence_full_update_term/p85": 0.00418853759765625, "masked_sentence_full_update_term/p90": 0.0052490234375, "masked_sentence_full_update_term/p95": 0.00620269775390625, "masked_sentence_full_update_term/p99": 0.007591250818222761, "masked_sentence_full_update_term/var": 4.6790551095909905e-06, "masked_sentence_hessian_coeff": -6956.5, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6976.0, "masked_sentence_hessian_coeff/min": -23808.0, "masked_sentence_hessian_coeff/p25": -12112.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 48634760.0, "masked_sentence_hessian_coeff_abs": 6956.5, "masked_sentence_hessian_coeff_abs/max": 23808.0, "masked_sentence_hessian_coeff_abs/median": 6560.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 12112.0, "masked_sentence_hessian_coeff_abs/p99": 22956.802734375, "masked_sentence_hessian_coeff_abs/var": 48634760.0, "masked_token_fisher_curvature": 735.391845703125, "masked_token_fisher_curvature/max": 88576.0, "masked_token_fisher_curvature/median": 4.085620730620576e-14, "masked_token_fisher_curvature/min": 2.5860875718090325e-37, "masked_token_fisher_curvature/p25": 7.928228386300251e-19, "masked_token_fisher_curvature/p75": 7.043126970529556e-09, "masked_token_fisher_curvature/p85": 1.3768672943115234e-05, "masked_token_fisher_curvature/p90": 0.007049560546875, "masked_token_fisher_curvature/p95": 104.0, "masked_token_fisher_curvature/p99": 26880.0, "masked_token_fisher_curvature/var": 32479430.0, "masked_token_fisher_kl_divergence": 8.253676497815832e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.575342763183934e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 8.914128228997433e-29, "masked_token_fisher_kl_divergence/p75": 7.894347068410079e-19, "masked_token_fisher_kl_divergence/p85": 1.547373340571312e-15, "masked_token_fisher_kl_divergence/p90": 7.922551503725117e-13, "masked_token_fisher_kl_divergence/p95": 1.1699739843606949e-08, "masked_token_fisher_kl_divergence/p99": 3.0100345611572266e-06, "masked_token_fisher_kl_divergence/var": 4.091721536245152e-13, "masked_token_full_update_term": 2.5994915631599724e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.8849968910217285e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.3836154444391013e-14, "masked_token_full_update_term/p85": 1.5916157281026244e-11, "masked_token_full_update_term/p90": 8.258211892098188e-10, "masked_token_full_update_term/p95": 2.285887603648007e-07, "masked_token_full_update_term/p99": 0.000820159912109375, "masked_token_full_update_term/var": 5.8609000319620463e-08, "masked_token_hessian_coeff": -11799.119140625, "masked_token_hessian_coeff/max": 148.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2162688.0, "masked_token_hessian_coeff/p25": -4.678964614868164e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.08056640625, "masked_token_hessian_coeff/var": 12516977664.0, "masked_token_hessian_coeff_abs": 11799.2001953125, "masked_token_hessian_coeff_abs/max": 2162688.0, "masked_token_hessian_coeff_abs/median": 1.887201506178826e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 8.249282836914062e-05, "masked_token_hessian_coeff_abs/p99": 397312.0, "masked_token_hessian_coeff_abs/var": 12516975616.0, "mean_logprobs": -0.037841796875, "mean_logprobs/var": 0.000820159912109375, "num_completions/total": 1248, "per_sentence_gradient_norm": 62.04296875, "per_sentence_gradient_norm/max": 296.0, "per_sentence_gradient_norm/median": 42.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 88.875, "per_sentence_gradient_norm/var": 5299.853515625, "per_token_feature_norm": 189.93643188476562, "per_token_feature_norm/max": 330.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 67.0, "per_token_feature_norm/p25": 177.0, "per_token_feature_norm/p75": 203.0, "per_token_feature_norm/var": 700.2454223632812, "per_token_gradient_norm": 2.4873640537261963, "per_token_gradient_norm/max": 392.0, "per_token_gradient_norm/median": 3.9968028886505635e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.384185791015625e-07, "per_token_gradient_norm/var": 386.0641174316406, "per_token_policy_error_norm": 0.022253811359405518, "per_token_policy_error_norm/max": 1.9921875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.020412182435393333, "policy_entropy": 0.04729127883911133, "policy_entropy/max": 3.609375, "policy_entropy/median": 4.6798959374427795e-08, "policy_entropy/min": 3.3745792618611326e-18, "policy_entropy/p25": 3.0377123039215803e-10, "policy_entropy/p75": 1.4781951904296875e-05, "policy_entropy/var": 0.0477277897298336, "policy_loss": -0.59375, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24375000596046448, "policy_sharpness": 9.127165794372559, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 5.7238593101501465, "reward": 0.59375, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24375000596046448, "rewards/accuracy_reward": 0.59375, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24375000596046448, "sentence_fisher_curvature": 197124.84375, "sentence_fisher_curvature/max": 954368.0, "sentence_fisher_curvature/median": 144384.0, "sentence_fisher_curvature/min": 816.0, "sentence_fisher_curvature/p25": 6904.0, "sentence_fisher_curvature/p75": 343040.0, "sentence_fisher_curvature/p85": 405504.0, "sentence_fisher_curvature/p90": 501760.0, "sentence_fisher_curvature/p95": 636928.0, "sentence_fisher_curvature/p99": 837632.375, "sentence_fisher_curvature/var": 49395884032.0, "sentence_fisher_kl_divergence": 2.213144034612924e-05, "sentence_fisher_kl_divergence/max": 0.00010728836059570312, "sentence_fisher_kl_divergence/median": 1.621246337890625e-05, "sentence_fisher_kl_divergence/min": 9.173527359962463e-08, "sentence_fisher_kl_divergence/p25": 7.739290595054626e-07, "sentence_fisher_kl_divergence/p75": 3.8504600524902344e-05, "sentence_fisher_kl_divergence/p85": 4.553794860839844e-05, "sentence_fisher_kl_divergence/p90": 5.626678466796875e-05, "sentence_fisher_kl_divergence/p95": 7.164478302001953e-05, "sentence_fisher_kl_divergence/p99": 9.41515390877612e-05, "sentence_fisher_kl_divergence/var": 6.231828386660254e-10, "sentence_full_gradient_variance/max_squared_error": 8920.9765625, "sentence_full_gradient_variance/metric": 8920.9765625, "sentence_full_gradient_variance/p75": 8920.9765625, "sentence_full_gradient_variance/p90": 8920.9765625, "sentence_full_gradient_variance/p95": 8920.9765625, "sentence_full_gradient_variance/p99": 8920.9765625, "sentence_full_update_term": 0.06052907556295395, "sentence_full_update_term/max": 0.279296875, "sentence_full_update_term/median": 0.036376953125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0980224609375, "sentence_full_update_term/p85": 0.141357421875, "sentence_full_update_term/p90": 0.17431640625, "sentence_full_update_term/p95": 0.208984375, "sentence_full_update_term/p99": 0.2625977098941803, "sentence_full_update_term/var": 0.005371286999434233, "sentence_hessian_coeff": 47926.66796875, "sentence_hessian_coeff/max": 503808.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -112640.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 81280.0, "sentence_hessian_coeff/p99": 424038.65625, "sentence_hessian_coeff/var": 12056158208.0, "sentence_hessian_coeff_abs": 67270.671875, "sentence_hessian_coeff_abs/max": 503808.0, "sentence_hessian_coeff_abs/median": 32640.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 91392.0, "sentence_hessian_coeff_abs/p99": 424038.65625, "sentence_hessian_coeff_abs/var": 9804324864.0, "step": 13, "token_fisher_curvature": 245236.25, "token_fisher_curvature/max": 66060288.0, "token_fisher_curvature/median": 8.43769498715119e-14, "token_fisher_curvature/min": 2.5860875718090325e-37, "token_fisher_curvature/p25": 1.2400562347802957e-18, "token_fisher_curvature/p75": 4.0046870708465576e-08, "token_fisher_curvature/p85": 0.0003681182861328125, "token_fisher_curvature/p90": 3.375, "token_fisher_curvature/p95": 11264.0, "token_fisher_curvature/p99": 5767168.0, "token_fisher_curvature/var": 6665554362368.0, "token_fisher_kl_divergence": 2.752131513261702e-05, "token_fisher_kl_divergence/max": 0.007415771484375, "token_fisher_kl_divergence/median": 9.460878256075254e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.3883951931889808e-28, "token_fisher_kl_divergence/p75": 4.4994390158148434e-18, "token_fisher_kl_divergence/p85": 4.1300296516055823e-14, "token_fisher_kl_divergence/p90": 3.7834979593753815e-10, "token_fisher_kl_divergence/p95": 1.2665987014770508e-06, "token_fisher_kl_divergence/p99": 0.00064849853515625, "token_fisher_kl_divergence/var": 8.393607231482747e-08, "token_full_update_term": 0.0009442503214813769, "token_full_update_term/max": 0.12890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.8849968910217285e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.950795411990839e-14, "token_full_update_term/p85": 7.912603905424476e-11, "token_full_update_term/p90": 7.683411240577698e-09, "token_full_update_term/p95": 1.9311904907226562e-05, "token_full_update_term/p99": 0.03564453125, "token_full_update_term/var": 5.5783842981327325e-05, "token_hessian_coeff": 48265.37109375, "token_hessian_coeff/max": 66060288.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -8650752.0, "token_hessian_coeff/p25": -8.52346420288086e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 1.71875, "token_hessian_coeff/var": 4584131002368.0, "token_hessian_coeff_abs": 222867.640625, "token_hessian_coeff_abs/max": 66060288.0, "token_hessian_coeff_abs/median": 1.1141310096718371e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00016498565673828125, "token_hessian_coeff_abs/p99": 5799936.0, "token_hessian_coeff_abs/var": 4536790417408.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166668951511383, "adam_stats/lm_head/lr_effective_max": 8.295354928122833e-05, "adam_stats/lm_head/lr_effective_mean": -3.4505943241613934e-11, "adam_stats/lm_head/lr_effective_min": -8.634686673758551e-05, "adam_stats/lm_head/lr_effective_std": 2.0219156340317568e-06, "adam_stats/lr_effective_max": 9.29476591409184e-05, "adam_stats/lr_effective_mean": -2.78211870208267e-10, "adam_stats/lr_effective_min": -9.357310045743361e-05, "adam_stats/m_t_max": 0.009998834691941738, "adam_stats/m_t_mean": -5.6083276911422786e-11, "adam_stats/m_t_min": -0.0064704883843660355, "adam_stats/v_t_max": 2.644467167556286e-05, "adam_stats/v_t_mean": 1.729864949201465e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166668951511383, "all_logprobs": -0.03833167999982834, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -1.2734375, "all_logprobs/p10": -0.00046539306640625, "all_logprobs/p25": -7.152557373046875e-07, "all_logprobs/p5": -0.048583984375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.07241319864988327, "clip_ratio": 0.0, "completion_length": 591.3125, "completion_length/correct": 460.4137878417969, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 355.0, "completion_length/correct/min": 230.0, "completion_length/correct/p25": 264.25, "completion_length/correct/p75": 545.75, "completion_length/correct/var": 61442.00390625, "completion_length/incorrect": 791.1052856445312, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 920.0, "completion_length/incorrect/min": 253.0, "completion_length/incorrect/p25": 528.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 74615.171875, "completion_length/max": 1024.0, "completion_length/median": 493.0, "completion_length/min": 230.0, "completion_length/p25": 319.5, "completion_length/p75": 944.25, "completion_length/var": 92353.75, "curvature_clip_ratio_token_fisher": 0.023024344816803932, "curvature_clip_ratio_token_hessian": 0.01220801193267107, "curvature_clip_ratio_total_fisher": 0.023024344816803932, "curvature_clip_ratio_total_full": 0.023024344816803932, "curvature_clip_ratio_total_hessian": 0.01220801193267107, "epoch": 0.0224, "feature_vector_variance/max_squared_error": 147087.484375, "feature_vector_variance/metric": 28285.333984375, "generated_tokens/total": 957798.0, "global_fisher_curvature": 76800.0, "global_fisher_curvature/max": 76800.0, "global_fisher_curvature/median": 76800.0, "global_fisher_curvature/min": 76800.0, "global_fisher_curvature/p25": 76800.0, "global_fisher_curvature/p75": 76800.0, "global_fisher_curvature/p85": 76800.0, "global_fisher_curvature/p90": 76800.0, "global_fisher_curvature/p95": 76800.0, "global_fisher_curvature/p99": 76800.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 8.58306884765625e-06, "global_fisher_kl_divergence/max": 8.58306884765625e-06, "global_fisher_kl_divergence/median": 8.58306884765625e-06, "global_fisher_kl_divergence/min": 8.58306884765625e-06, "global_fisher_kl_divergence/p25": 8.58306884765625e-06, "global_fisher_kl_divergence/p75": 8.58306884765625e-06, "global_fisher_kl_divergence/p85": 8.58306884765625e-06, "global_fisher_kl_divergence/p90": 8.58306884765625e-06, "global_fisher_kl_divergence/p95": 8.58306884765625e-06, "global_fisher_kl_divergence/p99": 8.58306884765625e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.1884765625, "global_full_update_term/max": 0.1884765625, "global_full_update_term/median": 0.1884765625, "global_full_update_term/min": 0.1884765625, "global_full_update_term/p25": 0.1884765625, "global_full_update_term/p75": 0.1884765625, "global_full_update_term/p85": 0.1884765625, "global_full_update_term/p90": 0.1884765625, "global_full_update_term/p95": 0.1884765625, "global_full_update_term/p99": 0.1884765625, "global_full_update_term/var": NaN, "global_hessian_coeff": 17792.0, "global_hessian_coeff/max": 17792.0, "global_hessian_coeff/median": 17792.0, "global_hessian_coeff/min": 17792.0, "global_hessian_coeff/p25": 17792.0, "global_hessian_coeff/p75": 17792.0, "global_hessian_coeff/p99": 17792.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17792.0, "global_hessian_coeff_abs/max": 17792.0, "global_hessian_coeff_abs/median": 17792.0, "global_hessian_coeff_abs/min": 17792.0, "global_hessian_coeff_abs/p25": 17792.0, "global_hessian_coeff_abs/p75": 17792.0, "global_hessian_coeff_abs/p99": 17792.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03286447003483772, "learning_rate": 1.4927010515561777e-05, "loss": -0.6042, "masked_global_fisher_curvature": 388.0, "masked_global_fisher_curvature/max": 388.0, "masked_global_fisher_curvature/median": 388.0, "masked_global_fisher_curvature/min": 388.0, "masked_global_fisher_curvature/p25": 388.0, "masked_global_fisher_curvature/p75": 388.0, "masked_global_fisher_curvature/p85": 388.0, "masked_global_fisher_curvature/p90": 388.0, "masked_global_fisher_curvature/p95": 388.0, "masked_global_fisher_curvature/p99": 388.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/max": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/median": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/min": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/p25": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/p75": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/p85": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/p90": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/p95": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/p99": 4.330649971961975e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0035247802734375, "masked_global_full_update_term/max": 0.0035247802734375, "masked_global_full_update_term/median": 0.0035247802734375, "masked_global_full_update_term/min": 0.0035247802734375, "masked_global_full_update_term/p25": 0.0035247802734375, "masked_global_full_update_term/p75": 0.0035247802734375, "masked_global_full_update_term/p85": 0.0035247802734375, "masked_global_full_update_term/p90": 0.0035247802734375, "masked_global_full_update_term/p95": 0.0035247802734375, "masked_global_full_update_term/p99": 0.0035247802734375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2544.0, "masked_global_hessian_coeff/max": -2544.0, "masked_global_hessian_coeff/median": -2544.0, "masked_global_hessian_coeff/min": -2544.0, "masked_global_hessian_coeff/p25": -2544.0, "masked_global_hessian_coeff/p75": -2544.0, "masked_global_hessian_coeff/p99": -2544.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2544.0, "masked_global_hessian_coeff_abs/max": 2544.0, "masked_global_hessian_coeff_abs/median": 2544.0, "masked_global_hessian_coeff_abs/min": 2544.0, "masked_global_hessian_coeff_abs/p25": 2544.0, "masked_global_hessian_coeff_abs/p75": 2544.0, "masked_global_hessian_coeff_abs/p99": 2544.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.9884847402572632, "masked_per_sentence_gradient_norm/max": 10.5625, "masked_per_sentence_gradient_norm/median": 0.70703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.046875, "masked_per_sentence_gradient_norm/var": 6.859908103942871, "masked_per_token_gradient_norm": 0.047732967883348465, "masked_per_token_gradient_norm/max": 12.125, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.1082738637924194e-07, "masked_per_token_gradient_norm/var": 0.24580208957195282, "masked_sentence_fisher_curvature": 500.1224060058594, "masked_sentence_fisher_curvature/max": 2912.0, "masked_sentence_fisher_curvature/median": 356.0, "masked_sentence_fisher_curvature/min": 20.375, "masked_sentence_fisher_curvature/p25": 215.25, "masked_sentence_fisher_curvature/p75": 704.0, "masked_sentence_fisher_curvature/p85": 832.0, "masked_sentence_fisher_curvature/p90": 1018.0, "masked_sentence_fisher_curvature/p95": 1340.0, "masked_sentence_fisher_curvature/p99": 1787.20361328125, "masked_sentence_fisher_curvature/var": 203604.796875, "masked_sentence_fisher_kl_divergence": 5.594741736558717e-08, "masked_sentence_fisher_kl_divergence/max": 3.259629011154175e-07, "masked_sentence_fisher_kl_divergence/median": 3.9814040064811707e-08, "masked_sentence_fisher_kl_divergence/min": 2.2846506908535957e-09, "masked_sentence_fisher_kl_divergence/p25": 2.403976395726204e-08, "masked_sentence_fisher_kl_divergence/p75": 7.869675755500793e-08, "masked_sentence_fisher_kl_divergence/p85": 9.313225746154785e-08, "masked_sentence_fisher_kl_divergence/p90": 1.1385418474674225e-07, "masked_sentence_fisher_kl_divergence/p95": 1.501757651567459e-07, "masked_sentence_fisher_kl_divergence/p99": 2.0032788938806334e-07, "masked_sentence_fisher_kl_divergence/var": 2.5502569619886145e-15, "masked_sentence_full_gradient_variance/max_squared_error": 10.461272239685059, "masked_sentence_full_gradient_variance/metric": 10.461272239685059, "masked_sentence_full_gradient_variance/p75": 10.461272239685059, "masked_sentence_full_gradient_variance/p90": 10.461272239685059, "masked_sentence_full_gradient_variance/p95": 10.461272239685059, "masked_sentence_full_gradient_variance/p99": 10.461272239685059, "masked_sentence_full_update_term": 0.001450955867767334, "masked_sentence_full_update_term/max": 0.007598876953125, "masked_sentence_full_update_term/median": 0.000392913818359375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00243377685546875, "masked_sentence_full_update_term/p85": 0.00341796875, "masked_sentence_full_update_term/p90": 0.004364013671875, "masked_sentence_full_update_term/p95": 0.005126953125, "masked_sentence_full_update_term/p99": 0.006642153952270746, "masked_sentence_full_update_term/var": 3.281612180217053e-06, "masked_sentence_hessian_coeff": -8199.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -5312.0, "masked_sentence_hessian_coeff/min": -40960.0, "masked_sentence_hessian_coeff/p25": -14320.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 86201528.0, "masked_sentence_hessian_coeff_abs": 8199.0, "masked_sentence_hessian_coeff_abs/max": 40960.0, "masked_sentence_hessian_coeff_abs/median": 5312.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 14320.0, "masked_sentence_hessian_coeff_abs/p99": 31840.029296875, "masked_sentence_hessian_coeff_abs/var": 86201528.0, "masked_token_fisher_curvature": 626.2554931640625, "masked_token_fisher_curvature/max": 88576.0, "masked_token_fisher_curvature/median": 1.2168044349891716e-13, "masked_token_fisher_curvature/min": 4.5917748078995606e-40, "masked_token_fisher_curvature/p25": 2.019326546254252e-18, "masked_token_fisher_curvature/p75": 8.207280188798904e-09, "masked_token_fisher_curvature/p85": 5.036592483520508e-06, "masked_token_fisher_curvature/p90": 0.0010833740234375, "masked_token_fisher_curvature/p95": 28.5, "masked_token_fisher_curvature/p99": 20736.0, "masked_token_fisher_curvature/var": 27395818.0, "masked_token_fisher_kl_divergence": 7.006606494996959e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.3648480107124957e-23, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.2561421889320938e-28, "masked_token_fisher_kl_divergence/p75": 9.215718466126788e-19, "masked_token_fisher_kl_divergence/p85": 5.620504062164855e-16, "masked_token_fisher_kl_divergence/p90": 1.2079226507921703e-13, "masked_token_fisher_kl_divergence/p95": 3.1868694350123405e-09, "masked_token_fisher_kl_divergence/p99": 2.3245811462402344e-06, "masked_token_fisher_kl_divergence/var": 3.4289942846960075e-13, "masked_token_full_update_term": 1.894926026579924e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.039836883544922e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.6506574712925612e-15, "masked_token_full_update_term/p85": 1.2107648217352107e-11, "masked_token_full_update_term/p90": 6.519229600598919e-10, "masked_token_full_update_term/p95": 6.845220923423767e-08, "masked_token_full_update_term/p99": 0.0004356801509857178, "masked_token_full_update_term/var": 4.005326204037374e-08, "masked_token_hessian_coeff": -8993.78125, "masked_token_hessian_coeff/max": 680.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2088960.0, "masked_token_hessian_coeff/p25": -2.2873282432556152e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.06396484375, "masked_token_hessian_coeff/var": 9294583808.0, "masked_token_hessian_coeff_abs": 8993.90234375, "masked_token_hessian_coeff_abs/max": 2088960.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 5.91278076171875e-05, "masked_token_hessian_coeff_abs/p99": 224688.0, "masked_token_hessian_coeff_abs/var": 9294581760.0, "mean_logprobs": -0.0294189453125, "mean_logprobs/var": 0.0016937255859375, "num_completions/total": 1344, "per_sentence_gradient_norm": 46.26041793823242, "per_sentence_gradient_norm/max": 239.0, "per_sentence_gradient_norm/median": 17.375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 76.5, "per_sentence_gradient_norm/var": 3798.29248046875, "per_token_feature_norm": 185.55210876464844, "per_token_feature_norm/max": 330.0, "per_token_feature_norm/median": 184.0, "per_token_feature_norm/min": 71.0, "per_token_feature_norm/p25": 173.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 500.609130859375, "per_token_gradient_norm": 1.4196157455444336, "per_token_gradient_norm/max": 374.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.7601996660232544e-07, "per_token_gradient_norm/var": 194.7476043701172, "per_token_policy_error_norm": 0.019248325377702713, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0172667745500803, "policy_entropy": 0.0415441133081913, "policy_entropy/max": 3.734375, "policy_entropy/median": 8.149072527885437e-08, "policy_entropy/min": 5.082197683525802e-19, "policy_entropy/p25": 4.94765117764473e-10, "policy_entropy/p75": 1.150369644165039e-05, "policy_entropy/var": 0.04772758483886719, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166668951511383, "policy_sharpness": 9.276459693908691, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 4.817769527435303, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166668951511383, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166668951511383, "sentence_fisher_curvature": 162754.78125, "sentence_fisher_curvature/max": 684032.0, "sentence_fisher_curvature/median": 119296.0, "sentence_fisher_curvature/min": 175.0, "sentence_fisher_curvature/p25": 4936.0, "sentence_fisher_curvature/p75": 250368.0, "sentence_fisher_curvature/p85": 372224.0, "sentence_fisher_curvature/p90": 439296.0, "sentence_fisher_curvature/p95": 539648.0, "sentence_fisher_curvature/p99": 641228.9375, "sentence_fisher_curvature/var": 33299681280.0, "sentence_fisher_kl_divergence": 1.8211572751170024e-05, "sentence_fisher_kl_divergence/max": 7.677078247070312e-05, "sentence_fisher_kl_divergence/median": 1.33514404296875e-05, "sentence_fisher_kl_divergence/min": 1.955777406692505e-08, "sentence_fisher_kl_divergence/p25": 5.522742867469788e-07, "sentence_fisher_kl_divergence/p75": 2.804398536682129e-05, "sentence_fisher_kl_divergence/p85": 4.166364669799805e-05, "sentence_fisher_kl_divergence/p90": 4.9114227294921875e-05, "sentence_fisher_kl_divergence/p95": 6.0439109802246094e-05, "sentence_fisher_kl_divergence/p99": 7.178785017458722e-05, "sentence_fisher_kl_divergence/var": 4.1736872291586735e-10, "sentence_full_gradient_variance/max_squared_error": 5801.7529296875, "sentence_full_gradient_variance/metric": 5801.7529296875, "sentence_full_gradient_variance/p75": 5801.7529296875, "sentence_full_gradient_variance/p90": 5801.7529296875, "sentence_full_gradient_variance/p95": 5801.7529296875, "sentence_full_gradient_variance/p99": 5801.7529296875, "sentence_full_update_term": 0.044211070984601974, "sentence_full_update_term/max": 0.314453125, "sentence_full_update_term/median": 0.015625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0703125, "sentence_full_update_term/p85": 0.0941162109375, "sentence_full_update_term/p90": 0.121337890625, "sentence_full_update_term/p95": 0.173828125, "sentence_full_update_term/p99": 0.29404303431510925, "sentence_full_update_term/var": 0.003851066343486309, "sentence_hessian_coeff": 18935.59375, "sentence_hessian_coeff/max": 329728.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -125440.0, "sentence_hessian_coeff/p25": -12144.0, "sentence_hessian_coeff/p75": 16704.0, "sentence_hessian_coeff/p99": 321945.625, "sentence_hessian_coeff/var": 7349026816.0, "sentence_hessian_coeff_abs": 48690.07421875, "sentence_hessian_coeff_abs/max": 329728.0, "sentence_hessian_coeff_abs/median": 14784.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 68096.0, "sentence_hessian_coeff_abs/p99": 321945.625, "sentence_hessian_coeff_abs/var": 5315678720.0, "step": 14, "token_fisher_curvature": 153692.6875, "token_fisher_curvature/max": 68157440.0, "token_fisher_curvature/median": 1.936228954946273e-13, "token_fisher_curvature/min": 4.5917748078995606e-40, "token_fisher_curvature/p25": 2.72405795836983e-18, "token_fisher_curvature/p75": 2.153683453798294e-08, "token_fisher_curvature/p85": 3.337860107421875e-05, "token_fisher_curvature/p90": 0.040771484375, "token_fisher_curvature/p95": 1960.0, "token_fisher_curvature/p99": 1316480.0, "token_fisher_curvature/var": 4086659022848.0, "token_fisher_kl_divergence": 1.719572173897177e-05, "token_fisher_kl_divergence/max": 0.00762939453125, "token_fisher_kl_divergence/median": 2.1713491079516976e-23, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.0450030941531056e-28, "token_fisher_kl_divergence/p75": 2.4123498337802474e-18, "token_fisher_kl_divergence/p85": 3.747002708109903e-15, "token_fisher_kl_divergence/p90": 4.575895218295045e-12, "token_fisher_kl_divergence/p95": 2.1886080503463745e-07, "token_fisher_kl_divergence/p99": 0.00014753639698028564, "token_fisher_kl_divergence/var": 5.116011436712142e-08, "token_full_update_term": 0.0005957471439614892, "token_full_update_term/max": 0.130859375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.039836883544922e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.049116928532385e-15, "token_full_update_term/p85": 3.205968823749572e-11, "token_full_update_term/p90": 2.255546860396862e-09, "token_full_update_term/p95": 5.736947059631348e-07, "token_full_update_term/p99": 0.01603412628173828, "token_full_update_term/var": 3.476676647551358e-05, "token_hessian_coeff": 14137.2177734375, "token_hessian_coeff/max": 66846720.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -8388608.0, "token_hessian_coeff/p25": -3.591179847717285e-06, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.224029541015625, "token_hessian_coeff/var": 2639441952768.0, "token_hessian_coeff_abs": 137599.1875, "token_hessian_coeff_abs/max": 66846720.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 9.572505950927734e-05, "token_hessian_coeff_abs/p99": 3850240.0, "token_hessian_coeff_abs/var": 2620708093952.0 }, { "accuracy_reward": 0.75, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.75, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1894736886024475, "adam_stats/lm_head/lr_effective_max": 7.800511957611889e-05, "adam_stats/lm_head/lr_effective_mean": 3.730438180582496e-11, "adam_stats/lm_head/lr_effective_min": -7.852039561839774e-05, "adam_stats/lm_head/lr_effective_std": 1.938376271937159e-06, "adam_stats/lr_effective_max": 9.461832814849913e-05, "adam_stats/lr_effective_mean": -1.800757043479706e-10, "adam_stats/lr_effective_min": -9.339182724943385e-05, "adam_stats/m_t_max": 0.009104236960411072, "adam_stats/m_t_mean": -5.053958293532723e-11, "adam_stats/m_t_min": -0.0059081255458295345, "adam_stats/v_t_max": 2.641933497216087e-05, "adam_stats/v_t_mean": 1.7285126238317239e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.75, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.75, "advantages/p75": 1.0, "advantages/var": 0.1894736886024475, "all_logprobs": -0.013782369904220104, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.5, "all_logprobs/p1": -0.38671875, "all_logprobs/p10": -3.528594970703125e-05, "all_logprobs/p25": -2.384185791015625e-07, "all_logprobs/p5": -0.002471923828125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.021961266174912453, "clip_ratio": 0.0, "completion_length": 627.9271240234375, "completion_length/correct": 556.2777709960938, "completion_length/correct/max": 1016.0, "completion_length/correct/median": 530.0, "completion_length/correct/min": 189.0, "completion_length/correct/p25": 444.75, "completion_length/correct/p75": 691.5, "completion_length/correct/var": 50221.5, "completion_length/incorrect": 842.875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 393.0, "completion_length/incorrect/p25": 577.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 64718.46875, "completion_length/max": 1024.0, "completion_length/median": 585.0, "completion_length/min": 189.0, "completion_length/p25": 465.75, "completion_length/p75": 866.5, "completion_length/var": 68765.6328125, "curvature_clip_ratio_token_fisher": 0.015460924245417118, "curvature_clip_ratio_token_hessian": 0.009920206852257252, "curvature_clip_ratio_total_fisher": 0.015460924245417118, "curvature_clip_ratio_total_full": 0.015460924245417118, "curvature_clip_ratio_total_hessian": 0.009920206852257252, "epoch": 0.024, "feature_vector_variance/max_squared_error": 140697.5625, "feature_vector_variance/metric": 25448.431640625, "generated_tokens/total": 1018079.0, "global_fisher_curvature": 53760.0, "global_fisher_curvature/max": 53760.0, "global_fisher_curvature/median": 53760.0, "global_fisher_curvature/min": 53760.0, "global_fisher_curvature/p25": 53760.0, "global_fisher_curvature/p75": 53760.0, "global_fisher_curvature/p85": 53760.0, "global_fisher_curvature/p90": 53760.0, "global_fisher_curvature/p95": 53760.0, "global_fisher_curvature/p99": 53760.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.990266799926758e-06, "global_fisher_kl_divergence/max": 5.990266799926758e-06, "global_fisher_kl_divergence/median": 5.990266799926758e-06, "global_fisher_kl_divergence/min": 5.990266799926758e-06, "global_fisher_kl_divergence/p25": 5.990266799926758e-06, "global_fisher_kl_divergence/p75": 5.990266799926758e-06, "global_fisher_kl_divergence/p85": 5.990266799926758e-06, "global_fisher_kl_divergence/p90": 5.990266799926758e-06, "global_fisher_kl_divergence/p95": 5.990266799926758e-06, "global_fisher_kl_divergence/p99": 5.990266799926758e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.064453125, "global_full_update_term/max": 0.064453125, "global_full_update_term/median": 0.064453125, "global_full_update_term/min": 0.064453125, "global_full_update_term/p25": 0.064453125, "global_full_update_term/p75": 0.064453125, "global_full_update_term/p85": 0.064453125, "global_full_update_term/p90": 0.064453125, "global_full_update_term/p95": 0.064453125, "global_full_update_term/p99": 0.064453125, "global_full_update_term/var": NaN, "global_hessian_coeff": 11456.0, "global_hessian_coeff/max": 11456.0, "global_hessian_coeff/median": 11456.0, "global_hessian_coeff/min": 11456.0, "global_hessian_coeff/p25": 11456.0, "global_hessian_coeff/p75": 11456.0, "global_hessian_coeff/p99": 11456.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 11456.0, "global_hessian_coeff_abs/max": 11456.0, "global_hessian_coeff_abs/median": 11456.0, "global_hessian_coeff_abs/min": 11456.0, "global_hessian_coeff_abs/p25": 11456.0, "global_hessian_coeff_abs/p75": 11456.0, "global_hessian_coeff_abs/p99": 11456.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03564983606338501, "learning_rate": 1.488605814759156e-05, "loss": -0.75, "masked_global_fisher_curvature": 400.0, "masked_global_fisher_curvature/max": 400.0, "masked_global_fisher_curvature/median": 400.0, "masked_global_fisher_curvature/min": 400.0, "masked_global_fisher_curvature/p25": 400.0, "masked_global_fisher_curvature/p75": 400.0, "masked_global_fisher_curvature/p85": 400.0, "masked_global_fisher_curvature/p90": 400.0, "masked_global_fisher_curvature/p95": 400.0, "masked_global_fisher_curvature/p99": 400.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/max": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/median": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/min": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/p25": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/p75": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/p85": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/p90": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/p95": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/p99": 4.44706529378891e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003021240234375, "masked_global_full_update_term/max": 0.003021240234375, "masked_global_full_update_term/median": 0.003021240234375, "masked_global_full_update_term/min": 0.003021240234375, "masked_global_full_update_term/p25": 0.003021240234375, "masked_global_full_update_term/p75": 0.003021240234375, "masked_global_full_update_term/p85": 0.003021240234375, "masked_global_full_update_term/p90": 0.003021240234375, "masked_global_full_update_term/p95": 0.003021240234375, "masked_global_full_update_term/p99": 0.003021240234375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2064.0, "masked_global_hessian_coeff/max": -2064.0, "masked_global_hessian_coeff/median": -2064.0, "masked_global_hessian_coeff/min": -2064.0, "masked_global_hessian_coeff/p25": -2064.0, "masked_global_hessian_coeff/p75": -2064.0, "masked_global_hessian_coeff/p99": -2064.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2064.0, "masked_global_hessian_coeff_abs/max": 2064.0, "masked_global_hessian_coeff_abs/median": 2064.0, "masked_global_hessian_coeff_abs/min": 2064.0, "masked_global_hessian_coeff_abs/p25": 2064.0, "masked_global_hessian_coeff_abs/p75": 2064.0, "masked_global_hessian_coeff_abs/p99": 2064.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.7787679433822632, "masked_per_sentence_gradient_norm/max": 7.71875, "masked_per_sentence_gradient_norm/median": 1.3984375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.3896484375, "masked_per_sentence_gradient_norm/p75": 2.74609375, "masked_per_sentence_gradient_norm/var": 3.1012933254241943, "masked_per_token_gradient_norm": 0.05005483329296112, "masked_per_token_gradient_norm/max": 12.125, "masked_per_token_gradient_norm/median": 8.11269273981452e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.125037670135498e-06, "masked_per_token_gradient_norm/var": 0.23971907794475555, "masked_sentence_fisher_curvature": 261.5625, "masked_sentence_fisher_curvature/max": 1800.0, "masked_sentence_fisher_curvature/median": 208.0, "masked_sentence_fisher_curvature/min": 30.375, "masked_sentence_fisher_curvature/p25": 99.375, "masked_sentence_fisher_curvature/p75": 358.0, "masked_sentence_fisher_curvature/p85": 431.0, "masked_sentence_fisher_curvature/p90": 470.0, "masked_sentence_fisher_curvature/p95": 545.0, "masked_sentence_fisher_curvature/p99": 812.003173828125, "masked_sentence_fisher_curvature/var": 52171.8515625, "masked_sentence_fisher_kl_divergence": 2.9130660550436005e-08, "masked_sentence_fisher_kl_divergence/max": 2.0023435354232788e-07, "masked_sentence_fisher_kl_divergence/median": 2.3166649043560028e-08, "masked_sentence_fisher_kl_divergence/min": 3.3905962482094765e-09, "masked_sentence_fisher_kl_divergence/p25": 1.104490365833044e-08, "masked_sentence_fisher_kl_divergence/p75": 3.9814040064811707e-08, "masked_sentence_fisher_kl_divergence/p85": 4.807952791452408e-08, "masked_sentence_fisher_kl_divergence/p90": 5.2386894822120667e-08, "masked_sentence_fisher_kl_divergence/p95": 6.07105903327465e-08, "masked_sentence_fisher_kl_divergence/p99": 9.052490668182145e-08, "masked_sentence_fisher_kl_divergence/var": 6.466294517964652e-16, "masked_sentence_full_gradient_variance/max_squared_error": 6.05136251449585, "masked_sentence_full_gradient_variance/metric": 6.05136251449585, "masked_sentence_full_gradient_variance/p75": 6.05136251449585, "masked_sentence_full_gradient_variance/p90": 6.05136251449585, "masked_sentence_full_gradient_variance/p95": 6.05136251449585, "masked_sentence_full_gradient_variance/p99": 6.05136251449585, "masked_sentence_full_update_term": 0.0013822715263813734, "masked_sentence_full_update_term/max": 0.00830078125, "masked_sentence_full_update_term/median": 0.00104522705078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00023746490478515625, "masked_sentence_full_update_term/p75": 0.0019378662109375, "masked_sentence_full_update_term/p85": 0.002796173095703125, "masked_sentence_full_update_term/p90": 0.00348663330078125, "masked_sentence_full_update_term/p95": 0.003887176513671875, "masked_sentence_full_update_term/p99": 0.0060104443691670895, "masked_sentence_full_update_term/var": 2.3052512005961034e-06, "masked_sentence_hessian_coeff": -7016.25, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -5728.0, "masked_sentence_hessian_coeff/min": -25344.0, "masked_sentence_hessian_coeff/p25": -12032.0, "masked_sentence_hessian_coeff/p75": -1266.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 38170704.0, "masked_sentence_hessian_coeff_abs": 7016.25, "masked_sentence_hessian_coeff_abs/max": 25344.0, "masked_sentence_hessian_coeff_abs/median": 5696.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 1266.0, "masked_sentence_hessian_coeff_abs/p75": 12032.0, "masked_sentence_hessian_coeff_abs/p99": 23763.205078125, "masked_sentence_hessian_coeff_abs/var": 38170704.0, "masked_token_fisher_curvature": 348.0450439453125, "masked_token_fisher_curvature/max": 88576.0, "masked_token_fisher_curvature/median": 2.4202861936828413e-14, "masked_token_fisher_curvature/min": 3.948926334793622e-39, "masked_token_fisher_curvature/p25": 5.793705359219414e-19, "masked_token_fisher_curvature/p75": 7.785274647176266e-10, "masked_token_fisher_curvature/p85": 3.9674341678619385e-07, "masked_token_fisher_curvature/p90": 4.9591064453125e-05, "masked_token_fisher_curvature/p95": 0.12382125854492188, "masked_token_fisher_curvature/p99": 8000.0, "masked_token_fisher_curvature/var": 15409824.0, "masked_token_fisher_kl_divergence": 3.878280097069364e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 2.701261687868481e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 6.468659422812297e-29, "masked_token_fisher_kl_divergence/p75": 8.682087709356578e-20, "masked_token_fisher_kl_divergence/p85": 4.423544863740858e-17, "masked_token_fisher_kl_divergence/p90": 5.523359547510154e-15, "masked_token_fisher_kl_divergence/p95": 1.3789414055054294e-11, "masked_token_fisher_kl_divergence/p99": 8.903443813323975e-07, "masked_token_fisher_kl_divergence/var": 1.913963101505753e-13, "masked_token_full_update_term": 2.046104236796964e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.3096799850463867e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 6.146194664324867e-13, "masked_token_full_update_term/p85": 1.0186340659856796e-10, "masked_token_full_update_term/p90": 2.240994945168495e-09, "masked_token_full_update_term/p95": 2.2221138351596892e-07, "masked_token_full_update_term/p99": 0.00070953369140625, "masked_token_full_update_term/var": 4.15636094430738e-08, "masked_token_hessian_coeff": -9608.669921875, "masked_token_hessian_coeff/max": 233.0, "masked_token_hessian_coeff/median": -5.144329406903125e-12, "masked_token_hessian_coeff/min": -2162688.0, "masked_token_hessian_coeff/p25": -9.202957153320312e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.11792945861816406, "masked_token_hessian_coeff/var": 9523472384.0, "masked_token_hessian_coeff_abs": 9608.8076171875, "masked_token_hessian_coeff_abs/max": 2162688.0, "masked_token_hessian_coeff_abs/median": 2.4028122425079346e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.0005645751953125, "masked_token_hessian_coeff_abs/p99": 315472.0, "masked_token_hessian_coeff_abs/var": 9523469312.0, "mean_logprobs": -0.01287841796875, "mean_logprobs/var": 0.0001430511474609375, "num_completions/total": 1440, "per_sentence_gradient_norm": 37.5859375, "per_sentence_gradient_norm/max": 207.0, "per_sentence_gradient_norm/median": 23.375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 4.3828125, "per_sentence_gradient_norm/p75": 48.9375, "per_sentence_gradient_norm/var": 2071.90771484375, "per_token_feature_norm": 179.31625366210938, "per_token_feature_norm/max": 316.0, "per_token_feature_norm/median": 180.0, "per_token_feature_norm/min": 76.0, "per_token_feature_norm/p25": 169.0, "per_token_feature_norm/p75": 189.0, "per_token_feature_norm/var": 319.91314697265625, "per_token_gradient_norm": 1.0944982767105103, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 9.822542779147625e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.5050172805786133e-06, "per_token_gradient_norm/var": 133.48313903808594, "per_token_policy_error_norm": 0.007732503116130829, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.007289842236787081, "policy_entropy": 0.01526745967566967, "policy_entropy/max": 3.375, "policy_entropy/median": 3.748573362827301e-08, "policy_entropy/min": 3.347474207548995e-18, "policy_entropy/p25": 2.6557245291769505e-10, "policy_entropy/p75": 3.874301910400391e-06, "policy_entropy/var": 0.009503550827503204, "policy_loss": -0.75, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -0.75, "policy_loss/var": 0.1894736886024475, "policy_sharpness": 9.565057754516602, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.7248826026916504, "reward": 0.75, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.75, "reward/p75": 1.0, "reward/var": 0.1894736886024475, "rewards/accuracy_reward": 0.75, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.75, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1894736886024475, "sentence_fisher_curvature": 123456.234375, "sentence_fisher_curvature/max": 630784.0, "sentence_fisher_curvature/median": 86016.0, "sentence_fisher_curvature/min": 312.0, "sentence_fisher_curvature/p25": 22816.0, "sentence_fisher_curvature/p75": 202752.0, "sentence_fisher_curvature/p85": 252416.0, "sentence_fisher_curvature/p90": 294912.0, "sentence_fisher_curvature/p95": 357888.0, "sentence_fisher_curvature/p99": 484864.46875, "sentence_fisher_curvature/var": 16298192896.0, "sentence_fisher_kl_divergence": 1.3752217455476057e-05, "sentence_fisher_kl_divergence/max": 7.009506225585938e-05, "sentence_fisher_kl_divergence/median": 9.59634780883789e-06, "sentence_fisher_kl_divergence/min": 3.4691765904426575e-08, "sentence_fisher_kl_divergence/p25": 2.5443732738494873e-06, "sentence_fisher_kl_divergence/p75": 2.2530555725097656e-05, "sentence_fisher_kl_divergence/p85": 2.810359001159668e-05, "sentence_fisher_kl_divergence/p90": 3.2901763916015625e-05, "sentence_fisher_kl_divergence/p95": 3.993511199951172e-05, "sentence_fisher_kl_divergence/p99": 5.4013780754758045e-05, "sentence_fisher_kl_divergence/var": 2.0214183105160544e-10, "sentence_full_gradient_variance/max_squared_error": 3418.528076171875, "sentence_full_gradient_variance/metric": 3418.528076171875, "sentence_full_gradient_variance/p75": 3418.528076171875, "sentence_full_gradient_variance/p90": 3418.528076171875, "sentence_full_gradient_variance/p95": 3418.528076171875, "sentence_full_gradient_variance/p99": 3418.528076171875, "sentence_full_update_term": 0.030671119689941406, "sentence_full_update_term/max": 0.17578125, "sentence_full_update_term/median": 0.0191650390625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00368499755859375, "sentence_full_update_term/p75": 0.04815673828125, "sentence_full_update_term/p85": 0.0653076171875, "sentence_full_update_term/p90": 0.080810546875, "sentence_full_update_term/p95": 0.10498046875, "sentence_full_update_term/p99": 0.13681653141975403, "sentence_full_update_term/var": 0.0013205476570874453, "sentence_hessian_coeff": 12110.71875, "sentence_hessian_coeff/max": 327680.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -158720.0, "sentence_hessian_coeff/p25": -12784.0, "sentence_hessian_coeff/p75": 30752.0, "sentence_hessian_coeff/p99": 220672.34375, "sentence_hessian_coeff/var": 4379437568.0, "sentence_hessian_coeff_abs": 39487.78125, "sentence_hessian_coeff_abs/max": 327680.0, "sentence_hessian_coeff_abs/median": 19968.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 38.25, "sentence_hessian_coeff_abs/p75": 47296.0, "sentence_hessian_coeff_abs/p99": 220672.34375, "sentence_hessian_coeff_abs/var": 2951952640.0, "step": 15, "token_fisher_curvature": 121357.734375, "token_fisher_curvature/max": 64487424.0, "token_fisher_curvature/median": 3.26405569239796e-14, "token_fisher_curvature/min": 3.948926334793622e-39, "token_fisher_curvature/p25": 6.708500942254059e-19, "token_fisher_curvature/p75": 1.418811734765768e-09, "token_fisher_curvature/p85": 1.1101365089416504e-06, "token_fisher_curvature/p90": 0.00022029876708984375, "token_fisher_curvature/p95": 7.8125, "token_fisher_curvature/p99": 524288.0, "token_fisher_curvature/var": 3359032737792.0, "token_fisher_kl_divergence": 1.3518683772417717e-05, "token_fisher_kl_divergence/max": 0.007171630859375, "token_fisher_kl_divergence/median": 3.644764574061778e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 7.454735554338562e-29, "token_fisher_kl_divergence/p75": 1.5839516113655416e-19, "token_fisher_kl_divergence/p85": 1.240327285323417e-16, "token_fisher_kl_divergence/p90": 2.453592884421596e-14, "token_fisher_kl_divergence/p95": 8.694769348949194e-10, "token_fisher_kl_divergence/p99": 5.841255187988281e-05, "token_fisher_kl_divergence/var": 4.167847578173678e-08, "token_full_update_term": 0.0004875813901890069, "token_full_update_term/max": 0.126953125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.3096799850463867e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.1084466677857563e-12, "token_full_update_term/p85": 1.9190338207408786e-10, "token_full_update_term/p90": 6.490154191851616e-09, "token_full_update_term/p95": 1.4454126358032227e-06, "token_full_update_term/p99": 0.00971674919128418, "token_full_update_term/var": 2.7704070816980675e-05, "token_hessian_coeff": 1722.7091064453125, "token_hessian_coeff/max": 64225280.0, "token_hessian_coeff/median": -1.0743406164692715e-11, "token_hessian_coeff/min": -8585216.0, "token_hessian_coeff/p25": -0.000125885009765625, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.3046875, "token_hessian_coeff/var": 2266266599424.0, "token_hessian_coeff_abs": 117433.9765625, "token_hessian_coeff_abs/max": 64225280.0, "token_hessian_coeff_abs/median": 2.9616057872772217e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00078582763671875, "token_hessian_coeff_abs/p99": 3407872.0, "token_hessian_coeff_abs/var": 2252478611456.0 }, { "accuracy_reward": 0.75, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.75, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1894736886024475, "adam_stats/lm_head/lr_effective_max": 7.85796728450805e-05, "adam_stats/lm_head/lr_effective_mean": 1.471900726301989e-11, "adam_stats/lm_head/lr_effective_min": -7.883187208790332e-05, "adam_stats/lm_head/lr_effective_std": 1.953642367880093e-06, "adam_stats/lr_effective_max": 9.481902816332877e-05, "adam_stats/lr_effective_mean": -1.837237861845864e-10, "adam_stats/lr_effective_min": -9.462008893024176e-05, "adam_stats/m_t_max": 0.00826552975922823, "adam_stats/m_t_mean": -4.504474224176214e-11, "adam_stats/m_t_min": -0.005375677719712257, "adam_stats/v_t_max": 2.6393430744064972e-05, "adam_stats/v_t_mean": 1.727345805453695e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.75, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.75, "advantages/p75": 1.0, "advantages/var": 0.1894736886024475, "all_logprobs": -0.017084229737520218, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.125, "all_logprobs/p1": -0.474609375, "all_logprobs/p10": -6.632809527218342e-05, "all_logprobs/p25": -4.76837158203125e-07, "all_logprobs/p5": -0.005218505859375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.027548158541321754, "clip_ratio": 0.0, "completion_length": 615.625, "completion_length/correct": 513.7361450195312, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 442.0, "completion_length/correct/min": 274.0, "completion_length/correct/p25": 328.0, "completion_length/correct/p75": 578.0, "completion_length/correct/var": 49558.81640625, "completion_length/incorrect": 921.2916870117188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 565.0, "completion_length/incorrect/p25": 860.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 27210.126953125, "completion_length/max": 1024.0, "completion_length/median": 500.0, "completion_length/min": 274.0, "completion_length/p25": 407.0, "completion_length/p75": 894.0, "completion_length/var": 75098.28125, "curvature_clip_ratio_token_fisher": 0.01917089708149433, "curvature_clip_ratio_token_hessian": 0.012571912258863449, "curvature_clip_ratio_total_fisher": 0.01917089708149433, "curvature_clip_ratio_total_full": 0.01917089708149433, "curvature_clip_ratio_total_hessian": 0.012571912258863449, "epoch": 0.0256, "feature_vector_variance/max_squared_error": 138132.203125, "feature_vector_variance/metric": 25485.541015625, "generated_tokens/total": 1077179.0, "global_fisher_curvature": 72192.0, "global_fisher_curvature/max": 72192.0, "global_fisher_curvature/median": 72192.0, "global_fisher_curvature/min": 72192.0, "global_fisher_curvature/p25": 72192.0, "global_fisher_curvature/p75": 72192.0, "global_fisher_curvature/p85": 72192.0, "global_fisher_curvature/p90": 72192.0, "global_fisher_curvature/p95": 72192.0, "global_fisher_curvature/p99": 72192.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.987022399902344e-06, "global_fisher_kl_divergence/max": 7.987022399902344e-06, "global_fisher_kl_divergence/median": 7.987022399902344e-06, "global_fisher_kl_divergence/min": 7.987022399902344e-06, "global_fisher_kl_divergence/p25": 7.987022399902344e-06, "global_fisher_kl_divergence/p75": 7.987022399902344e-06, "global_fisher_kl_divergence/p85": 7.987022399902344e-06, "global_fisher_kl_divergence/p90": 7.987022399902344e-06, "global_fisher_kl_divergence/p95": 7.987022399902344e-06, "global_fisher_kl_divergence/p99": 7.987022399902344e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.1474609375, "global_full_update_term/max": 0.1474609375, "global_full_update_term/median": 0.1474609375, "global_full_update_term/min": 0.1474609375, "global_full_update_term/p25": 0.1474609375, "global_full_update_term/p75": 0.1474609375, "global_full_update_term/p85": 0.1474609375, "global_full_update_term/p90": 0.1474609375, "global_full_update_term/p95": 0.1474609375, "global_full_update_term/p99": 0.1474609375, "global_full_update_term/var": NaN, "global_hessian_coeff": 17152.0, "global_hessian_coeff/max": 17152.0, "global_hessian_coeff/median": 17152.0, "global_hessian_coeff/min": 17152.0, "global_hessian_coeff/p25": 17152.0, "global_hessian_coeff/p75": 17152.0, "global_hessian_coeff/p99": 17152.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17152.0, "global_hessian_coeff_abs/max": 17152.0, "global_hessian_coeff_abs/median": 17152.0, "global_hessian_coeff_abs/min": 17152.0, "global_hessian_coeff_abs/p25": 17152.0, "global_hessian_coeff_abs/p75": 17152.0, "global_hessian_coeff_abs/p99": 17152.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.055271562188863754, "learning_rate": 1.4836107005503543e-05, "loss": -0.75, "masked_global_fisher_curvature": 302.0, "masked_global_fisher_curvature/max": 302.0, "masked_global_fisher_curvature/median": 302.0, "masked_global_fisher_curvature/min": 302.0, "masked_global_fisher_curvature/p25": 302.0, "masked_global_fisher_curvature/p75": 302.0, "masked_global_fisher_curvature/p85": 302.0, "masked_global_fisher_curvature/p90": 302.0, "masked_global_fisher_curvature/p95": 302.0, "masked_global_fisher_curvature/p99": 302.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/max": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/median": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/min": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/p25": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/p75": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/p85": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/p90": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/p95": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/p99": 3.3527612686157227e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0054931640625, "masked_global_full_update_term/max": 0.0054931640625, "masked_global_full_update_term/median": 0.0054931640625, "masked_global_full_update_term/min": 0.0054931640625, "masked_global_full_update_term/p25": 0.0054931640625, "masked_global_full_update_term/p75": 0.0054931640625, "masked_global_full_update_term/p85": 0.0054931640625, "masked_global_full_update_term/p90": 0.0054931640625, "masked_global_full_update_term/p95": 0.0054931640625, "masked_global_full_update_term/p99": 0.0054931640625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2944.0, "masked_global_hessian_coeff/max": -2944.0, "masked_global_hessian_coeff/median": -2944.0, "masked_global_hessian_coeff/min": -2944.0, "masked_global_hessian_coeff/p25": -2944.0, "masked_global_hessian_coeff/p75": -2944.0, "masked_global_hessian_coeff/p99": -2944.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2944.0, "masked_global_hessian_coeff_abs/max": 2944.0, "masked_global_hessian_coeff_abs/median": 2944.0, "masked_global_hessian_coeff_abs/min": 2944.0, "masked_global_hessian_coeff_abs/p25": 2944.0, "masked_global_hessian_coeff_abs/p75": 2944.0, "masked_global_hessian_coeff_abs/p99": 2944.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.3774821758270264, "masked_per_sentence_gradient_norm/max": 8.375, "masked_per_sentence_gradient_norm/median": 2.171875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.31640625, "masked_per_sentence_gradient_norm/p75": 3.41015625, "masked_per_sentence_gradient_norm/var": 5.08339786529541, "masked_per_token_gradient_norm": 0.06282103806734085, "masked_per_token_gradient_norm/max": 11.4375, "masked_per_token_gradient_norm/median": 3.728928277269006e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.5480985641479492e-06, "masked_per_token_gradient_norm/var": 0.32014065980911255, "masked_sentence_fisher_curvature": 333.890625, "masked_sentence_fisher_curvature/max": 1176.0, "masked_sentence_fisher_curvature/median": 258.0, "masked_sentence_fisher_curvature/min": 19.25, "masked_sentence_fisher_curvature/p25": 174.75, "masked_sentence_fisher_curvature/p75": 480.5, "masked_sentence_fisher_curvature/p85": 551.0, "masked_sentence_fisher_curvature/p90": 610.0, "masked_sentence_fisher_curvature/p95": 734.0, "masked_sentence_fisher_curvature/p99": 803.6011962890625, "masked_sentence_fisher_curvature/var": 45930.22265625, "masked_sentence_fisher_kl_divergence": 3.6995214003354704e-08, "masked_sentence_fisher_kl_divergence/max": 1.30385160446167e-07, "masked_sentence_fisher_kl_divergence/median": 2.8638169169425964e-08, "masked_sentence_fisher_kl_divergence/min": 2.1391315385699272e-09, "masked_sentence_fisher_kl_divergence/p25": 1.941225491464138e-08, "masked_sentence_fisher_kl_divergence/p75": 5.314359441399574e-08, "masked_sentence_fisher_kl_divergence/p85": 6.088521331548691e-08, "masked_sentence_fisher_kl_divergence/p90": 6.775371730327606e-08, "masked_sentence_fisher_kl_divergence/p95": 8.12578946352005e-08, "masked_sentence_fisher_kl_divergence/p99": 8.924411787347708e-08, "masked_sentence_fisher_kl_divergence/var": 5.645469828889583e-16, "masked_sentence_full_gradient_variance/max_squared_error": 10.294195175170898, "masked_sentence_full_gradient_variance/metric": 10.294195175170898, "masked_sentence_full_gradient_variance/p75": 10.294195175170898, "masked_sentence_full_gradient_variance/p90": 10.294195175170898, "masked_sentence_full_gradient_variance/p95": 10.294195175170898, "masked_sentence_full_gradient_variance/p99": 10.294195175170898, "masked_sentence_full_update_term": 0.0019033154239878058, "masked_sentence_full_update_term/max": 0.0079345703125, "masked_sentence_full_update_term/median": 0.0015106201171875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00023174285888671875, "masked_sentence_full_update_term/p75": 0.00283050537109375, "masked_sentence_full_update_term/p85": 0.00402069091796875, "masked_sentence_full_update_term/p90": 0.004547119140625, "masked_sentence_full_update_term/p95": 0.004913330078125, "masked_sentence_full_update_term/p99": 0.006919864099472761, "masked_sentence_full_update_term/var": 3.201875642844243e-06, "masked_sentence_hessian_coeff": -9303.6669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -8320.0, "masked_sentence_hessian_coeff/min": -37376.0, "masked_sentence_hessian_coeff/p25": -14256.0, "masked_sentence_hessian_coeff/p75": -1284.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 67749544.0, "masked_sentence_hessian_coeff_abs": 9303.6669921875, "masked_sentence_hessian_coeff_abs/max": 37376.0, "masked_sentence_hessian_coeff_abs/median": 8128.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 1284.0, "masked_sentence_hessian_coeff_abs/p75": 14256.0, "masked_sentence_hessian_coeff_abs/p99": 28377.62890625, "masked_sentence_hessian_coeff_abs/var": 67749544.0, "masked_token_fisher_curvature": 422.267333984375, "masked_token_fisher_curvature/max": 86528.0, "masked_token_fisher_curvature/median": 6.394884621840902e-14, "masked_token_fisher_curvature/min": 2.865267480129326e-38, "masked_token_fisher_curvature/p25": 8.199278929421627e-19, "masked_token_fisher_curvature/p75": 5.034962669014931e-09, "masked_token_fisher_curvature/p85": 1.9073486328125e-06, "masked_token_fisher_curvature/p90": 0.0001735687255859375, "masked_token_fisher_curvature/p95": 0.70703125, "masked_token_fisher_curvature/p99": 9835.5, "masked_token_fisher_curvature/var": 18506810.0, "masked_token_fisher_kl_divergence": 4.678346243736087e-08, "masked_token_fisher_kl_divergence/max": 9.59634780883789e-06, "masked_token_fisher_kl_divergence/median": 7.0827339949853e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 9.071900410041636e-29, "masked_token_fisher_kl_divergence/p75": 5.590417451878382e-19, "masked_token_fisher_kl_divergence/p85": 2.1163626406917047e-16, "masked_token_fisher_kl_divergence/p90": 1.9206858326015208e-14, "masked_token_fisher_kl_divergence/p95": 7.821654435247183e-11, "masked_token_fisher_kl_divergence/p99": 1.0928488336503506e-06, "masked_token_fisher_kl_divergence/var": 2.2720140927055127e-13, "masked_token_full_update_term": 2.5978975827456452e-05, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.7120113372802734e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.0127011996228248e-12, "masked_token_full_update_term/p85": 7.967173587530851e-10, "masked_token_full_update_term/p90": 1.4901161193847656e-08, "masked_token_full_update_term/p95": 7.82310962677002e-07, "masked_token_full_update_term/p99": 0.000850677490234375, "masked_token_full_update_term/var": 5.73829943562032e-08, "masked_token_hessian_coeff": -12523.02734375, "masked_token_hessian_coeff/max": 2656.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2310144.0, "masked_token_hessian_coeff/p25": -0.0003757476806640625, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.17806625366210938, "masked_token_hessian_coeff/var": 13832561664.0, "masked_token_hessian_coeff_abs": 12523.431640625, "masked_token_hessian_coeff_abs/max": 2310144.0, "masked_token_hessian_coeff_abs/median": 9.639188647270203e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00165557861328125, "masked_token_hessian_coeff_abs/p99": 425984.0, "masked_token_hessian_coeff_abs/var": 13832551424.0, "mean_logprobs": -0.0169677734375, "mean_logprobs/var": 0.0001430511474609375, "num_completions/total": 1536, "per_sentence_gradient_norm": 45.46354293823242, "per_sentence_gradient_norm/max": 188.0, "per_sentence_gradient_norm/median": 38.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 12.5625, "per_sentence_gradient_norm/p75": 67.0, "per_sentence_gradient_norm/var": 1634.205078125, "per_token_feature_norm": 177.8481903076172, "per_token_feature_norm/max": 322.0, "per_token_feature_norm/median": 179.0, "per_token_feature_norm/min": 83.5, "per_token_feature_norm/p25": 168.0, "per_token_feature_norm/p75": 188.0, "per_token_feature_norm/var": 302.6885070800781, "per_token_gradient_norm": 1.3753254413604736, "per_token_gradient_norm/max": 306.0, "per_token_gradient_norm/median": 5.020410753786564e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 3.904104232788086e-06, "per_token_gradient_norm/var": 173.85060119628906, "per_token_policy_error_norm": 0.009584391489624977, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.009057448245584965, "policy_entropy": 0.018741026520729065, "policy_entropy/max": 3.359375, "policy_entropy/median": 5.8673322200775146e-08, "policy_entropy/min": 4.777265822514254e-19, "policy_entropy/p25": 3.255991032347083e-10, "policy_entropy/p75": 7.547438144683838e-06, "policy_entropy/var": 0.012275958433747292, "policy_loss": -0.75, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -0.75, "policy_loss/var": 0.1894736886024475, "policy_sharpness": 9.501884460449219, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 3.1491260528564453, "reward": 0.75, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.75, "reward/p75": 1.0, "reward/var": 0.1894736886024475, "rewards/accuracy_reward": 0.75, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.75, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1894736886024475, "sentence_fisher_curvature": 182425.71875, "sentence_fisher_curvature/max": 860160.0, "sentence_fisher_curvature/median": 149504.0, "sentence_fisher_curvature/min": 256.0, "sentence_fisher_curvature/p25": 16840.0, "sentence_fisher_curvature/p75": 271360.0, "sentence_fisher_curvature/p85": 331264.0, "sentence_fisher_curvature/p90": 409600.0, "sentence_fisher_curvature/p95": 523776.0, "sentence_fisher_curvature/p99": 622797.5625, "sentence_fisher_curvature/var": 31076227072.0, "sentence_fisher_kl_divergence": 2.0205261535011232e-05, "sentence_fisher_kl_divergence/max": 9.5367431640625e-05, "sentence_fisher_kl_divergence/median": 1.6570091247558594e-05, "sentence_fisher_kl_divergence/min": 2.8405338525772095e-08, "sentence_fisher_kl_divergence/p25": 1.8617138266563416e-06, "sentence_fisher_kl_divergence/p75": 3.0040740966796875e-05, "sentence_fisher_kl_divergence/p85": 3.6656856536865234e-05, "sentence_fisher_kl_divergence/p90": 4.5299530029296875e-05, "sentence_fisher_kl_divergence/p95": 5.799531936645508e-05, "sentence_fisher_kl_divergence/p99": 6.909378862474114e-05, "sentence_fisher_kl_divergence/var": 3.8138023294997936e-10, "sentence_full_gradient_variance/max_squared_error": 3632.365966796875, "sentence_full_gradient_variance/metric": 3632.365966796875, "sentence_full_gradient_variance/p75": 3632.365966796875, "sentence_full_gradient_variance/p90": 3632.365966796875, "sentence_full_gradient_variance/p95": 3632.365966796875, "sentence_full_gradient_variance/p99": 3632.365966796875, "sentence_full_update_term": 0.04203542321920395, "sentence_full_update_term/max": 0.26171875, "sentence_full_update_term/median": 0.036865234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.010345458984375, "sentence_full_update_term/p75": 0.063720703125, "sentence_full_update_term/p85": 0.07275390625, "sentence_full_update_term/p90": 0.08447265625, "sentence_full_update_term/p95": 0.104248046875, "sentence_full_update_term/p99": 0.13369181752204895, "sentence_full_update_term/var": 0.001585817546583712, "sentence_hessian_coeff": 30283.458984375, "sentence_hessian_coeff/max": 462848.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -162816.0, "sentence_hessian_coeff/p25": -15024.0, "sentence_hessian_coeff/p75": 59072.0, "sentence_hessian_coeff/p99": 371405.09375, "sentence_hessian_coeff/var": 9577711616.0, "sentence_hessian_coeff_abs": 61580.54296875, "sentence_hessian_coeff_abs/max": 462848.0, "sentence_hessian_coeff_abs/median": 39168.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 723.0, "sentence_hessian_coeff_abs/p75": 80000.0, "sentence_hessian_coeff_abs/p99": 371405.09375, "sentence_hessian_coeff_abs/var": 6672372736.0, "step": 16, "token_fisher_curvature": 159378.734375, "token_fisher_curvature/max": 67108864.0, "token_fisher_curvature/median": 9.237055564881302e-14, "token_fisher_curvature/min": 2.865267480129326e-38, "token_fisher_curvature/p25": 1.0232158002831948e-18, "token_fisher_curvature/p75": 1.0826624929904938e-08, "token_fisher_curvature/p85": 7.539987564086914e-06, "token_fisher_curvature/p90": 0.0017242431640625, "token_fisher_curvature/p95": 51.7626953125, "token_fisher_curvature/p99": 1294528.0, "token_fisher_curvature/var": 4637611524096.0, "token_fisher_kl_divergence": 1.7655758711043745e-05, "token_fisher_kl_divergence/max": 0.0074462890625, "token_fisher_kl_divergence/median": 1.0236360080343717e-23, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.135959703518257e-28, "token_fisher_kl_divergence/p75": 1.1993986533120893e-18, "token_fisher_kl_divergence/p85": 8.36136715420821e-16, "token_fisher_kl_divergence/p90": 1.9095836023552692e-13, "token_fisher_kl_divergence/p95": 5.734932528866921e-09, "token_fisher_kl_divergence/p99": 0.00014307349920272827, "token_fisher_kl_divergence/var": 5.691312665589976e-08, "token_full_update_term": 0.0006158525357022882, "token_full_update_term/max": 0.12890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.7120113372802734e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.4640736379951704e-12, "token_full_update_term/p85": 1.7316779121756554e-09, "token_full_update_term/p90": 4.353933036327362e-08, "token_full_update_term/p95": 8.225440979003906e-06, "token_full_update_term/p99": 0.0162353515625, "token_full_update_term/var": 3.6218141758581623e-05, "token_hessian_coeff": 11095.85546875, "token_hessian_coeff/max": 65273856.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -8650752.0, "token_hessian_coeff/p25": -0.000545501708984375, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.5508270263671875, "token_hessian_coeff/var": 3118199472128.0, "token_hessian_coeff_abs": 151756.515625, "token_hessian_coeff_abs/max": 65273856.0, "token_hessian_coeff_abs/median": 1.3317912817001343e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.002685546875, "token_hessian_coeff_abs/p99": 4194304.0, "token_hessian_coeff_abs/var": 3095292542976.0 }, { "accuracy_reward": 0.78125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.17269739508628845, "adam_stats/lm_head/lr_effective_max": 7.768039358779788e-05, "adam_stats/lm_head/lr_effective_mean": -4.318794627478084e-11, "adam_stats/lm_head/lr_effective_min": -7.756787090329453e-05, "adam_stats/lm_head/lr_effective_std": 1.919905116665177e-06, "adam_stats/lr_effective_max": 9.210177086060867e-05, "adam_stats/lr_effective_mean": -2.0112898846402771e-10, "adam_stats/lr_effective_min": -9.31105823838152e-05, "adam_stats/m_t_max": 0.007395870517939329, "adam_stats/m_t_mean": -3.88857210320559e-11, "adam_stats/m_t_min": -0.004859281703829765, "adam_stats/v_t_max": 2.6367222744738683e-05, "adam_stats/v_t_mean": 1.7269240508085981e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.78125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.17269739508628845, "all_logprobs": -0.017207054421305656, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.34375, "all_logprobs/p1": -0.474609375, "all_logprobs/p10": -5.817413330078125e-05, "all_logprobs/p25": -4.76837158203125e-07, "all_logprobs/p5": -0.004180908203125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.028875719755887985, "clip_ratio": 0.0, "completion_length": 436.5, "completion_length/correct": 376.8933410644531, "completion_length/correct/max": 914.0, "completion_length/correct/median": 331.0, "completion_length/correct/min": 226.0, "completion_length/correct/p25": 276.0, "completion_length/correct/p75": 479.0, "completion_length/correct/var": 20448.74609375, "completion_length/incorrect": 649.3809814453125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 649.0, "completion_length/incorrect/min": 277.0, "completion_length/incorrect/p25": 327.0, "completion_length/incorrect/p75": 811.0, "completion_length/incorrect/var": 68792.84375, "completion_length/max": 1024.0, "completion_length/median": 333.0, "completion_length/min": 226.0, "completion_length/p25": 279.0, "completion_length/p75": 544.25, "completion_length/var": 43233.890625, "curvature_clip_ratio_token_fisher": 0.017182130366563797, "curvature_clip_ratio_token_hessian": 0.011597937904298306, "curvature_clip_ratio_total_fisher": 0.017182130366563797, "curvature_clip_ratio_total_full": 0.017182130366563797, "curvature_clip_ratio_total_hessian": 0.011597937904298306, "epoch": 0.0272, "feature_vector_variance/max_squared_error": 135968.59375, "feature_vector_variance/metric": 26492.421875, "generated_tokens/total": 1119083.0, "global_fisher_curvature": 90624.0, "global_fisher_curvature/max": 90624.0, "global_fisher_curvature/median": 90624.0, "global_fisher_curvature/min": 90624.0, "global_fisher_curvature/p25": 90624.0, "global_fisher_curvature/p75": 90624.0, "global_fisher_curvature/p85": 90624.0, "global_fisher_curvature/p90": 90624.0, "global_fisher_curvature/p95": 90624.0, "global_fisher_curvature/p99": 90624.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 9.953975677490234e-06, "global_fisher_kl_divergence/max": 9.953975677490234e-06, "global_fisher_kl_divergence/median": 9.953975677490234e-06, "global_fisher_kl_divergence/min": 9.953975677490234e-06, "global_fisher_kl_divergence/p25": 9.953975677490234e-06, "global_fisher_kl_divergence/p75": 9.953975677490234e-06, "global_fisher_kl_divergence/p85": 9.953975677490234e-06, "global_fisher_kl_divergence/p90": 9.953975677490234e-06, "global_fisher_kl_divergence/p95": 9.953975677490234e-06, "global_fisher_kl_divergence/p99": 9.953975677490234e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0830078125, "global_full_update_term/max": 0.0830078125, "global_full_update_term/median": 0.0830078125, "global_full_update_term/min": 0.0830078125, "global_full_update_term/p25": 0.0830078125, "global_full_update_term/p75": 0.0830078125, "global_full_update_term/p85": 0.0830078125, "global_full_update_term/p90": 0.0830078125, "global_full_update_term/p95": 0.0830078125, "global_full_update_term/p99": 0.0830078125, "global_full_update_term/var": NaN, "global_hessian_coeff": 22784.0, "global_hessian_coeff/max": 22784.0, "global_hessian_coeff/median": 22784.0, "global_hessian_coeff/min": 22784.0, "global_hessian_coeff/p25": 22784.0, "global_hessian_coeff/p75": 22784.0, "global_hessian_coeff/p99": 22784.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 22784.0, "global_hessian_coeff_abs/max": 22784.0, "global_hessian_coeff_abs/median": 22784.0, "global_hessian_coeff_abs/min": 22784.0, "global_hessian_coeff_abs/p25": 22784.0, "global_hessian_coeff_abs/p75": 22784.0, "global_hessian_coeff_abs/p99": 22784.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.06316431611776352, "learning_rate": 1.4777217947069972e-05, "loss": -0.7812, "masked_global_fisher_curvature": 211.0, "masked_global_fisher_curvature/max": 211.0, "masked_global_fisher_curvature/median": 211.0, "masked_global_fisher_curvature/min": 211.0, "masked_global_fisher_curvature/p25": 211.0, "masked_global_fisher_curvature/p75": 211.0, "masked_global_fisher_curvature/p85": 211.0, "masked_global_fisher_curvature/p90": 211.0, "masked_global_fisher_curvature/p95": 211.0, "masked_global_fisher_curvature/p99": 211.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/max": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/median": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/min": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p25": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p75": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p85": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p90": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p95": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p99": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00433349609375, "masked_global_full_update_term/max": 0.00433349609375, "masked_global_full_update_term/median": 0.00433349609375, "masked_global_full_update_term/min": 0.00433349609375, "masked_global_full_update_term/p25": 0.00433349609375, "masked_global_full_update_term/p75": 0.00433349609375, "masked_global_full_update_term/p85": 0.00433349609375, "masked_global_full_update_term/p90": 0.00433349609375, "masked_global_full_update_term/p95": 0.00433349609375, "masked_global_full_update_term/p99": 0.00433349609375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -3232.0, "masked_global_hessian_coeff/max": -3232.0, "masked_global_hessian_coeff/median": -3232.0, "masked_global_hessian_coeff/min": -3232.0, "masked_global_hessian_coeff/p25": -3232.0, "masked_global_hessian_coeff/p75": -3232.0, "masked_global_hessian_coeff/p99": -3232.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3232.0, "masked_global_hessian_coeff_abs/max": 3232.0, "masked_global_hessian_coeff_abs/median": 3232.0, "masked_global_hessian_coeff_abs/min": 3232.0, "masked_global_hessian_coeff_abs/p25": 3232.0, "masked_global_hessian_coeff_abs/p75": 3232.0, "masked_global_hessian_coeff_abs/p99": 3232.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.4451904296875, "masked_per_sentence_gradient_norm/max": 9.9375, "masked_per_sentence_gradient_norm/median": 1.8125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.42724609375, "masked_per_sentence_gradient_norm/p75": 4.25, "masked_per_sentence_gradient_norm/var": 5.374481201171875, "masked_per_token_gradient_norm": 0.05663401633501053, "masked_per_token_gradient_norm/max": 10.8125, "masked_per_token_gradient_norm/median": 1.5934347175061703e-09, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.550201654434204e-06, "masked_per_token_gradient_norm/var": 0.27571767568588257, "masked_sentence_fisher_curvature": 312.013671875, "masked_sentence_fisher_curvature/max": 1004.0, "masked_sentence_fisher_curvature/median": 292.0, "masked_sentence_fisher_curvature/min": 12.4375, "masked_sentence_fisher_curvature/p25": 177.0, "masked_sentence_fisher_curvature/p75": 430.5, "masked_sentence_fisher_curvature/p85": 504.0, "masked_sentence_fisher_curvature/p90": 564.0, "masked_sentence_fisher_curvature/p95": 618.0, "masked_sentence_fisher_curvature/p99": 950.8001708984375, "masked_sentence_fisher_curvature/var": 39818.078125, "masked_sentence_fisher_kl_divergence": 3.4318723862725165e-08, "masked_sentence_fisher_kl_divergence/max": 1.103617250919342e-07, "masked_sentence_fisher_kl_divergence/median": 3.213062882423401e-08, "masked_sentence_fisher_kl_divergence/min": 1.367880031466484e-09, "masked_sentence_fisher_kl_divergence/p25": 1.9441358745098114e-08, "masked_sentence_fisher_kl_divergence/p75": 4.7322828322649e-08, "masked_sentence_fisher_kl_divergence/p85": 5.541369318962097e-08, "masked_sentence_fisher_kl_divergence/p90": 6.193295121192932e-08, "masked_sentence_fisher_kl_divergence/p95": 6.798654794692993e-08, "masked_sentence_fisher_kl_divergence/p99": 1.0461082666779475e-07, "masked_sentence_fisher_kl_divergence/var": 4.816110753475336e-16, "masked_sentence_full_gradient_variance/max_squared_error": 10.818937301635742, "masked_sentence_full_gradient_variance/metric": 10.818937301635742, "masked_sentence_full_gradient_variance/p75": 10.818937301635742, "masked_sentence_full_gradient_variance/p90": 10.818937301635742, "masked_sentence_full_gradient_variance/p95": 10.818937301635742, "masked_sentence_full_gradient_variance/p99": 10.818937301635742, "masked_sentence_full_update_term": 0.001709292409941554, "masked_sentence_full_update_term/max": 0.006072998046875, "masked_sentence_full_update_term/median": 0.00130462646484375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00035762786865234375, "masked_sentence_full_update_term/p75": 0.002899169921875, "masked_sentence_full_update_term/p85": 0.003559112548828125, "masked_sentence_full_update_term/p90": 0.00377655029296875, "masked_sentence_full_update_term/p95": 0.00424957275390625, "masked_sentence_full_update_term/p99": 0.0049133338034152985, "masked_sentence_full_update_term/var": 2.2177118808031082e-06, "masked_sentence_hessian_coeff": -9197.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -10176.0, "masked_sentence_hessian_coeff/min": -28544.0, "masked_sentence_hessian_coeff/p25": -12784.0, "masked_sentence_hessian_coeff/p75": -3076.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 43673928.0, "masked_sentence_hessian_coeff_abs": 9197.0, "masked_sentence_hessian_coeff_abs/max": 28544.0, "masked_sentence_hessian_coeff_abs/median": 9984.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 3076.0, "masked_sentence_hessian_coeff_abs/p75": 12784.0, "masked_sentence_hessian_coeff_abs/p99": 22099.220703125, "masked_sentence_hessian_coeff_abs/var": 43673928.0, "masked_token_fisher_curvature": 381.43804931640625, "masked_token_fisher_curvature/max": 90112.0, "masked_token_fisher_curvature/median": 1.9628743075372768e-13, "masked_token_fisher_curvature/min": 1.3224311446750734e-38, "masked_token_fisher_curvature/p25": 1.4433441421213278e-18, "masked_token_fisher_curvature/p75": 8.032657206058502e-09, "masked_token_fisher_curvature/p85": 2.049666363745928e-06, "masked_token_fisher_curvature/p90": 0.0001888275146484375, "masked_token_fisher_curvature/p95": 0.494140625, "masked_token_fisher_curvature/p99": 9024.0, "masked_token_fisher_curvature/var": 16171923.0, "masked_token_fisher_kl_divergence": 4.19790353589633e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 2.1610093502947848e-23, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.5856104194942337e-28, "masked_token_fisher_kl_divergence/p75": 8.809142651444724e-19, "masked_token_fisher_kl_divergence/p85": 2.255140518769849e-16, "masked_token_fisher_kl_divergence/p90": 2.0761170560490427e-14, "masked_token_fisher_kl_divergence/p95": 5.434230843093246e-11, "masked_token_fisher_kl_divergence/p99": 9.909272193908691e-07, "masked_token_fisher_kl_divergence/var": 1.9592163449325822e-13, "masked_token_full_update_term": 2.321865758858621e-05, "masked_token_full_update_term/max": 0.004241943359375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.2202730178833008e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 6.139089236967266e-12, "masked_token_full_update_term/p85": 8.751186442168546e-10, "masked_token_full_update_term/p90": 1.3445969671010971e-08, "masked_token_full_update_term/p95": 6.742775440216064e-07, "masked_token_full_update_term/p99": 0.000782012939453125, "masked_token_full_update_term/var": 4.826117461220747e-08, "masked_token_hessian_coeff": -11356.4912109375, "masked_token_hessian_coeff/max": 163.0, "masked_token_hessian_coeff/median": -6.184563972055912e-11, "masked_token_hessian_coeff/min": -2228224.0, "masked_token_hessian_coeff/p25": -0.0007476806640625, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.08203125, "masked_token_hessian_coeff/var": 11785893888.0, "masked_token_hessian_coeff_abs": 11356.599609375, "masked_token_hessian_coeff_abs/max": 2228224.0, "masked_token_hessian_coeff_abs/median": 3.855675458908081e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00274658203125, "masked_token_hessian_coeff_abs/p99": 373088.0, "masked_token_hessian_coeff_abs/var": 11785890816.0, "mean_logprobs": -0.01611328125, "mean_logprobs/var": 0.00018405914306640625, "num_completions/total": 1632, "per_sentence_gradient_norm": 55.378257751464844, "per_sentence_gradient_norm/max": 181.0, "per_sentence_gradient_norm/median": 41.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 17.296875, "per_sentence_gradient_norm/p75": 78.625, "per_sentence_gradient_norm/var": 2656.917724609375, "per_token_feature_norm": 179.5385284423828, "per_token_feature_norm/max": 322.0, "per_token_feature_norm/median": 181.0, "per_token_feature_norm/min": 72.0, "per_token_feature_norm/p25": 170.0, "per_token_feature_norm/p75": 189.0, "per_token_feature_norm/var": 275.99163818359375, "per_token_gradient_norm": 1.276348352432251, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 2.2264430299401283e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.887580871582031e-06, "per_token_gradient_norm/var": 162.20672607421875, "per_token_policy_error_norm": 0.009795734658837318, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.009602108038961887, "policy_entropy": 0.017635120078921318, "policy_entropy/max": 3.265625, "policy_entropy/median": 9.220093488693237e-08, "policy_entropy/min": 1.5178830414797062e-18, "policy_entropy/p25": 4.488356353249401e-10, "policy_entropy/p75": 8.344650268554688e-06, "policy_entropy/var": 0.0108876321464777, "policy_loss": -0.78125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.17269739508628845, "policy_sharpness": 9.522503852844238, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.9992077350616455, "reward": 0.78125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.17269739508628845, "rewards/accuracy_reward": 0.78125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.17269739508628845, "sentence_fisher_curvature": 183889.1875, "sentence_fisher_curvature/max": 1056768.0, "sentence_fisher_curvature/median": 151552.0, "sentence_fisher_curvature/min": 430.0, "sentence_fisher_curvature/p25": 34144.0, "sentence_fisher_curvature/p75": 272896.0, "sentence_fisher_curvature/p85": 339456.0, "sentence_fisher_curvature/p90": 444416.0, "sentence_fisher_curvature/p95": 491520.0, "sentence_fisher_curvature/p99": 784384.875, "sentence_fisher_curvature/var": 36545286144.0, "sentence_fisher_kl_divergence": 2.0240751837263815e-05, "sentence_fisher_kl_divergence/max": 0.0001163482666015625, "sentence_fisher_kl_divergence/median": 1.6689300537109375e-05, "sentence_fisher_kl_divergence/min": 4.7264620661735535e-08, "sentence_fisher_kl_divergence/p25": 3.7476420402526855e-06, "sentence_fisher_kl_divergence/p75": 2.9981136322021484e-05, "sentence_fisher_kl_divergence/p85": 3.737211227416992e-05, "sentence_fisher_kl_divergence/p90": 4.887580871582031e-05, "sentence_fisher_kl_divergence/p95": 5.412101745605469e-05, "sentence_fisher_kl_divergence/p99": 8.645067282486707e-05, "sentence_fisher_kl_divergence/var": 4.42899661390328e-10, "sentence_full_gradient_variance/max_squared_error": 5583.4921875, "sentence_full_gradient_variance/metric": 5583.4921875, "sentence_full_gradient_variance/p75": 5583.4921875, "sentence_full_gradient_variance/p90": 5583.4921875, "sentence_full_gradient_variance/p95": 5583.4921875, "sentence_full_gradient_variance/p99": 5583.4921875, "sentence_full_update_term": 0.04038429260253906, "sentence_full_update_term/max": 0.1416015625, "sentence_full_update_term/median": 0.0341796875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.013153076171875, "sentence_full_update_term/p75": 0.06207275390625, "sentence_full_update_term/p85": 0.078125, "sentence_full_update_term/p90": 0.08984375, "sentence_full_update_term/p95": 0.108154296875, "sentence_full_update_term/p99": 0.11979987472295761, "sentence_full_update_term/var": 0.0012357522500678897, "sentence_hessian_coeff": 25593.66796875, "sentence_hessian_coeff/max": 618496.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -167936.0, "sentence_hessian_coeff/p25": -39936.0, "sentence_hessian_coeff/p75": 50688.0, "sentence_hessian_coeff/p99": 392807.125, "sentence_hessian_coeff/var": 11474343936.0, "sentence_hessian_coeff_abs": 64497.66796875, "sentence_hessian_coeff_abs/max": 618496.0, "sentence_hessian_coeff_abs/median": 45312.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 9408.0, "sentence_hessian_coeff_abs/p75": 73728.0, "sentence_hessian_coeff_abs/p99": 392807.125, "sentence_hessian_coeff_abs/var": 7932537344.0, "step": 17, "token_fisher_curvature": 152203.875, "token_fisher_curvature/max": 69206016.0, "token_fisher_curvature/median": 2.9665159217984183e-13, "token_fisher_curvature/min": 1.3224311446750734e-38, "token_fisher_curvature/p25": 1.870248747537495e-18, "token_fisher_curvature/p75": 1.4319084584712982e-08, "token_fisher_curvature/p85": 7.539987564086914e-06, "token_fisher_curvature/p90": 0.0011985599994659424, "token_fisher_curvature/p95": 27.5, "token_fisher_curvature/p99": 1032192.0, "token_fisher_curvature/var": 4501898526720.0, "token_fisher_kl_divergence": 1.6748052075854503e-05, "token_fisher_kl_divergence/max": 0.00762939453125, "token_fisher_kl_divergence/median": 3.2673634195844593e-23, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.0510383535746307e-28, "token_fisher_kl_divergence/p75": 1.5788694136820158e-18, "token_fisher_kl_divergence/p85": 8.291978215169138e-16, "token_fisher_kl_divergence/p90": 1.318042897047178e-13, "token_fisher_kl_divergence/p95": 3.026798367500305e-09, "token_fisher_kl_divergence/p99": 0.00011348724365234375, "token_fisher_kl_divergence/var": 5.45032001753043e-08, "token_full_update_term": 0.0005761197535321116, "token_full_update_term/max": 0.130859375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.2202730178833008e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.1539214028744027e-11, "token_full_update_term/p85": 1.811713445931673e-09, "token_full_update_term/p90": 3.4226104617118835e-08, "token_full_update_term/p95": 3.4697004593908787e-06, "token_full_update_term/p99": 0.01446533203125, "token_full_update_term/var": 3.4326727472944185e-05, "token_hessian_coeff": 7550.58544921875, "token_hessian_coeff/max": 68157440.0, "token_hessian_coeff/median": -1.000444171950221e-10, "token_hessian_coeff/min": -8781824.0, "token_hessian_coeff/p25": -0.00098419189453125, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.287109375, "token_hessian_coeff/var": 2873608372224.0, "token_hessian_coeff_abs": 140592.09375, "token_hessian_coeff_abs/max": 68157440.0, "token_hessian_coeff_abs/median": 5.178153514862061e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.003879547119140625, "token_hessian_coeff_abs/p99": 3800576.0, "token_hessian_coeff_abs/var": 2853898813440.0 }, { "accuracy_reward": 0.7604166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.18410088121891022, "adam_stats/lm_head/lr_effective_max": 8.200210868380964e-05, "adam_stats/lm_head/lr_effective_mean": -3.082573085677609e-11, "adam_stats/lm_head/lr_effective_min": -7.550926966359839e-05, "adam_stats/lm_head/lr_effective_std": 1.8124512735084863e-06, "adam_stats/lr_effective_max": 9.125469659920782e-05, "adam_stats/lr_effective_mean": -8.196813366945221e-11, "adam_stats/lr_effective_min": -9.326289728051051e-05, "adam_stats/m_t_max": 0.006671065464615822, "adam_stats/m_t_mean": -3.531793260624916e-11, "adam_stats/m_t_min": -0.004442780744284391, "adam_stats/v_t_max": 2.634087832120713e-05, "adam_stats/v_t_mean": 1.7257407525575474e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7604166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.18410088121891022, "all_logprobs": -0.012674757279455662, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.75, "all_logprobs/p1": -0.33857429027557373, "all_logprobs/p10": -1.9156897906214e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.0011749267578125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01773589290678501, "clip_ratio": 0.0, "completion_length": 530.1458740234375, "completion_length/correct": 442.9315185546875, "completion_length/correct/max": 1007.0, "completion_length/correct/median": 428.0, "completion_length/correct/min": 171.0, "completion_length/correct/p25": 358.0, "completion_length/correct/p75": 531.0, "completion_length/correct/var": 25922.83984375, "completion_length/incorrect": 806.95654296875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 781.0, "completion_length/incorrect/min": 327.0, "completion_length/incorrect/p25": 676.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 39165.58984375, "completion_length/max": 1024.0, "completion_length/median": 490.0, "completion_length/min": 171.0, "completion_length/p25": 373.0, "completion_length/p75": 666.25, "completion_length/var": 53112.69140625, "curvature_clip_ratio_token_fisher": 0.015797540545463562, "curvature_clip_ratio_token_hessian": 0.010099422186613083, "curvature_clip_ratio_total_fisher": 0.015797540545463562, "curvature_clip_ratio_total_full": 0.015797540545463562, "curvature_clip_ratio_total_hessian": 0.010099422186613083, "epoch": 0.0288, "feature_vector_variance/max_squared_error": 134189.453125, "feature_vector_variance/metric": 25745.203125, "generated_tokens/total": 1169977.0, "global_fisher_curvature": 65024.0, "global_fisher_curvature/max": 65024.0, "global_fisher_curvature/median": 65024.0, "global_fisher_curvature/min": 65024.0, "global_fisher_curvature/p25": 65024.0, "global_fisher_curvature/p75": 65024.0, "global_fisher_curvature/p85": 65024.0, "global_fisher_curvature/p90": 65024.0, "global_fisher_curvature/p95": 65024.0, "global_fisher_curvature/p99": 65024.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.092952728271484e-06, "global_fisher_kl_divergence/max": 7.092952728271484e-06, "global_fisher_kl_divergence/median": 7.092952728271484e-06, "global_fisher_kl_divergence/min": 7.092952728271484e-06, "global_fisher_kl_divergence/p25": 7.092952728271484e-06, "global_fisher_kl_divergence/p75": 7.092952728271484e-06, "global_fisher_kl_divergence/p85": 7.092952728271484e-06, "global_fisher_kl_divergence/p90": 7.092952728271484e-06, "global_fisher_kl_divergence/p95": 7.092952728271484e-06, "global_fisher_kl_divergence/p99": 7.092952728271484e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.07177734375, "global_full_update_term/max": 0.07177734375, "global_full_update_term/median": 0.07177734375, "global_full_update_term/min": 0.07177734375, "global_full_update_term/p25": 0.07177734375, "global_full_update_term/p75": 0.07177734375, "global_full_update_term/p85": 0.07177734375, "global_full_update_term/p90": 0.07177734375, "global_full_update_term/p95": 0.07177734375, "global_full_update_term/p99": 0.07177734375, "global_full_update_term/var": NaN, "global_hessian_coeff": 15040.0, "global_hessian_coeff/max": 15040.0, "global_hessian_coeff/median": 15040.0, "global_hessian_coeff/min": 15040.0, "global_hessian_coeff/p25": 15040.0, "global_hessian_coeff/p75": 15040.0, "global_hessian_coeff/p99": 15040.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 15040.0, "global_hessian_coeff_abs/max": 15040.0, "global_hessian_coeff_abs/median": 15040.0, "global_hessian_coeff_abs/min": 15040.0, "global_hessian_coeff_abs/p25": 15040.0, "global_hessian_coeff_abs/p75": 15040.0, "global_hessian_coeff_abs/p99": 15040.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04388130083680153, "learning_rate": 1.4709462719537392e-05, "loss": -0.7604, "masked_global_fisher_curvature": 145.0, "masked_global_fisher_curvature/max": 145.0, "masked_global_fisher_curvature/median": 145.0, "masked_global_fisher_curvature/min": 145.0, "masked_global_fisher_curvature/p25": 145.0, "masked_global_fisher_curvature/p75": 145.0, "masked_global_fisher_curvature/p85": 145.0, "masked_global_fisher_curvature/p90": 145.0, "masked_global_fisher_curvature/p95": 145.0, "masked_global_fisher_curvature/p99": 145.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/max": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/median": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/min": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/p25": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/p75": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/p85": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/p90": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/p95": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/p99": 1.5832483768463135e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00244140625, "masked_global_full_update_term/max": 0.00244140625, "masked_global_full_update_term/median": 0.00244140625, "masked_global_full_update_term/min": 0.00244140625, "masked_global_full_update_term/p25": 0.00244140625, "masked_global_full_update_term/p75": 0.00244140625, "masked_global_full_update_term/p85": 0.00244140625, "masked_global_full_update_term/p90": 0.00244140625, "masked_global_full_update_term/p95": 0.00244140625, "masked_global_full_update_term/p99": 0.00244140625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1784.0, "masked_global_hessian_coeff/max": -1784.0, "masked_global_hessian_coeff/median": -1784.0, "masked_global_hessian_coeff/min": -1784.0, "masked_global_hessian_coeff/p25": -1784.0, "masked_global_hessian_coeff/p75": -1784.0, "masked_global_hessian_coeff/p99": -1784.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1784.0, "masked_global_hessian_coeff_abs/max": 1784.0, "masked_global_hessian_coeff_abs/median": 1784.0, "masked_global_hessian_coeff_abs/min": 1784.0, "masked_global_hessian_coeff_abs/p25": 1784.0, "masked_global_hessian_coeff_abs/p75": 1784.0, "masked_global_hessian_coeff_abs/p99": 1784.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.8681234121322632, "masked_per_sentence_gradient_norm/max": 6.9375, "masked_per_sentence_gradient_norm/median": 1.4765625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.4755859375, "masked_per_sentence_gradient_norm/p75": 2.78125, "masked_per_sentence_gradient_norm/var": 2.7986810207366943, "masked_per_token_gradient_norm": 0.04604462534189224, "masked_per_token_gradient_norm/max": 10.4375, "masked_per_token_gradient_norm/median": 1.2369127944111824e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.0132789611816406e-06, "masked_per_token_gradient_norm/var": 0.2105240374803543, "masked_sentence_fisher_curvature": 220.9635467529297, "masked_sentence_fisher_curvature/max": 1020.0, "masked_sentence_fisher_curvature/median": 199.0, "masked_sentence_fisher_curvature/min": 8.375, "masked_sentence_fisher_curvature/p25": 101.625, "masked_sentence_fisher_curvature/p75": 310.0, "masked_sentence_fisher_curvature/p85": 353.0, "masked_sentence_fisher_curvature/p90": 382.0, "masked_sentence_fisher_curvature/p95": 441.0, "masked_sentence_fisher_curvature/p99": 594.4013671875, "masked_sentence_fisher_curvature/var": 23393.5859375, "masked_sentence_fisher_kl_divergence": 2.4116882713087762e-08, "masked_sentence_fisher_kl_divergence/max": 1.1129304766654968e-07, "masked_sentence_fisher_kl_divergence/median": 2.176966518163681e-08, "masked_sentence_fisher_kl_divergence/min": 9.1313268058002e-10, "masked_sentence_fisher_kl_divergence/p25": 1.1088559404015541e-08, "masked_sentence_fisher_kl_divergence/p75": 3.3760443329811096e-08, "masked_sentence_fisher_kl_divergence/p85": 3.8533471524715424e-08, "masked_sentence_fisher_kl_divergence/p90": 4.1676685214042664e-08, "masked_sentence_fisher_kl_divergence/p95": 4.807952791452408e-08, "masked_sentence_fisher_kl_divergence/p99": 6.48434834715772e-08, "masked_sentence_fisher_kl_divergence/var": 2.7848897470592656e-16, "masked_sentence_full_gradient_variance/max_squared_error": 6.044569969177246, "masked_sentence_full_gradient_variance/metric": 6.044569969177246, "masked_sentence_full_gradient_variance/p75": 6.044569969177246, "masked_sentence_full_gradient_variance/p90": 6.044569969177246, "masked_sentence_full_gradient_variance/p95": 6.044569969177246, "masked_sentence_full_gradient_variance/p99": 6.044569969177246, "masked_sentence_full_update_term": 0.001345137832686305, "masked_sentence_full_update_term/max": 0.005767822265625, "masked_sentence_full_update_term/median": 0.00109100341796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00026226043701171875, "masked_sentence_full_update_term/p75": 0.002105712890625, "masked_sentence_full_update_term/p85": 0.002773284912109375, "masked_sentence_full_update_term/p90": 0.00301361083984375, "masked_sentence_full_update_term/p95": 0.003688812255859375, "masked_sentence_full_update_term/p99": 0.004260258749127388, "masked_sentence_full_update_term/var": 1.5828663890715688e-06, "masked_sentence_hessian_coeff": -7013.25, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7104.0, "masked_sentence_hessian_coeff/min": -19456.0, "masked_sentence_hessian_coeff/p25": -10608.0, "masked_sentence_hessian_coeff/p75": -1986.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 31694272.0, "masked_sentence_hessian_coeff_abs": 7013.25, "masked_sentence_hessian_coeff_abs/max": 19456.0, "masked_sentence_hessian_coeff_abs/median": 6912.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 1986.0, "masked_sentence_hessian_coeff_abs/p75": 10608.0, "masked_sentence_hessian_coeff_abs/p99": 19456.0, "masked_sentence_hessian_coeff_abs/var": 31694272.0, "masked_token_fisher_curvature": 302.6611328125, "masked_token_fisher_curvature/max": 91136.0, "masked_token_fisher_curvature/median": 1.5765166949677223e-14, "masked_token_fisher_curvature/min": 6.428484731059385e-40, "masked_token_fisher_curvature/p25": 9.571472303973594e-20, "masked_token_fisher_curvature/p75": 6.039044819772243e-10, "masked_token_fisher_curvature/p85": 1.601874828338623e-07, "masked_token_fisher_curvature/p90": 1.1146068572998047e-05, "masked_token_fisher_curvature/p95": 0.022675037384033203, "masked_token_fisher_curvature/p99": 4544.0, "masked_token_fisher_curvature/var": 13929727.0, "masked_token_fisher_kl_divergence": 3.3043331626458894e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.7189847104617606e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.0452406994178406e-29, "masked_token_fisher_kl_divergence/p75": 6.606856988583543e-20, "masked_token_fisher_kl_divergence/p85": 1.745565497701662e-17, "masked_token_fisher_kl_divergence/p90": 1.214306433183765e-15, "masked_token_fisher_kl_divergence/p95": 2.475575300309174e-12, "masked_token_fisher_kl_divergence/p99": 4.954636096954346e-07, "masked_token_fisher_kl_divergence/var": 1.6600926904843105e-13, "masked_token_full_update_term": 1.9117920601274818e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.5422701835632324e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 9.379164112033322e-13, "masked_token_full_update_term/p85": 1.4642864698544145e-10, "masked_token_full_update_term/p90": 2.1245796233415604e-09, "masked_token_full_update_term/p95": 1.5832483768463135e-07, "masked_token_full_update_term/p99": 0.0006802678108215332, "masked_token_full_update_term/var": 3.838649575982345e-08, "masked_token_hessian_coeff": -9367.521484375, "masked_token_hessian_coeff/max": 748.0, "masked_token_hessian_coeff/median": -6.714628852932947e-13, "masked_token_hessian_coeff/min": -2195456.0, "masked_token_hessian_coeff/p25": -0.00014019012451171875, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.10791015625, "masked_token_hessian_coeff/var": 9518136320.0, "masked_token_hessian_coeff_abs": 9367.7353515625, "masked_token_hessian_coeff_abs/max": 2195456.0, "masked_token_hessian_coeff_abs/median": 3.632158041000366e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.000667572021484375, "masked_token_hessian_coeff_abs/p99": 303104.0, "masked_token_hessian_coeff_abs/var": 9518132224.0, "mean_logprobs": -0.012451171875, "mean_logprobs/var": 0.00011444091796875, "num_completions/total": 1728, "per_sentence_gradient_norm": 47.184898376464844, "per_sentence_gradient_norm/max": 139.0, "per_sentence_gradient_norm/median": 36.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 14.4375, "per_sentence_gradient_norm/p75": 75.0, "per_sentence_gradient_norm/var": 1809.3453369140625, "per_token_feature_norm": 177.54425048828125, "per_token_feature_norm/max": 318.0, "per_token_feature_norm/median": 179.0, "per_token_feature_norm/min": 79.5, "per_token_feature_norm/p25": 168.0, "per_token_feature_norm/p75": 188.0, "per_token_feature_norm/var": 261.5390625, "per_token_gradient_norm": 1.1522183418273926, "per_token_gradient_norm/max": 288.0, "per_token_gradient_norm/median": 1.6370904631912708e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.3709068298339844e-06, "per_token_gradient_norm/var": 144.53321838378906, "per_token_policy_error_norm": 0.007450496312230825, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.007002072874456644, "policy_entropy": 0.013988143764436245, "policy_entropy/max": 3.125, "policy_entropy/median": 2.8870999813079834e-08, "policy_entropy/min": 2.270048298641525e-19, "policy_entropy/p25": 1.0095391189679503e-10, "policy_entropy/p75": 2.86102294921875e-06, "policy_entropy/var": 0.008429648354649544, "policy_loss": -0.7604166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.18410088121891022, "policy_sharpness": 9.614375114440918, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.4273009300231934, "reward": 0.7604166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.18410088121891022, "rewards/accuracy_reward": 0.7604166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.18410088121891022, "sentence_fisher_curvature": 157661.75, "sentence_fisher_curvature/max": 659456.0, "sentence_fisher_curvature/median": 112128.0, "sentence_fisher_curvature/min": 158.0, "sentence_fisher_curvature/p25": 22400.0, "sentence_fisher_curvature/p75": 259584.0, "sentence_fisher_curvature/p85": 335872.0, "sentence_fisher_curvature/p90": 377856.0, "sentence_fisher_curvature/p95": 482304.0, "sentence_fisher_curvature/p99": 585523.4375, "sentence_fisher_curvature/var": 25754284032.0, "sentence_fisher_kl_divergence": 1.720966974971816e-05, "sentence_fisher_kl_divergence/max": 7.200241088867188e-05, "sentence_fisher_kl_divergence/median": 1.2218952178955078e-05, "sentence_fisher_kl_divergence/min": 1.7229467630386353e-08, "sentence_fisher_kl_divergence/p25": 2.4437904357910156e-06, "sentence_fisher_kl_divergence/p75": 2.8312206268310547e-05, "sentence_fisher_kl_divergence/p85": 3.6716461181640625e-05, "sentence_fisher_kl_divergence/p90": 4.124641418457031e-05, "sentence_fisher_kl_divergence/p95": 5.257129669189453e-05, "sentence_fisher_kl_divergence/p99": 6.384852167684585e-05, "sentence_fisher_kl_divergence/var": 3.068054699184586e-10, "sentence_full_gradient_variance/max_squared_error": 3933.912109375, "sentence_full_gradient_variance/metric": 3933.912109375, "sentence_full_gradient_variance/p75": 3933.912109375, "sentence_full_gradient_variance/p90": 3933.912109375, "sentence_full_gradient_variance/p95": 3933.912109375, "sentence_full_gradient_variance/p99": 3933.912109375, "sentence_full_update_term": 0.037527721375226974, "sentence_full_update_term/max": 0.14453125, "sentence_full_update_term/median": 0.029296875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00897216796875, "sentence_full_update_term/p75": 0.0623779296875, "sentence_full_update_term/p85": 0.0782470703125, "sentence_full_update_term/p90": 0.090087890625, "sentence_full_update_term/p95": 0.09912109375, "sentence_full_update_term/p99": 0.12412115931510925, "sentence_full_update_term/var": 0.0012291163438931108, "sentence_hessian_coeff": 22260.10546875, "sentence_hessian_coeff/max": 385024.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -185344.0, "sentence_hessian_coeff/p25": -32800.0, "sentence_hessian_coeff/p75": 54336.0, "sentence_hessian_coeff/p99": 314982.625, "sentence_hessian_coeff/var": 9677115392.0, "sentence_hessian_coeff_abs": 64005.8984375, "sentence_hessian_coeff_abs/max": 385024.0, "sentence_hessian_coeff_abs/median": 44800.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 1725.5, "sentence_hessian_coeff_abs/p75": 93696.0, "sentence_hessian_coeff_abs/p99": 314982.625, "sentence_hessian_coeff_abs/var": 6037965312.0, "step": 18, "token_fisher_curvature": 136569.0625, "token_fisher_curvature/max": 70254592.0, "token_fisher_curvature/median": 2.220446049250313e-14, "token_fisher_curvature/min": 6.428484731059385e-40, "token_fisher_curvature/p25": 1.1604351377383915e-19, "token_fisher_curvature/p75": 1.0331859812140465e-09, "token_fisher_curvature/p85": 4.5634806156158447e-07, "token_fisher_curvature/p90": 6.103515625e-05, "token_fisher_curvature/p95": 2.3077392578125, "token_fisher_curvature/p99": 610304.0, "token_fisher_curvature/var": 3946816471040.0, "token_fisher_kl_divergence": 1.4908957382431254e-05, "token_fisher_kl_divergence/max": 0.007659912109375, "token_fisher_kl_divergence/median": 2.4298430493745188e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.262177448353619e-29, "token_fisher_kl_divergence/p75": 1.1265538198482195e-19, "token_fisher_kl_divergence/p85": 4.9873299934333204e-17, "token_fisher_kl_divergence/p90": 6.661338147750939e-15, "token_fisher_kl_divergence/p95": 2.5228530375898117e-10, "token_fisher_kl_divergence/p99": 6.67572021484375e-05, "token_fisher_kl_divergence/var": 4.704478584471872e-08, "token_full_update_term": 0.0005216147983446717, "token_full_update_term/max": 0.130859375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.5422701835632324e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.7035262089848402e-12, "token_full_update_term/p85": 2.6557245291769505e-10, "token_full_update_term/p90": 5.73345459997654e-09, "token_full_update_term/p95": 9.860668797045946e-07, "token_full_update_term/p99": 0.010384559631347656, "token_full_update_term/var": 3.0594703275710344e-05, "token_hessian_coeff": 5713.7119140625, "token_hessian_coeff/max": 69206016.0, "token_hessian_coeff/median": -1.4210854715202004e-12, "token_hessian_coeff/min": -8847360.0, "token_hessian_coeff/p25": -0.00018978118896484375, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.34765625, "token_hessian_coeff/var": 2575017705472.0, "token_hessian_coeff_abs": 125619.8984375, "token_hessian_coeff_abs/max": 69206016.0, "token_hessian_coeff_abs/median": 4.959292709827423e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.0009918212890625, "token_hessian_coeff_abs/p99": 3375104.0, "token_hessian_coeff_abs/var": 2559269404672.0 }, { "accuracy_reward": 0.75, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.75, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.18947365880012512, "adam_stats/lm_head/lr_effective_max": 7.539070065831766e-05, "adam_stats/lm_head/lr_effective_mean": 6.900947695231974e-12, "adam_stats/lm_head/lr_effective_min": -7.22859040251933e-05, "adam_stats/lm_head/lr_effective_std": 1.8125595033779973e-06, "adam_stats/lr_effective_max": 9.082727774512023e-05, "adam_stats/lr_effective_mean": -5.206943903823813e-11, "adam_stats/lr_effective_min": -8.959801925811917e-05, "adam_stats/m_t_max": 0.006362540181726217, "adam_stats/m_t_mean": -2.927583869771766e-11, "adam_stats/m_t_min": -0.004437956027686596, "adam_stats/v_t_max": 2.6327395971748047e-05, "adam_stats/v_t_mean": 1.736282883961493e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.75, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.75, "advantages/p75": 1.0, "advantages/var": 0.18947365880012512, "all_logprobs": -0.021571360528469086, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.0, "all_logprobs/p1": -0.69140625, "all_logprobs/p10": -9.632110595703125e-05, "all_logprobs/p25": -4.76837158203125e-07, "all_logprobs/p5": -0.0067138671875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.03511880710721016, "clip_ratio": 0.0, "completion_length": 639.3333740234375, "completion_length/correct": 571.9722290039062, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 567.0, "completion_length/correct/min": 185.0, "completion_length/correct/p25": 430.75, "completion_length/correct/p75": 725.25, "completion_length/correct/var": 45193.9140625, "completion_length/incorrect": 841.4166870117188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 429.0, "completion_length/incorrect/p25": 602.25, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 54956.6015625, "completion_length/max": 1024.0, "completion_length/median": 597.0, "completion_length/min": 185.0, "completion_length/p25": 493.0, "completion_length/p75": 839.0, "completion_length/var": 60837.640625, "curvature_clip_ratio_token_fisher": 0.022451773285865784, "curvature_clip_ratio_token_hessian": 0.01648852974176407, "curvature_clip_ratio_total_fisher": 0.022451773285865784, "curvature_clip_ratio_total_full": 0.022451773285865784, "curvature_clip_ratio_total_hessian": 0.01648852974176407, "epoch": 0.0304, "feature_vector_variance/max_squared_error": 74209.5546875, "feature_vector_variance/metric": 26354.232421875, "generated_tokens/total": 1231353.0, "global_fisher_curvature": 69120.0, "global_fisher_curvature/max": 69120.0, "global_fisher_curvature/median": 69120.0, "global_fisher_curvature/min": 69120.0, "global_fisher_curvature/p25": 69120.0, "global_fisher_curvature/p75": 69120.0, "global_fisher_curvature/p85": 69120.0, "global_fisher_curvature/p90": 69120.0, "global_fisher_curvature/p95": 69120.0, "global_fisher_curvature/p99": 69120.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.4803829193115234e-06, "global_fisher_kl_divergence/max": 7.4803829193115234e-06, "global_fisher_kl_divergence/median": 7.4803829193115234e-06, "global_fisher_kl_divergence/min": 7.4803829193115234e-06, "global_fisher_kl_divergence/p25": 7.4803829193115234e-06, "global_fisher_kl_divergence/p75": 7.4803829193115234e-06, "global_fisher_kl_divergence/p85": 7.4803829193115234e-06, "global_fisher_kl_divergence/p90": 7.4803829193115234e-06, "global_fisher_kl_divergence/p95": 7.4803829193115234e-06, "global_fisher_kl_divergence/p99": 7.4803829193115234e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.10009765625, "global_full_update_term/max": 0.10009765625, "global_full_update_term/median": 0.10009765625, "global_full_update_term/min": 0.10009765625, "global_full_update_term/p25": 0.10009765625, "global_full_update_term/p75": 0.10009765625, "global_full_update_term/p85": 0.10009765625, "global_full_update_term/p90": 0.10009765625, "global_full_update_term/p95": 0.10009765625, "global_full_update_term/p99": 0.10009765625, "global_full_update_term/var": NaN, "global_hessian_coeff": 10112.0, "global_hessian_coeff/max": 10112.0, "global_hessian_coeff/median": 10112.0, "global_hessian_coeff/min": 10112.0, "global_hessian_coeff/p25": 10112.0, "global_hessian_coeff/p75": 10112.0, "global_hessian_coeff/p99": 10112.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 10112.0, "global_hessian_coeff_abs/max": 10112.0, "global_hessian_coeff_abs/median": 10112.0, "global_hessian_coeff_abs/min": 10112.0, "global_hessian_coeff_abs/p25": 10112.0, "global_hessian_coeff_abs/p75": 10112.0, "global_hessian_coeff_abs/p99": 10112.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.15604177117347717, "learning_rate": 1.4632923872213653e-05, "loss": -0.75, "masked_global_fisher_curvature": 488.0, "masked_global_fisher_curvature/max": 488.0, "masked_global_fisher_curvature/median": 488.0, "masked_global_fisher_curvature/min": 488.0, "masked_global_fisher_curvature/p25": 488.0, "masked_global_fisher_curvature/p75": 488.0, "masked_global_fisher_curvature/p85": 488.0, "masked_global_fisher_curvature/p90": 488.0, "masked_global_fisher_curvature/p95": 488.0, "masked_global_fisher_curvature/p99": 488.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/max": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/median": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/min": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/p25": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/p75": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/p85": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/p90": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/p95": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/p99": 5.2852556109428406e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00347900390625, "masked_global_full_update_term/max": 0.00347900390625, "masked_global_full_update_term/median": 0.00347900390625, "masked_global_full_update_term/min": 0.00347900390625, "masked_global_full_update_term/p25": 0.00347900390625, "masked_global_full_update_term/p75": 0.00347900390625, "masked_global_full_update_term/p85": 0.00347900390625, "masked_global_full_update_term/p90": 0.00347900390625, "masked_global_full_update_term/p95": 0.00347900390625, "masked_global_full_update_term/p99": 0.00347900390625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2192.0, "masked_global_hessian_coeff/max": -2192.0, "masked_global_hessian_coeff/median": -2192.0, "masked_global_hessian_coeff/min": -2192.0, "masked_global_hessian_coeff/p25": -2192.0, "masked_global_hessian_coeff/p75": -2192.0, "masked_global_hessian_coeff/p99": -2192.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2192.0, "masked_global_hessian_coeff_abs/max": 2192.0, "masked_global_hessian_coeff_abs/median": 2192.0, "masked_global_hessian_coeff_abs/min": 2192.0, "masked_global_hessian_coeff_abs/p25": 2192.0, "masked_global_hessian_coeff_abs/p75": 2192.0, "masked_global_hessian_coeff_abs/p99": 2192.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.3084716796875, "masked_per_sentence_gradient_norm/max": 10.125, "masked_per_sentence_gradient_norm/median": 1.21875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.421875, "masked_per_sentence_gradient_norm/p75": 2.84375, "masked_per_sentence_gradient_norm/var": 7.717825412750244, "masked_per_token_gradient_norm": 0.05507926642894745, "masked_per_token_gradient_norm/max": 11.25, "masked_per_token_gradient_norm/median": 4.2382453102618456e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.5944242477416992e-06, "masked_per_token_gradient_norm/var": 0.2620411515235901, "masked_sentence_fisher_curvature": 282.28582763671875, "masked_sentence_fisher_curvature/max": 1200.0, "masked_sentence_fisher_curvature/median": 237.0, "masked_sentence_fisher_curvature/min": 12.8125, "masked_sentence_fisher_curvature/p25": 154.25, "masked_sentence_fisher_curvature/p75": 395.5, "masked_sentence_fisher_curvature/p85": 457.5, "masked_sentence_fisher_curvature/p90": 536.0, "masked_sentence_fisher_curvature/p95": 569.0, "masked_sentence_fisher_curvature/p99": 789.601318359375, "masked_sentence_fisher_curvature/var": 36822.6484375, "masked_sentence_fisher_kl_divergence": 3.052484132126665e-08, "masked_sentence_fisher_kl_divergence/max": 1.2945383787155151e-07, "masked_sentence_fisher_kl_divergence/median": 2.561137080192566e-08, "masked_sentence_fisher_kl_divergence/min": 1.3897079043090343e-09, "masked_sentence_fisher_kl_divergence/p25": 1.6676494851708412e-08, "masked_sentence_fisher_kl_divergence/p75": 4.2782630771398544e-08, "masked_sentence_fisher_kl_divergence/p85": 4.9534719437360764e-08, "masked_sentence_fisher_kl_divergence/p90": 5.797483026981354e-08, "masked_sentence_fisher_kl_divergence/p95": 6.146728992462158e-08, "masked_sentence_fisher_kl_divergence/p99": 8.521615768586344e-08, "masked_sentence_fisher_kl_divergence/var": 4.2945504809012967e-16, "masked_sentence_full_gradient_variance/max_squared_error": 12.357099533081055, "masked_sentence_full_gradient_variance/metric": 12.357099533081055, "masked_sentence_full_gradient_variance/p75": 12.357099533081055, "masked_sentence_full_gradient_variance/p90": 12.357099533081055, "masked_sentence_full_gradient_variance/p95": 12.357099533081055, "masked_sentence_full_gradient_variance/p99": 12.357099533081055, "masked_sentence_full_update_term": 0.0016922156792134047, "masked_sentence_full_update_term/max": 0.010009765625, "masked_sentence_full_update_term/median": 0.000934600830078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0002560615539550781, "masked_sentence_full_update_term/p75": 0.002597808837890625, "masked_sentence_full_update_term/p85": 0.003536224365234375, "masked_sentence_full_update_term/p90": 0.00469970703125, "masked_sentence_full_update_term/p95": 0.0048675537109375, "masked_sentence_full_update_term/p99": 0.008850101381540298, "masked_sentence_full_update_term/var": 3.642728188424371e-06, "masked_sentence_hessian_coeff": -8450.583984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7584.0, "masked_sentence_hessian_coeff/min": -47360.0, "masked_sentence_hessian_coeff/p25": -12080.0, "masked_sentence_hessian_coeff/p75": -1314.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 63986700.0, "masked_sentence_hessian_coeff_abs": 8450.583984375, "masked_sentence_hessian_coeff_abs/max": 47360.0, "masked_sentence_hessian_coeff_abs/median": 6912.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 1314.0, "masked_sentence_hessian_coeff_abs/p75": 12080.0, "masked_sentence_hessian_coeff_abs/p99": 32038.44921875, "masked_sentence_hessian_coeff_abs/var": 63986700.0, "masked_token_fisher_curvature": 360.1579284667969, "masked_token_fisher_curvature/max": 92160.0, "masked_token_fisher_curvature/median": 5.861977570020827e-14, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 3.2187251995663413e-19, "masked_token_fisher_curvature/p75": 5.529727786779404e-09, "masked_token_fisher_curvature/p85": 2.3692846298217773e-06, "masked_token_fisher_curvature/p90": 0.00021895766258239746, "masked_token_fisher_curvature/p95": 0.5390625, "masked_token_fisher_curvature/p99": 8576.0, "masked_token_fisher_curvature/var": 15042407.0, "masked_token_fisher_kl_divergence": 3.8962756576665925e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 6.333101564859118e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.490709505602977e-29, "masked_token_fisher_kl_divergence/p75": 5.996993266560446e-19, "masked_token_fisher_kl_divergence/p85": 2.5673907444456745e-16, "masked_token_fisher_kl_divergence/p90": 2.3713669916602953e-14, "masked_token_fisher_kl_divergence/p95": 5.820766091346741e-11, "masked_token_fisher_kl_divergence/p99": 9.275972843170166e-07, "masked_token_fisher_kl_divergence/var": 1.760446985670655e-13, "masked_token_full_update_term": 2.342930565646384e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.384185791015625e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.2879476091475226e-12, "masked_token_full_update_term/p85": 3.682600890897447e-10, "masked_token_full_update_term/p90": 8.614733815193176e-09, "masked_token_full_update_term/p95": 5.21540641784668e-07, "masked_token_full_update_term/p99": 0.00074005126953125, "masked_token_full_update_term/var": 4.913612627888142e-08, "masked_token_hessian_coeff": -11809.0810546875, "masked_token_hessian_coeff/max": 696.0, "masked_token_hessian_coeff/median": -2.764863893389702e-10, "masked_token_hessian_coeff/min": -2392064.0, "masked_token_hessian_coeff/p25": -0.0002727508544921875, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0835418701171875, "masked_token_hessian_coeff/var": 12786414592.0, "masked_token_hessian_coeff_abs": 11809.2197265625, "masked_token_hessian_coeff_abs/max": 2392064.0, "masked_token_hessian_coeff_abs/median": 1.019798219203949e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.0009307861328125, "masked_token_hessian_coeff_abs/p99": 360512.0, "masked_token_hessian_coeff_abs/var": 12786412544.0, "mean_logprobs": -0.0185546875, "mean_logprobs/var": 0.0004329681396484375, "num_completions/total": 1824, "per_sentence_gradient_norm": 50.376953125, "per_sentence_gradient_norm/max": 208.0, "per_sentence_gradient_norm/median": 39.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 6.375, "per_sentence_gradient_norm/p75": 84.5, "per_sentence_gradient_norm/var": 2270.846435546875, "per_token_feature_norm": 178.85348510742188, "per_token_feature_norm/max": 274.0, "per_token_feature_norm/median": 181.0, "per_token_feature_norm/min": 84.5, "per_token_feature_norm/p25": 170.0, "per_token_feature_norm/p75": 189.0, "per_token_feature_norm/var": 274.0639343261719, "per_token_gradient_norm": 1.7811708450317383, "per_token_gradient_norm/max": 268.0, "per_token_gradient_norm/median": 6.257323548197746e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.726912498474121e-06, "per_token_gradient_norm/var": 219.79623413085938, "per_token_policy_error_norm": 0.01163916289806366, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.010519823059439659, "policy_entropy": 0.02425958961248398, "policy_entropy/max": 2.359375, "policy_entropy/median": 5.634501576423645e-08, "policy_entropy/min": 1.1180834903756764e-19, "policy_entropy/p25": 2.1464074961841106e-10, "policy_entropy/p75": 8.165836334228516e-06, "policy_entropy/var": 0.02110801264643669, "policy_loss": -0.75, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -0.75, "policy_loss/var": 0.18947365880012512, "policy_sharpness": 9.452978134155273, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 3.58294677734375, "reward": 0.75, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.75, "reward/p75": 1.0, "reward/var": 0.18947365880012512, "rewards/accuracy_reward": 0.75, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.75, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.18947365880012512, "sentence_fisher_curvature": 168123.53125, "sentence_fisher_curvature/max": 630784.0, "sentence_fisher_curvature/median": 132096.0, "sentence_fisher_curvature/min": 382.0, "sentence_fisher_curvature/p25": 23544.0, "sentence_fisher_curvature/p75": 273408.0, "sentence_fisher_curvature/p85": 308224.0, "sentence_fisher_curvature/p90": 392192.0, "sentence_fisher_curvature/p95": 471552.0, "sentence_fisher_curvature/p99": 611328.0625, "sentence_fisher_curvature/var": 25129459712.0, "sentence_fisher_kl_divergence": 1.8185672161052935e-05, "sentence_fisher_kl_divergence/max": 6.818771362304688e-05, "sentence_fisher_kl_divergence/median": 1.430511474609375e-05, "sentence_fisher_kl_divergence/min": 4.1211023926734924e-08, "sentence_fisher_kl_divergence/p25": 2.5490298867225647e-06, "sentence_fisher_kl_divergence/p75": 2.956390380859375e-05, "sentence_fisher_kl_divergence/p85": 3.325939178466797e-05, "sentence_fisher_kl_divergence/p90": 4.2438507080078125e-05, "sentence_fisher_kl_divergence/p95": 5.1081180572509766e-05, "sentence_fisher_kl_divergence/p99": 6.592274439753965e-05, "sentence_fisher_kl_divergence/var": 2.9385477384735736e-10, "sentence_full_gradient_variance/max_squared_error": 4705.529296875, "sentence_full_gradient_variance/metric": 4705.529296875, "sentence_full_gradient_variance/p75": 4705.529296875, "sentence_full_gradient_variance/p90": 4705.529296875, "sentence_full_gradient_variance/p95": 4705.529296875, "sentence_full_gradient_variance/p99": 4705.529296875, "sentence_full_update_term": 0.044017791748046875, "sentence_full_update_term/max": 0.2021484375, "sentence_full_update_term/median": 0.034912109375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0055389404296875, "sentence_full_update_term/p75": 0.06591796875, "sentence_full_update_term/p85": 0.09228515625, "sentence_full_update_term/p90": 0.10498046875, "sentence_full_update_term/p95": 0.1239013671875, "sentence_full_update_term/p99": 0.20122070610523224, "sentence_full_update_term/var": 0.0020124358125030994, "sentence_hessian_coeff": 12806.0, "sentence_hessian_coeff/max": 374784.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -185344.0, "sentence_hessian_coeff/p25": -31616.0, "sentence_hessian_coeff/p75": 37632.0, "sentence_hessian_coeff/p99": 285286.6875, "sentence_hessian_coeff/var": 6437938688.0, "sentence_hessian_coeff_abs": 51362.0, "sentence_hessian_coeff_abs/max": 374784.0, "sentence_hessian_coeff_abs/median": 31616.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 3312.0, "sentence_hessian_coeff_abs/p75": 69888.0, "sentence_hessian_coeff_abs/p99": 285286.6875, "sentence_hessian_coeff_abs/var": 3937834752.0, "step": 19, "token_fisher_curvature": 243264.71875, "token_fisher_curvature/max": 67108864.0, "token_fisher_curvature/median": 9.769962616701378e-14, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 4.336808689942018e-19, "token_fisher_curvature/p75": 1.3329554349184036e-08, "token_fisher_curvature/p85": 1.0669231414794922e-05, "token_fisher_curvature/p90": 0.0028076171875, "token_fisher_curvature/p95": 159.0, "token_fisher_curvature/p99": 5472256.0, "token_fisher_curvature/var": 7382995828736.0, "token_fisher_kl_divergence": 2.631460847624112e-05, "token_fisher_kl_divergence/max": 0.00726318359375, "token_fisher_kl_divergence/median": 1.0546552810051103e-23, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.69372238606502e-29, "token_fisher_kl_divergence/p75": 1.4433441421213278e-18, "token_fisher_kl_divergence/p85": 1.1518563880486e-15, "token_fisher_kl_divergence/p90": 3.0375701953744283e-13, "token_fisher_kl_divergence/p95": 1.7229467630386353e-08, "token_fisher_kl_divergence/p99": 0.000591278076171875, "token_fisher_kl_divergence/var": 8.637853454729338e-08, "token_full_update_term": 0.0008596187690272927, "token_full_update_term/max": 0.1279296875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.384185791015625e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.838796030526282e-12, "token_full_update_term/p85": 1.1496013030409813e-09, "token_full_update_term/p90": 3.282912075519562e-08, "token_full_update_term/p95": 9.492039680480957e-06, "token_full_update_term/p99": 0.03369140625, "token_full_update_term/var": 5.397108543547802e-05, "token_hessian_coeff": 44190.91015625, "token_hessian_coeff/max": 64749568.0, "token_hessian_coeff/median": -3.9835867937654257e-10, "token_hessian_coeff/min": -9175040.0, "token_hessian_coeff/p25": -0.0004177093505859375, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 1.146484375, "token_hessian_coeff/var": 4696889622528.0, "token_hessian_coeff_abs": 212079.921875, "token_hessian_coeff_abs/max": 64749568.0, "token_hessian_coeff_abs/median": 1.4621764421463013e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.001617431640625, "token_hessian_coeff_abs/p99": 5898240.0, "token_hessian_coeff_abs/var": 4653863927808.0 }, { "accuracy_reward": 0.8333333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.14035087823867798, "adam_stats/lm_head/lr_effective_max": 7.43837226764299e-05, "adam_stats/lm_head/lr_effective_mean": 4.028119851562373e-11, "adam_stats/lm_head/lr_effective_min": -7.642959099030122e-05, "adam_stats/lm_head/lr_effective_std": 1.7704917354421923e-06, "adam_stats/lr_effective_max": 9.131072874879465e-05, "adam_stats/lr_effective_mean": -1.6304493866137193e-10, "adam_stats/lr_effective_min": -8.492915367241949e-05, "adam_stats/m_t_max": 0.005720134824514389, "adam_stats/m_t_mean": -2.7246918263279518e-11, "adam_stats/m_t_min": -0.0038011365104466677, "adam_stats/v_t_max": 2.6301073376089334e-05, "adam_stats/v_t_mean": 1.7357959687658298e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8333333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.14035087823867798, "all_logprobs": -0.015011834912002087, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.875, "all_logprobs/p1": -0.38671875, "all_logprobs/p10": -2.562999725341797e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.00193023681640625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.02364962175488472, "clip_ratio": 0.0, "completion_length": 515.1979370117188, "completion_length/correct": 465.2250061035156, "completion_length/correct/max": 884.0, "completion_length/correct/median": 411.0, "completion_length/correct/min": 122.0, "completion_length/correct/p25": 318.0, "completion_length/correct/p75": 648.75, "completion_length/correct/var": 45406.40625, "completion_length/incorrect": 765.0625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 819.0, "completion_length/incorrect/min": 361.0, "completion_length/incorrect/p25": 446.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 87216.328125, "completion_length/max": 1024.0, "completion_length/median": 413.0, "completion_length/min": 122.0, "completion_length/p25": 326.0, "completion_length/p75": 724.0, "completion_length/var": 64147.9140625, "curvature_clip_ratio_token_fisher": 0.018156452104449272, "curvature_clip_ratio_token_hessian": 0.013526354916393757, "curvature_clip_ratio_total_fisher": 0.018156452104449272, "curvature_clip_ratio_total_full": 0.018156452104449272, "curvature_clip_ratio_total_hessian": 0.013526354916393757, "epoch": 0.032, "feature_vector_variance/max_squared_error": 62658.80078125, "feature_vector_variance/metric": 26603.720703125, "generated_tokens/total": 1280812.0, "global_fisher_curvature": 94208.0, "global_fisher_curvature/max": 94208.0, "global_fisher_curvature/median": 94208.0, "global_fisher_curvature/min": 94208.0, "global_fisher_curvature/p25": 94208.0, "global_fisher_curvature/p75": 94208.0, "global_fisher_curvature/p85": 94208.0, "global_fisher_curvature/p90": 94208.0, "global_fisher_curvature/p95": 94208.0, "global_fisher_curvature/p99": 94208.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.0073184967041016e-05, "global_fisher_kl_divergence/max": 1.0073184967041016e-05, "global_fisher_kl_divergence/median": 1.0073184967041016e-05, "global_fisher_kl_divergence/min": 1.0073184967041016e-05, "global_fisher_kl_divergence/p25": 1.0073184967041016e-05, "global_fisher_kl_divergence/p75": 1.0073184967041016e-05, "global_fisher_kl_divergence/p85": 1.0073184967041016e-05, "global_fisher_kl_divergence/p90": 1.0073184967041016e-05, "global_fisher_kl_divergence/p95": 1.0073184967041016e-05, "global_fisher_kl_divergence/p99": 1.0073184967041016e-05, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.11328125, "global_full_update_term/max": 0.11328125, "global_full_update_term/median": 0.11328125, "global_full_update_term/min": 0.11328125, "global_full_update_term/p25": 0.11328125, "global_full_update_term/p75": 0.11328125, "global_full_update_term/p85": 0.11328125, "global_full_update_term/p90": 0.11328125, "global_full_update_term/p95": 0.11328125, "global_full_update_term/p99": 0.11328125, "global_full_update_term/var": NaN, "global_hessian_coeff": 16384.0, "global_hessian_coeff/max": 16384.0, "global_hessian_coeff/median": 16384.0, "global_hessian_coeff/min": 16384.0, "global_hessian_coeff/p25": 16384.0, "global_hessian_coeff/p75": 16384.0, "global_hessian_coeff/p99": 16384.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 16384.0, "global_hessian_coeff_abs/max": 16384.0, "global_hessian_coeff_abs/median": 16384.0, "global_hessian_coeff_abs/min": 16384.0, "global_hessian_coeff_abs/p25": 16384.0, "global_hessian_coeff_abs/p75": 16384.0, "global_hessian_coeff_abs/p99": 16384.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.06757553666830063, "learning_rate": 1.4547694655894313e-05, "loss": -0.8333, "masked_global_fisher_curvature": 103.5, "masked_global_fisher_curvature/max": 103.5, "masked_global_fisher_curvature/median": 103.5, "masked_global_fisher_curvature/min": 103.5, "masked_global_fisher_curvature/p25": 103.5, "masked_global_fisher_curvature/p75": 103.5, "masked_global_fisher_curvature/p85": 103.5, "masked_global_fisher_curvature/p90": 103.5, "masked_global_fisher_curvature/p95": 103.5, "masked_global_fisher_curvature/p99": 103.5, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/max": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/median": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/min": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/p25": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/p75": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/p85": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/p90": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/p95": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/p99": 1.1059455573558807e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0033111572265625, "masked_global_full_update_term/max": 0.0033111572265625, "masked_global_full_update_term/median": 0.0033111572265625, "masked_global_full_update_term/min": 0.0033111572265625, "masked_global_full_update_term/p25": 0.0033111572265625, "masked_global_full_update_term/p75": 0.0033111572265625, "masked_global_full_update_term/p85": 0.0033111572265625, "masked_global_full_update_term/p90": 0.0033111572265625, "masked_global_full_update_term/p95": 0.0033111572265625, "masked_global_full_update_term/p99": 0.0033111572265625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1952.0, "masked_global_hessian_coeff/max": -1952.0, "masked_global_hessian_coeff/median": -1952.0, "masked_global_hessian_coeff/min": -1952.0, "masked_global_hessian_coeff/p25": -1952.0, "masked_global_hessian_coeff/p75": -1952.0, "masked_global_hessian_coeff/p99": -1952.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1952.0, "masked_global_hessian_coeff_abs/max": 1952.0, "masked_global_hessian_coeff_abs/median": 1952.0, "masked_global_hessian_coeff_abs/min": 1952.0, "masked_global_hessian_coeff_abs/p25": 1952.0, "masked_global_hessian_coeff_abs/p75": 1952.0, "masked_global_hessian_coeff_abs/p99": 1952.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.1760661602020264, "masked_per_sentence_gradient_norm/max": 7.15625, "masked_per_sentence_gradient_norm/median": 2.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.91015625, "masked_per_sentence_gradient_norm/p75": 2.796875, "masked_per_sentence_gradient_norm/var": 3.1284844875335693, "masked_per_token_gradient_norm": 0.05250721797347069, "masked_per_token_gradient_norm/max": 11.0, "masked_per_token_gradient_norm/median": 1.8917489796876907e-09, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.9653310775756836e-06, "masked_per_token_gradient_norm/var": 0.24379239976406097, "masked_sentence_fisher_curvature": 227.173828125, "masked_sentence_fisher_curvature/max": 632.0, "masked_sentence_fisher_curvature/median": 198.0, "masked_sentence_fisher_curvature/min": 12.6875, "masked_sentence_fisher_curvature/p25": 109.375, "masked_sentence_fisher_curvature/p75": 306.0, "masked_sentence_fisher_curvature/p85": 394.0, "masked_sentence_fisher_curvature/p90": 432.0, "masked_sentence_fisher_curvature/p95": 511.5, "masked_sentence_fisher_curvature/p99": 609.2000732421875, "masked_sentence_fisher_curvature/var": 21575.505859375, "masked_sentence_fisher_kl_divergence": 2.432708967603503e-08, "masked_sentence_fisher_kl_divergence/max": 6.752088665962219e-08, "masked_sentence_fisher_kl_divergence/median": 2.1187588572502136e-08, "masked_sentence_fisher_kl_divergence/min": 1.3606040738523006e-09, "masked_sentence_fisher_kl_divergence/p25": 1.1685187928378582e-08, "masked_sentence_fisher_kl_divergence/p75": 3.277091309428215e-08, "masked_sentence_fisher_kl_divergence/p85": 4.220055416226387e-08, "masked_sentence_fisher_kl_divergence/p90": 4.621688276529312e-08, "masked_sentence_fisher_kl_divergence/p95": 5.48316165804863e-08, "masked_sentence_fisher_kl_divergence/p99": 6.530900265033779e-08, "masked_sentence_fisher_kl_divergence/var": 2.4776650737386185e-16, "masked_sentence_full_gradient_variance/max_squared_error": 7.575300216674805, "masked_sentence_full_gradient_variance/metric": 7.575300216674805, "masked_sentence_full_gradient_variance/p75": 7.575300216674805, "masked_sentence_full_gradient_variance/p90": 7.575300216674805, "masked_sentence_full_gradient_variance/p95": 7.575300216674805, "masked_sentence_full_gradient_variance/p99": 7.575300216674805, "masked_sentence_full_update_term": 0.0015481114387512207, "masked_sentence_full_update_term/max": 0.00634765625, "masked_sentence_full_update_term/median": 0.00135040283203125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0006818771362304688, "masked_sentence_full_update_term/p75": 0.002216339111328125, "masked_sentence_full_update_term/p85": 0.002777099609375, "masked_sentence_full_update_term/p90": 0.0034027099609375, "masked_sentence_full_update_term/p95": 0.003902435302734375, "masked_sentence_full_update_term/p99": 0.00452118506655097, "masked_sentence_full_update_term/var": 1.5645584880985552e-06, "masked_sentence_hessian_coeff": -8587.083984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -8096.0, "masked_sentence_hessian_coeff/min": -26624.0, "masked_sentence_hessian_coeff/p25": -12944.0, "masked_sentence_hessian_coeff/p75": -4832.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 37211060.0, "masked_sentence_hessian_coeff_abs": 8587.083984375, "masked_sentence_hessian_coeff_abs/max": 26624.0, "masked_sentence_hessian_coeff_abs/median": 8064.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 4832.0, "masked_sentence_hessian_coeff_abs/p75": 12944.0, "masked_sentence_hessian_coeff_abs/p99": 21273.6171875, "masked_sentence_hessian_coeff_abs/var": 37211060.0, "masked_token_fisher_curvature": 294.6146545410156, "masked_token_fisher_curvature/max": 92160.0, "masked_token_fisher_curvature/median": 6.494804694057166e-15, "masked_token_fisher_curvature/min": 3.489748854003666e-39, "masked_token_fisher_curvature/p25": 3.176373552203626e-20, "masked_token_fisher_curvature/p75": 8.185452315956354e-10, "masked_token_fisher_curvature/p85": 3.3155083656311035e-07, "masked_token_fisher_curvature/p90": 3.2901763916015625e-05, "masked_token_fisher_curvature/p95": 0.0615234375, "masked_token_fisher_curvature/p99": 4768.0, "masked_token_fisher_curvature/var": 12452562.0, "masked_token_fisher_kl_divergence": 3.1537471301135156e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 6.947024675738318e-25, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.4019626537656134e-30, "masked_token_fisher_kl_divergence/p75": 8.766791004082009e-20, "masked_token_fisher_kl_divergence/p85": 3.5561831257524545e-17, "masked_token_fisher_kl_divergence/p90": 3.524958103184872e-15, "masked_token_fisher_kl_divergence/p95": 6.59383658785373e-12, "masked_token_fisher_kl_divergence/p99": 5.103647708892822e-07, "masked_token_fisher_kl_divergence/var": 1.4269550710314938e-13, "masked_token_full_update_term": 2.1923575332039036e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 2.6020852139652106e-18, "masked_token_full_update_term/min": -1.1473894119262695e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.9117155615240335e-12, "masked_token_full_update_term/p85": 5.384208634495735e-10, "masked_token_full_update_term/p90": 8.614733815193176e-09, "masked_token_full_update_term/p95": 4.880130290985107e-07, "masked_token_full_update_term/p99": 0.00069427490234375, "masked_token_full_update_term/var": 4.482770421532223e-08, "masked_token_hessian_coeff": -11452.4130859375, "masked_token_hessian_coeff/max": 612.0, "masked_token_hessian_coeff/median": -1.3271346688270569e-08, "masked_token_hessian_coeff/min": -2424832.0, "masked_token_hessian_coeff/p25": -0.000911712646484375, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.115234375, "masked_token_hessian_coeff/var": 12767579136.0, "masked_token_hessian_coeff_abs": 11452.6357421875, "masked_token_hessian_coeff_abs/max": 2424832.0, "masked_token_hessian_coeff_abs/median": 7.227063179016113e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.0029144287109375, "masked_token_hessian_coeff_abs/p99": 344064.0, "masked_token_hessian_coeff_abs/var": 12767572992.0, "mean_logprobs": -0.015869140625, "mean_logprobs/var": 0.000125885009765625, "num_completions/total": 1920, "per_sentence_gradient_norm": 60.046875, "per_sentence_gradient_norm/max": 173.0, "per_sentence_gradient_norm/median": 48.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 25.28125, "per_sentence_gradient_norm/p75": 89.375, "per_sentence_gradient_norm/var": 2275.778076171875, "per_token_feature_norm": 179.3040008544922, "per_token_feature_norm/max": 246.0, "per_token_feature_norm/median": 181.0, "per_token_feature_norm/min": 80.5, "per_token_feature_norm/p25": 172.0, "per_token_feature_norm/p75": 188.0, "per_token_feature_norm/var": 216.60960388183594, "per_token_gradient_norm": 1.4430383443832397, "per_token_gradient_norm/max": 300.0, "per_token_gradient_norm/median": 2.5756889954209328e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 4.163336342344337e-17, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.202127456665039e-06, "per_token_gradient_norm/var": 180.83792114257812, "per_token_policy_error_norm": 0.008730282075703144, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.008430114947259426, "policy_entropy": 0.01545915100723505, "policy_entropy/max": 2.4375, "policy_entropy/median": 1.909211277961731e-08, "policy_entropy/min": 5.759824041329242e-19, "policy_entropy/p25": 5.729816621169448e-11, "policy_entropy/p75": 3.159046173095703e-06, "policy_entropy/var": 0.009824058972299099, "policy_loss": -0.8333333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.14035087823867798, "policy_sharpness": 9.587738990783691, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.60105037689209, "reward": 0.8333333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.14035087823867798, "rewards/accuracy_reward": 0.8333333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.14035087823867798, "sentence_fisher_curvature": 212842.484375, "sentence_fisher_curvature/max": 692224.0, "sentence_fisher_curvature/median": 201728.0, "sentence_fisher_curvature/min": 223.0, "sentence_fisher_curvature/p25": 105088.0, "sentence_fisher_curvature/p75": 293888.0, "sentence_fisher_curvature/p85": 355840.0, "sentence_fisher_curvature/p90": 439296.0, "sentence_fisher_curvature/p95": 550912.0, "sentence_fisher_curvature/p99": 633856.1875, "sentence_fisher_curvature/var": 27199705088.0, "sentence_fisher_kl_divergence": 2.2794945834903046e-05, "sentence_fisher_kl_divergence/max": 7.390975952148438e-05, "sentence_fisher_kl_divergence/median": 2.1576881408691406e-05, "sentence_fisher_kl_divergence/min": 2.3865140974521637e-08, "sentence_fisher_kl_divergence/p25": 1.1235475540161133e-05, "sentence_fisher_kl_divergence/p75": 3.153085708618164e-05, "sentence_fisher_kl_divergence/p85": 3.808736801147461e-05, "sentence_fisher_kl_divergence/p90": 4.7087669372558594e-05, "sentence_fisher_kl_divergence/p95": 5.888938903808594e-05, "sentence_fisher_kl_divergence/p99": 6.802083953516558e-05, "sentence_fisher_kl_divergence/var": 3.1199678951487897e-10, "sentence_full_gradient_variance/max_squared_error": 5689.69921875, "sentence_full_gradient_variance/metric": 5689.69921875, "sentence_full_gradient_variance/p75": 5689.69921875, "sentence_full_gradient_variance/p90": 5689.69921875, "sentence_full_gradient_variance/p95": 5689.69921875, "sentence_full_gradient_variance/p99": 5689.69921875, "sentence_full_update_term": 0.04798571392893791, "sentence_full_update_term/max": 0.1591796875, "sentence_full_update_term/median": 0.040283203125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.020111083984375, "sentence_full_update_term/p75": 0.0662841796875, "sentence_full_update_term/p85": 0.088134765625, "sentence_full_update_term/p90": 0.10009765625, "sentence_full_update_term/p95": 0.1173095703125, "sentence_full_update_term/p99": 0.12856455147266388, "sentence_full_update_term/var": 0.0013361485907807946, "sentence_hessian_coeff": 22012.5, "sentence_hessian_coeff/max": 286720.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -216064.0, "sentence_hessian_coeff/p25": -20608.0, "sentence_hessian_coeff/p75": 71424.0, "sentence_hessian_coeff/p99": 282828.8125, "sentence_hessian_coeff/var": 7232426496.0, "sentence_hessian_coeff_abs": 60792.16796875, "sentence_hessian_coeff_abs/max": 286720.0, "sentence_hessian_coeff_abs/median": 44800.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 12912.0, "sentence_hessian_coeff_abs/p75": 87040.0, "sentence_hessian_coeff_abs/p99": 282828.8125, "sentence_hessian_coeff_abs/var": 3987487232.0, "step": 20, "token_fisher_curvature": 182356.4375, "token_fisher_curvature/max": 68157440.0, "token_fisher_curvature/median": 9.381384558082573e-15, "token_fisher_curvature/min": 3.489748854003666e-39, "token_fisher_curvature/p25": 4.1081097941833566e-20, "token_fisher_curvature/p75": 1.724401954561472e-09, "token_fisher_curvature/p85": 1.1399388313293457e-06, "token_fisher_curvature/p90": 0.00022029876708984375, "token_fisher_curvature/p95": 5.568603515625, "token_fisher_curvature/p99": 2323968.0, "token_fisher_curvature/var": 5355749244928.0, "token_fisher_kl_divergence": 1.9519948182278313e-05, "token_fisher_kl_divergence/max": 0.007293701171875, "token_fisher_kl_divergence/median": 1.001664023013432e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.388038785291878e-30, "token_fisher_kl_divergence/p75": 1.8465318250143747e-19, "token_fisher_kl_divergence/p85": 1.222980050563649e-16, "token_fisher_kl_divergence/p90": 2.353672812205332e-14, "token_fisher_kl_divergence/p95": 5.97339067098801e-10, "token_fisher_kl_divergence/p99": 0.00024956464767456055, "token_fisher_kl_divergence/var": 6.13654620451598e-08, "token_full_update_term": 0.0006672561867162585, "token_full_update_term/max": 0.1279296875, "token_full_update_term/median": 6.776263578034403e-18, "token_full_update_term/min": -1.1473894119262695e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.1311840353300795e-11, "token_full_update_term/p85": 1.2005330063402653e-09, "token_full_update_term/p90": 2.7031546778744087e-08, "token_full_update_term/p95": 5.128909833729267e-06, "token_full_update_term/p99": 0.021484375, "token_full_update_term/var": 3.9966787880985066e-05, "token_hessian_coeff": 5496.46142578125, "token_hessian_coeff/max": 66584576.0, "token_hessian_coeff/median": -1.7113052308559418e-08, "token_hessian_coeff/min": -9306112.0, "token_hessian_coeff/p25": -0.00135040283203125, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.43463134765625, "token_hessian_coeff/var": 3270778290176.0, "token_hessian_coeff_abs": 168160.453125, "token_hessian_coeff_abs/max": 66584576.0, "token_hessian_coeff_abs/median": 9.499490261077881e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 6.730727086790012e-16, "token_hessian_coeff_abs/p75": 0.0046234130859375, "token_hessian_coeff_abs/p99": 5505024.0, "token_hessian_coeff_abs/var": 3242529914880.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166668951511383, "adam_stats/lm_head/lr_effective_max": 7.091811130521819e-05, "adam_stats/lm_head/lr_effective_mean": 6.422175985454359e-11, "adam_stats/lm_head/lr_effective_min": -7.131262100301683e-05, "adam_stats/lm_head/lr_effective_std": 1.6610345028311713e-06, "adam_stats/lr_effective_max": 8.74923644005321e-05, "adam_stats/lr_effective_mean": -1.6405311831224623e-10, "adam_stats/lr_effective_min": -8.4472652815748e-05, "adam_stats/m_t_max": 0.005119892302900553, "adam_stats/m_t_mean": -2.3585359359978852e-11, "adam_stats/m_t_min": -0.003430082695558667, "adam_stats/v_t_max": 2.627485264383722e-05, "adam_stats/v_t_mean": 1.7350351841013967e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166668951511383, "all_logprobs": -0.01033184677362442, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.90625, "all_logprobs/p1": -0.2021484375, "all_logprobs/p10": -1.6689300537109375e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.000629425048828125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016924738883972168, "clip_ratio": 0.0, "completion_length": 568.0104370117188, "completion_length/correct": 502.862060546875, "completion_length/correct/max": 724.0, "completion_length/correct/median": 457.0, "completion_length/correct/min": 262.0, "completion_length/correct/p25": 354.0, "completion_length/correct/p75": 702.0, "completion_length/correct/var": 24135.8046875, "completion_length/incorrect": 667.4473876953125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 638.0, "completion_length/incorrect/min": 235.0, "completion_length/incorrect/p25": 583.75, "completion_length/incorrect/p75": 737.0, "completion_length/incorrect/var": 51877.22265625, "completion_length/max": 1024.0, "completion_length/median": 571.0, "completion_length/min": 235.0, "completion_length/p25": 378.25, "completion_length/p75": 703.5, "completion_length/var": 41232.6484375, "curvature_clip_ratio_token_fisher": 0.010874947533011436, "curvature_clip_ratio_token_hessian": 0.006070164497941732, "curvature_clip_ratio_total_fisher": 0.010874947533011436, "curvature_clip_ratio_total_full": 0.010874947533011436, "curvature_clip_ratio_total_hessian": 0.006070164497941732, "epoch": 0.0336, "feature_vector_variance/max_squared_error": 61159.8515625, "feature_vector_variance/metric": 26819.140625, "generated_tokens/total": 1335341.0, "global_fisher_curvature": 52736.0, "global_fisher_curvature/max": 52736.0, "global_fisher_curvature/median": 52736.0, "global_fisher_curvature/min": 52736.0, "global_fisher_curvature/p25": 52736.0, "global_fisher_curvature/p75": 52736.0, "global_fisher_curvature/p85": 52736.0, "global_fisher_curvature/p90": 52736.0, "global_fisher_curvature/p95": 52736.0, "global_fisher_curvature/p99": 52736.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.5730342864990234e-06, "global_fisher_kl_divergence/max": 5.5730342864990234e-06, "global_fisher_kl_divergence/median": 5.5730342864990234e-06, "global_fisher_kl_divergence/min": 5.5730342864990234e-06, "global_fisher_kl_divergence/p25": 5.5730342864990234e-06, "global_fisher_kl_divergence/p75": 5.5730342864990234e-06, "global_fisher_kl_divergence/p85": 5.5730342864990234e-06, "global_fisher_kl_divergence/p90": 5.5730342864990234e-06, "global_fisher_kl_divergence/p95": 5.5730342864990234e-06, "global_fisher_kl_divergence/p99": 5.5730342864990234e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.068359375, "global_full_update_term/max": 0.068359375, "global_full_update_term/median": 0.068359375, "global_full_update_term/min": 0.068359375, "global_full_update_term/p25": 0.068359375, "global_full_update_term/p75": 0.068359375, "global_full_update_term/p85": 0.068359375, "global_full_update_term/p90": 0.068359375, "global_full_update_term/p95": 0.068359375, "global_full_update_term/p99": 0.068359375, "global_full_update_term/var": NaN, "global_hessian_coeff": 5504.0, "global_hessian_coeff/max": 5504.0, "global_hessian_coeff/median": 5504.0, "global_hessian_coeff/min": 5504.0, "global_hessian_coeff/p25": 5504.0, "global_hessian_coeff/p75": 5504.0, "global_hessian_coeff/p99": 5504.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 5504.0, "global_hessian_coeff_abs/max": 5504.0, "global_hessian_coeff_abs/median": 5504.0, "global_hessian_coeff_abs/min": 5504.0, "global_hessian_coeff_abs/p25": 5504.0, "global_hessian_coeff_abs/p75": 5504.0, "global_hessian_coeff_abs/p99": 5504.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04944595322012901, "learning_rate": 1.4453878909250906e-05, "loss": -0.6042, "masked_global_fisher_curvature": 160.0, "masked_global_fisher_curvature/max": 160.0, "masked_global_fisher_curvature/median": 160.0, "masked_global_fisher_curvature/min": 160.0, "masked_global_fisher_curvature/p25": 160.0, "masked_global_fisher_curvature/p75": 160.0, "masked_global_fisher_curvature/p85": 160.0, "masked_global_fisher_curvature/p90": 160.0, "masked_global_fisher_curvature/p95": 160.0, "masked_global_fisher_curvature/p99": 160.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/max": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/median": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/min": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/p25": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/p75": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/p85": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/p90": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/p95": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/p99": 1.6880221664905548e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002166748046875, "masked_global_full_update_term/max": 0.002166748046875, "masked_global_full_update_term/median": 0.002166748046875, "masked_global_full_update_term/min": 0.002166748046875, "masked_global_full_update_term/p25": 0.002166748046875, "masked_global_full_update_term/p75": 0.002166748046875, "masked_global_full_update_term/p85": 0.002166748046875, "masked_global_full_update_term/p90": 0.002166748046875, "masked_global_full_update_term/p95": 0.002166748046875, "masked_global_full_update_term/p99": 0.002166748046875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2176.0, "masked_global_hessian_coeff/max": -2176.0, "masked_global_hessian_coeff/median": -2176.0, "masked_global_hessian_coeff/min": -2176.0, "masked_global_hessian_coeff/p25": -2176.0, "masked_global_hessian_coeff/p75": -2176.0, "masked_global_hessian_coeff/p99": -2176.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2176.0, "masked_global_hessian_coeff_abs/max": 2176.0, "masked_global_hessian_coeff_abs/median": 2176.0, "masked_global_hessian_coeff_abs/min": 2176.0, "masked_global_hessian_coeff_abs/p25": 2176.0, "masked_global_hessian_coeff_abs/p75": 2176.0, "masked_global_hessian_coeff_abs/p99": 2176.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.570068359375, "masked_per_sentence_gradient_norm/max": 9.8125, "masked_per_sentence_gradient_norm/median": 0.984375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.59375, "masked_per_sentence_gradient_norm/var": 3.8252978324890137, "masked_per_token_gradient_norm": 0.031641002744436264, "masked_per_token_gradient_norm/max": 9.5, "masked_per_token_gradient_norm/median": 1.3784529073745944e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.9837170839309692e-07, "masked_per_token_gradient_norm/var": 0.14774829149246216, "masked_sentence_fisher_curvature": 223.0234375, "masked_sentence_fisher_curvature/max": 756.0, "masked_sentence_fisher_curvature/median": 221.0, "masked_sentence_fisher_curvature/min": 22.25, "masked_sentence_fisher_curvature/p25": 125.75, "masked_sentence_fisher_curvature/p75": 273.0, "masked_sentence_fisher_curvature/p85": 322.0, "masked_sentence_fisher_curvature/p90": 345.0, "masked_sentence_fisher_curvature/p95": 471.0, "masked_sentence_fisher_curvature/p99": 634.400390625, "masked_sentence_fisher_curvature/var": 18424.033203125, "masked_sentence_fisher_kl_divergence": 2.360002326895483e-08, "masked_sentence_fisher_kl_divergence/max": 8.009374141693115e-08, "masked_sentence_fisher_kl_divergence/median": 2.3399479687213898e-08, "masked_sentence_fisher_kl_divergence/min": 2.35741026699543e-09, "masked_sentence_fisher_kl_divergence/p25": 1.3300450518727303e-08, "masked_sentence_fisher_kl_divergence/p75": 2.8870999813079834e-08, "masked_sentence_fisher_kl_divergence/p85": 3.3993273973464966e-08, "masked_sentence_fisher_kl_divergence/p90": 3.64379957318306e-08, "masked_sentence_fisher_kl_divergence/p95": 4.988396540284157e-08, "masked_sentence_fisher_kl_divergence/p99": 6.726481416308161e-08, "masked_sentence_fisher_kl_divergence/var": 2.0649484025378587e-16, "masked_sentence_full_gradient_variance/max_squared_error": 5.979081153869629, "masked_sentence_full_gradient_variance/metric": 5.979081153869629, "masked_sentence_full_gradient_variance/p75": 5.979081153869629, "masked_sentence_full_gradient_variance/p90": 5.979081153869629, "masked_sentence_full_gradient_variance/p95": 5.979081153869629, "masked_sentence_full_gradient_variance/p99": 5.979081153869629, "masked_sentence_full_update_term": 0.0010913610458374023, "masked_sentence_full_update_term/max": 0.00653076171875, "masked_sentence_full_update_term/median": 0.00074005126953125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00160980224609375, "masked_sentence_full_update_term/p85": 0.00220489501953125, "masked_sentence_full_update_term/p90": 0.0030059814453125, "masked_sentence_full_update_term/p95": 0.00339508056640625, "masked_sentence_full_update_term/p99": 0.0046173157170414925, "masked_sentence_full_update_term/var": 1.6725904288250604e-06, "masked_sentence_hessian_coeff": -5914.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -4032.0, "masked_sentence_hessian_coeff/min": -30592.0, "masked_sentence_hessian_coeff/p25": -9552.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 42755072.0, "masked_sentence_hessian_coeff_abs": 5914.0, "masked_sentence_hessian_coeff_abs/max": 30592.0, "masked_sentence_hessian_coeff_abs/median": 3888.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9552.0, "masked_sentence_hessian_coeff_abs/p99": 25120.017578125, "masked_sentence_hessian_coeff_abs/var": 42755072.0, "masked_token_fisher_curvature": 256.0728454589844, "masked_token_fisher_curvature/max": 93696.0, "masked_token_fisher_curvature/median": 7.022160630754115e-15, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.2684318385133148e-19, "masked_token_fisher_curvature/p75": 4.001776687800884e-10, "masked_token_fisher_curvature/p85": 1.685693860054016e-07, "masked_token_fisher_curvature/p90": 1.800060272216797e-05, "masked_token_fisher_curvature/p95": 0.01849365234375, "masked_token_fisher_curvature/p99": 3088.0, "masked_token_fisher_curvature/var": 10892186.0, "masked_token_fisher_kl_divergence": 2.710022606322582e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 7.431700815906108e-25, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.3385983485469044e-29, "masked_token_fisher_kl_divergence/p75": 4.2351647362715017e-20, "masked_token_fisher_kl_divergence/p85": 1.7889335846010823e-17, "masked_token_fisher_kl_divergence/p90": 1.9012569296705806e-15, "masked_token_fisher_kl_divergence/p95": 1.9539925233402755e-12, "masked_token_fisher_kl_divergence/p99": 3.259629011154175e-07, "masked_token_fisher_kl_divergence/var": 1.2197687792540185e-13, "masked_token_full_update_term": 1.3388145816861652e-05, "masked_token_full_update_term/max": 0.00408935546875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.1175870895385742e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.2523315717771766e-13, "masked_token_full_update_term/p85": 3.865352482534945e-11, "masked_token_full_update_term/p90": 1.2005330063402653e-09, "masked_token_full_update_term/p95": 1.0849907994270325e-07, "masked_token_full_update_term/p99": 0.00022220611572265625, "masked_token_full_update_term/var": 2.7886208897598408e-08, "masked_token_hessian_coeff": -7207.794921875, "masked_token_hessian_coeff/max": 40.25, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2195456.0, "masked_token_hessian_coeff/p25": -2.4080276489257812e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0145263671875, "masked_token_hessian_coeff/var": 8390607360.0, "masked_token_hessian_coeff_abs": 7207.84130859375, "masked_token_hessian_coeff_abs/max": 2195456.0, "masked_token_hessian_coeff_abs/median": 1.418811734765768e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00010442733764648438, "masked_token_hessian_coeff_abs/p99": 72704.0, "masked_token_hessian_coeff_abs/var": 8390607360.0, "mean_logprobs": -0.0107421875, "mean_logprobs/var": 5.459785461425781e-05, "num_completions/total": 2016, "per_sentence_gradient_norm": 34.111328125, "per_sentence_gradient_norm/max": 207.0, "per_sentence_gradient_norm/median": 13.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 40.5, "per_sentence_gradient_norm/var": 2556.438720703125, "per_token_feature_norm": 180.04420471191406, "per_token_feature_norm/max": 251.0, "per_token_feature_norm/median": 182.0, "per_token_feature_norm/min": 90.5, "per_token_feature_norm/p25": 173.0, "per_token_feature_norm/p75": 188.0, "per_token_feature_norm/var": 185.9763641357422, "per_token_gradient_norm": 0.6600998640060425, "per_token_gradient_norm/max": 270.0, "per_token_gradient_norm/median": 2.0889956431346945e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.4959444999694824e-07, "per_token_gradient_norm/var": 80.08796691894531, "per_token_policy_error_norm": 0.00577889196574688, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005413678474724293, "policy_entropy": 0.011584608815610409, "policy_entropy/max": 2.734375, "policy_entropy/median": 1.6996636986732483e-08, "policy_entropy/min": 1.212951180468158e-18, "policy_entropy/p25": 1.1232259566895664e-10, "policy_entropy/p75": 2.1904706954956055e-06, "policy_entropy/var": 0.006568538025021553, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166668951511383, "policy_sharpness": 9.649760246276855, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.184011459350586, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166668951511383, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166668951511383, "sentence_fisher_curvature": 107456.453125, "sentence_fisher_curvature/max": 864256.0, "sentence_fisher_curvature/median": 44544.0, "sentence_fisher_curvature/min": 155.0, "sentence_fisher_curvature/p25": 1400.0, "sentence_fisher_curvature/p75": 171776.0, "sentence_fisher_curvature/p85": 240128.0, "sentence_fisher_curvature/p90": 279552.0, "sentence_fisher_curvature/p95": 366080.0, "sentence_fisher_curvature/p99": 560743.375, "sentence_fisher_curvature/var": 21772589056.0, "sentence_fisher_kl_divergence": 1.1370266292942688e-05, "sentence_fisher_kl_divergence/max": 9.1552734375e-05, "sentence_fisher_kl_divergence/median": 4.708766937255859e-06, "sentence_fisher_kl_divergence/min": 1.641456037759781e-08, "sentence_fisher_kl_divergence/p25": 1.4831312000751495e-07, "sentence_fisher_kl_divergence/p75": 1.817941665649414e-05, "sentence_fisher_kl_divergence/p85": 2.5391578674316406e-05, "sentence_fisher_kl_divergence/p90": 2.956390380859375e-05, "sentence_fisher_kl_divergence/p95": 3.8683414459228516e-05, "sentence_fisher_kl_divergence/p99": 5.939017137279734e-05, "sentence_fisher_kl_divergence/var": 2.439295154754717e-10, "sentence_full_gradient_variance/max_squared_error": 3627.89208984375, "sentence_full_gradient_variance/metric": 3627.89208984375, "sentence_full_gradient_variance/p75": 3627.89208984375, "sentence_full_gradient_variance/p90": 3627.89208984375, "sentence_full_gradient_variance/p95": 3627.89208984375, "sentence_full_gradient_variance/p99": 3627.89208984375, "sentence_full_update_term": 0.02613067626953125, "sentence_full_update_term/max": 0.189453125, "sentence_full_update_term/median": 0.013671875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.033203125, "sentence_full_update_term/p85": 0.05535888671875, "sentence_full_update_term/p90": 0.072998046875, "sentence_full_update_term/p95": 0.1082763671875, "sentence_full_update_term/p99": 0.14677748084068298, "sentence_full_update_term/var": 0.0014385099057108164, "sentence_hessian_coeff": 5855.33349609375, "sentence_hessian_coeff/max": 462848.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -149504.0, "sentence_hessian_coeff/p25": -32832.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 301363.71875, "sentence_hessian_coeff/var": 7605847552.0, "sentence_hessian_coeff_abs": 47158.66796875, "sentence_hessian_coeff_abs/max": 462848.0, "sentence_hessian_coeff_abs/median": 22272.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 56576.0, "sentence_hessian_coeff_abs/p99": 301363.71875, "sentence_hessian_coeff_abs/var": 5393143808.0, "step": 21, "token_fisher_curvature": 85985.015625, "token_fisher_curvature/max": 69206016.0, "token_fisher_curvature/median": 9.270362255620057e-15, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.4992483166401116e-19, "token_fisher_curvature/p75": 6.439222488552332e-10, "token_fisher_curvature/p85": 3.9674341678619385e-07, "token_fisher_curvature/p90": 6.771087646484375e-05, "token_fisher_curvature/p95": 0.25390625, "token_fisher_curvature/p99": 140288.0, "token_fisher_curvature/var": 2871346855936.0, "token_fisher_kl_divergence": 9.10136350285029e-06, "token_fisher_kl_divergence/max": 0.00732421875, "token_fisher_kl_divergence/median": 9.822769774067204e-25, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.5875825717572863e-29, "token_fisher_kl_divergence/p75": 6.818615225397118e-20, "token_fisher_kl_divergence/p85": 4.206704429243757e-17, "token_fisher_kl_divergence/p90": 7.16093850883226e-15, "token_fisher_kl_divergence/p95": 2.6830093702301383e-11, "token_fisher_kl_divergence/p99": 1.4841556549072266e-05, "token_fisher_kl_divergence/var": 3.217342126049516e-08, "token_full_update_term": 0.00031190726440399885, "token_full_update_term/max": 0.12890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.1175870895385742e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.8918200339612667e-13, "token_full_update_term/p85": 6.139089236967266e-11, "token_full_update_term/p90": 2.08092387765646e-09, "token_full_update_term/p95": 2.905726432800293e-07, "token_full_update_term/p99": 0.0034551620483398438, "token_full_update_term/var": 1.87340410775505e-05, "token_hessian_coeff": -1768.8321533203125, "token_hessian_coeff/max": 69206016.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -9437184.0, "token_hessian_coeff/p25": -3.0517578125e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0342254638671875, "token_hessian_coeff/var": 1969134108672.0, "token_hessian_coeff_abs": 88780.84375, "token_hessian_coeff_abs/max": 69206016.0, "token_hessian_coeff_abs/median": 2.3646862246096134e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00014209747314453125, "token_hessian_coeff_abs/p99": 1622016.0, "token_hessian_coeff_abs/var": 1961255239680.0 }, { "accuracy_reward": 0.7083333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20877191424369812, "adam_stats/lm_head/lr_effective_max": 6.938257138244808e-05, "adam_stats/lm_head/lr_effective_mean": 5.998389979167129e-11, "adam_stats/lm_head/lr_effective_min": -7.17413640813902e-05, "adam_stats/lm_head/lr_effective_std": 1.6295363138851826e-06, "adam_stats/lr_effective_max": 8.6132378783077e-05, "adam_stats/lr_effective_mean": -2.3540322469095543e-10, "adam_stats/lr_effective_min": -8.492494816891849e-05, "adam_stats/m_t_max": 0.004599176812916994, "adam_stats/m_t_mean": -2.5006129111826425e-11, "adam_stats/m_t_min": -0.003070671111345291, "adam_stats/v_t_max": 2.6248584617860615e-05, "adam_stats/v_t_mean": 1.7334983275218985e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7083333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20877191424369812, "all_logprobs": -0.016436105594038963, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.625, "all_logprobs/p1": -0.474609375, "all_logprobs/p10": -3.910064697265625e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.0026992782950401306, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.024545768275856972, "clip_ratio": 0.0, "completion_length": 497.0, "completion_length/correct": 474.73529052734375, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 425.0, "completion_length/correct/min": 176.0, "completion_length/correct/p25": 245.25, "completion_length/correct/p75": 688.75, "completion_length/correct/var": 68041.0078125, "completion_length/incorrect": 551.0714721679688, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 497.0, "completion_length/incorrect/min": 286.0, "completion_length/incorrect/p25": 390.25, "completion_length/incorrect/p75": 618.0, "completion_length/incorrect/var": 48020.3671875, "completion_length/max": 1024.0, "completion_length/median": 451.0, "completion_length/min": 176.0, "completion_length/p25": 277.5, "completion_length/p75": 688.0, "completion_length/var": 62851.2578125, "curvature_clip_ratio_token_fisher": 0.01708165742456913, "curvature_clip_ratio_token_hessian": 0.01161133497953415, "curvature_clip_ratio_total_fisher": 0.01708165742456913, "curvature_clip_ratio_total_full": 0.01708165742456913, "curvature_clip_ratio_total_hessian": 0.01161133497953415, "epoch": 0.0352, "feature_vector_variance/max_squared_error": 124569.421875, "feature_vector_variance/metric": 27701.236328125, "generated_tokens/total": 1383053.0, "global_fisher_curvature": 71680.0, "global_fisher_curvature/max": 71680.0, "global_fisher_curvature/median": 71680.0, "global_fisher_curvature/min": 71680.0, "global_fisher_curvature/p25": 71680.0, "global_fisher_curvature/p75": 71680.0, "global_fisher_curvature/p85": 71680.0, "global_fisher_curvature/p90": 71680.0, "global_fisher_curvature/p95": 71680.0, "global_fisher_curvature/p99": 71680.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.4803829193115234e-06, "global_fisher_kl_divergence/max": 7.4803829193115234e-06, "global_fisher_kl_divergence/median": 7.4803829193115234e-06, "global_fisher_kl_divergence/min": 7.4803829193115234e-06, "global_fisher_kl_divergence/p25": 7.4803829193115234e-06, "global_fisher_kl_divergence/p75": 7.4803829193115234e-06, "global_fisher_kl_divergence/p85": 7.4803829193115234e-06, "global_fisher_kl_divergence/p90": 7.4803829193115234e-06, "global_fisher_kl_divergence/p95": 7.4803829193115234e-06, "global_fisher_kl_divergence/p99": 7.4803829193115234e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.099609375, "global_full_update_term/max": 0.099609375, "global_full_update_term/median": 0.099609375, "global_full_update_term/min": 0.099609375, "global_full_update_term/p25": 0.099609375, "global_full_update_term/p75": 0.099609375, "global_full_update_term/p85": 0.099609375, "global_full_update_term/p90": 0.099609375, "global_full_update_term/p95": 0.099609375, "global_full_update_term/p99": 0.099609375, "global_full_update_term/var": NaN, "global_hessian_coeff": 15360.0, "global_hessian_coeff/max": 15360.0, "global_hessian_coeff/median": 15360.0, "global_hessian_coeff/min": 15360.0, "global_hessian_coeff/p25": 15360.0, "global_hessian_coeff/p75": 15360.0, "global_hessian_coeff/p99": 15360.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 15360.0, "global_hessian_coeff_abs/max": 15360.0, "global_hessian_coeff_abs/median": 15360.0, "global_hessian_coeff_abs/min": 15360.0, "global_hessian_coeff_abs/p25": 15360.0, "global_hessian_coeff_abs/p75": 15360.0, "global_hessian_coeff_abs/p99": 15360.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03642020747065544, "learning_rate": 1.4351590932319506e-05, "loss": -0.7083, "masked_global_fisher_curvature": 238.0, "masked_global_fisher_curvature/max": 238.0, "masked_global_fisher_curvature/median": 238.0, "masked_global_fisher_curvature/min": 238.0, "masked_global_fisher_curvature/p25": 238.0, "masked_global_fisher_curvature/p75": 238.0, "masked_global_fisher_curvature/p85": 238.0, "masked_global_fisher_curvature/p90": 238.0, "masked_global_fisher_curvature/p95": 238.0, "masked_global_fisher_curvature/p99": 238.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/max": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/median": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/min": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/p25": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/p75": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/p85": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/p90": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/p95": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/p99": 2.491287887096405e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00396728515625, "masked_global_full_update_term/max": 0.00396728515625, "masked_global_full_update_term/median": 0.00396728515625, "masked_global_full_update_term/min": 0.00396728515625, "masked_global_full_update_term/p25": 0.00396728515625, "masked_global_full_update_term/p75": 0.00396728515625, "masked_global_full_update_term/p85": 0.00396728515625, "masked_global_full_update_term/p90": 0.00396728515625, "masked_global_full_update_term/p95": 0.00396728515625, "masked_global_full_update_term/p99": 0.00396728515625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2512.0, "masked_global_hessian_coeff/max": -2512.0, "masked_global_hessian_coeff/median": -2512.0, "masked_global_hessian_coeff/min": -2512.0, "masked_global_hessian_coeff/p25": -2512.0, "masked_global_hessian_coeff/p75": -2512.0, "masked_global_hessian_coeff/p99": -2512.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2512.0, "masked_global_hessian_coeff_abs/max": 2512.0, "masked_global_hessian_coeff_abs/median": 2512.0, "masked_global_hessian_coeff_abs/min": 2512.0, "masked_global_hessian_coeff_abs/p25": 2512.0, "masked_global_hessian_coeff_abs/p75": 2512.0, "masked_global_hessian_coeff_abs/p99": 2512.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.9984996318817139, "masked_per_sentence_gradient_norm/max": 9.5, "masked_per_sentence_gradient_norm/median": 1.1484375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.984375, "masked_per_sentence_gradient_norm/var": 5.688296794891357, "masked_per_token_gradient_norm": 0.0469990000128746, "masked_per_token_gradient_norm/max": 12.625, "masked_per_token_gradient_norm/median": 2.892193151637912e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.221234798431396e-07, "masked_per_token_gradient_norm/var": 0.24076808989048004, "masked_sentence_fisher_curvature": 299.228759765625, "masked_sentence_fisher_curvature/max": 1192.0, "masked_sentence_fisher_curvature/median": 234.0, "masked_sentence_fisher_curvature/min": 1.1640625, "masked_sentence_fisher_curvature/p25": 142.0, "masked_sentence_fisher_curvature/p75": 415.0, "masked_sentence_fisher_curvature/p85": 520.5, "masked_sentence_fisher_curvature/p90": 618.0, "masked_sentence_fisher_curvature/p95": 873.0, "masked_sentence_fisher_curvature/p99": 1192.0, "masked_sentence_fisher_curvature/var": 64012.78515625, "masked_sentence_fisher_kl_divergence": 3.126879022374851e-08, "masked_sentence_fisher_kl_divergence/max": 1.2479722499847412e-07, "masked_sentence_fisher_kl_divergence/median": 2.444721758365631e-08, "masked_sentence_fisher_kl_divergence/min": 1.2187229003757238e-10, "masked_sentence_fisher_kl_divergence/p25": 1.4842953532934189e-08, "masked_sentence_fisher_kl_divergence/p75": 4.336470738053322e-08, "masked_sentence_fisher_kl_divergence/p85": 5.436595529317856e-08, "masked_sentence_fisher_kl_divergence/p90": 6.472691893577576e-08, "masked_sentence_fisher_kl_divergence/p95": 9.12696123123169e-08, "masked_sentence_fisher_kl_divergence/p99": 1.2479722499847412e-07, "masked_sentence_fisher_kl_divergence/var": 6.991904931515547e-16, "masked_sentence_full_gradient_variance/max_squared_error": 9.30468463897705, "masked_sentence_full_gradient_variance/metric": 9.30468463897705, "masked_sentence_full_gradient_variance/p75": 9.30468463897705, "masked_sentence_full_gradient_variance/p90": 9.30468463897705, "masked_sentence_full_gradient_variance/p95": 9.30468463897705, "masked_sentence_full_gradient_variance/p99": 9.30468463897705, "masked_sentence_full_update_term": 0.001474102376960218, "masked_sentence_full_update_term/max": 0.00933837890625, "masked_sentence_full_update_term/median": 0.000827789306640625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0018768310546875, "masked_sentence_full_update_term/p85": 0.002605438232421875, "masked_sentence_full_update_term/p90": 0.0038909912109375, "masked_sentence_full_update_term/p95": 0.00670623779296875, "masked_sentence_full_update_term/p99": 0.008294681087136269, "masked_sentence_full_update_term/var": 4.138634722039569e-06, "masked_sentence_hessian_coeff": -7803.33349609375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6048.0, "masked_sentence_hessian_coeff/min": -45824.0, "masked_sentence_hessian_coeff/p25": -9568.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 94690616.0, "masked_sentence_hessian_coeff_abs": 7803.33349609375, "masked_sentence_hessian_coeff_abs/max": 45824.0, "masked_sentence_hessian_coeff_abs/median": 6016.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9568.0, "masked_sentence_hessian_coeff_abs/p99": 45580.80078125, "masked_sentence_hessian_coeff_abs/var": 94690616.0, "masked_token_fisher_curvature": 357.46417236328125, "masked_token_fisher_curvature/max": 94720.0, "masked_token_fisher_curvature/median": 1.0880185641326534e-14, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 7.326834993749698e-20, "masked_token_fisher_curvature/p75": 5.857145879417658e-10, "masked_token_fisher_curvature/p85": 2.443848643451929e-07, "masked_token_fisher_curvature/p90": 3.218650817871094e-05, "masked_token_fisher_curvature/p95": 0.14687347412109375, "masked_token_fisher_curvature/p99": 6880.0, "masked_token_fisher_curvature/var": 17029892.0, "masked_token_fisher_kl_divergence": 3.733834930130797e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.137373342260413e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 7.642090019328552e-30, "masked_token_fisher_kl_divergence/p75": 6.098637220230962e-20, "masked_token_fisher_kl_divergence/p85": 2.5522796766666578e-17, "masked_token_fisher_kl_divergence/p90": 3.3584246494910985e-15, "masked_token_fisher_kl_divergence/p95": 1.5370371642120517e-11, "masked_token_fisher_kl_divergence/p99": 7.189810276031494e-07, "masked_token_fisher_kl_divergence/var": 1.8581501354947294e-13, "masked_token_full_update_term": 1.9524364688550122e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.428889274597168e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.2789769243681803e-12, "masked_token_full_update_term/p85": 1.0277290130034089e-10, "masked_token_full_update_term/p90": 2.0081643015146255e-09, "masked_token_full_update_term/p95": 1.8719583749771118e-07, "masked_token_full_update_term/p99": 0.000457763671875, "masked_token_full_update_term/var": 4.3472379473996625e-08, "masked_token_hessian_coeff": -10515.1982421875, "masked_token_hessian_coeff/max": 3728.0, "masked_token_hessian_coeff/median": -3.2741809263825417e-10, "masked_token_hessian_coeff/min": -2310144.0, "masked_token_hessian_coeff/p25": -0.00012874603271484375, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.02783203125, "masked_token_hessian_coeff/var": 13109299200.0, "masked_token_hessian_coeff_abs": 10515.4306640625, "masked_token_hessian_coeff_abs/max": 2310144.0, "masked_token_hessian_coeff_abs/median": 7.59027898311615e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.0004138946533203125, "masked_token_hessian_coeff_abs/p99": 268368.0, "masked_token_hessian_coeff_abs/var": 13109294080.0, "mean_logprobs": -0.0159912109375, "mean_logprobs/var": 0.00012874603271484375, "num_completions/total": 2112, "per_sentence_gradient_norm": 50.78580856323242, "per_sentence_gradient_norm/max": 232.0, "per_sentence_gradient_norm/median": 39.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 66.5, "per_sentence_gradient_norm/var": 2718.104736328125, "per_token_feature_norm": 181.4587860107422, "per_token_feature_norm/max": 314.0, "per_token_feature_norm/median": 183.0, "per_token_feature_norm/min": 92.5, "per_token_feature_norm/p25": 175.0, "per_token_feature_norm/p75": 189.0, "per_token_feature_norm/var": 200.32484436035156, "per_token_gradient_norm": 1.286425232887268, "per_token_gradient_norm/max": 328.0, "per_token_gradient_norm/median": 3.7834979593753815e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 8.502975106239319e-07, "per_token_gradient_norm/var": 169.14845275878906, "per_token_policy_error_norm": 0.009243285283446312, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.008368264883756638, "policy_entropy": 0.018658611923456192, "policy_entropy/max": 3.34375, "policy_entropy/median": 2.1187588572502136e-08, "policy_entropy/min": 2.337810934421869e-19, "policy_entropy/p25": 9.458744898438454e-11, "policy_entropy/p75": 2.8759241104125977e-06, "policy_entropy/var": 0.013815244659781456, "policy_loss": -0.7083333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20877191424369812, "policy_sharpness": 9.536962509155273, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.9625213146209717, "reward": 0.7083333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20877191424369812, "rewards/accuracy_reward": 0.7083333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20877191424369812, "sentence_fisher_curvature": 168437.25, "sentence_fisher_curvature/max": 761856.0, "sentence_fisher_curvature/median": 93184.0, "sentence_fisher_curvature/min": 228.0, "sentence_fisher_curvature/p25": 3708.0, "sentence_fisher_curvature/p75": 283136.0, "sentence_fisher_curvature/p85": 366592.0, "sentence_fisher_curvature/p90": 451584.0, "sentence_fisher_curvature/p95": 510976.0, "sentence_fisher_curvature/p99": 707379.375, "sentence_fisher_curvature/var": 32612907008.0, "sentence_fisher_kl_divergence": 1.758834332576953e-05, "sentence_fisher_kl_divergence/max": 7.963180541992188e-05, "sentence_fisher_kl_divergence/median": 9.715557098388672e-06, "sentence_fisher_kl_divergence/min": 2.3865140974521637e-08, "sentence_fisher_kl_divergence/p25": 3.8743019104003906e-07, "sentence_fisher_kl_divergence/p75": 2.9593706130981445e-05, "sentence_fisher_kl_divergence/p85": 3.826618194580078e-05, "sentence_fisher_kl_divergence/p90": 4.7206878662109375e-05, "sentence_fisher_kl_divergence/p95": 5.340576171875e-05, "sentence_fisher_kl_divergence/p99": 7.374288543360308e-05, "sentence_fisher_kl_divergence/var": 3.557626138128711e-10, "sentence_full_gradient_variance/max_squared_error": 5168.98974609375, "sentence_full_gradient_variance/metric": 5168.98974609375, "sentence_full_gradient_variance/p75": 5168.98974609375, "sentence_full_gradient_variance/p90": 5168.98974609375, "sentence_full_gradient_variance/p95": 5168.98974609375, "sentence_full_gradient_variance/p99": 5168.98974609375, "sentence_full_update_term": 0.041212718933820724, "sentence_full_update_term/max": 0.265625, "sentence_full_update_term/median": 0.037109375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0631103515625, "sentence_full_update_term/p85": 0.06982421875, "sentence_full_update_term/p90": 0.075927734375, "sentence_full_update_term/p95": 0.0950927734375, "sentence_full_update_term/p99": 0.18955102562904358, "sentence_full_update_term/var": 0.0017295449506491423, "sentence_hessian_coeff": 13317.0, "sentence_hessian_coeff/max": 342016.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -233472.0, "sentence_hessian_coeff/p25": -34304.0, "sentence_hessian_coeff/p75": 48576.0, "sentence_hessian_coeff/p99": 328396.84375, "sentence_hessian_coeff/var": 9688018944.0, "sentence_hessian_coeff_abs": 63764.66796875, "sentence_hessian_coeff_abs/max": 342016.0, "sentence_hessian_coeff_abs/median": 39680.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 106496.0, "sentence_hessian_coeff_abs/p99": 328396.84375, "sentence_hessian_coeff_abs/var": 5758496768.0, "step": 22, "token_fisher_curvature": 168517.84375, "token_fisher_curvature/max": 72351744.0, "token_fisher_curvature/median": 1.532107773982716e-14, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 9.317362419797304e-20, "token_fisher_curvature/p75": 1.1496013030409813e-09, "token_fisher_curvature/p85": 7.711350917816162e-07, "token_fisher_curvature/p90": 0.00026664137840270996, "token_fisher_curvature/p95": 13.99365234375, "token_fisher_curvature/p99": 1097728.0, "token_fisher_curvature/var": 5609813442560.0, "token_fisher_kl_divergence": 1.7598935301066376e-05, "token_fisher_kl_divergence/max": 0.007568359375, "token_fisher_kl_divergence/median": 1.6026624368214911e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 9.712849895533708e-30, "token_fisher_kl_divergence/p75": 1.2027867851011065e-19, "token_fisher_kl_divergence/p85": 8.066464163292153e-17, "token_fisher_kl_divergence/p90": 2.783277081030988e-14, "token_fisher_kl_divergence/p95": 1.461728516005678e-09, "token_fisher_kl_divergence/p99": 0.00011444091796875, "token_fisher_kl_divergence/var": 6.117517870052325e-08, "token_full_update_term": 0.0005859644152224064, "token_full_update_term/max": 0.1298828125, "token_full_update_term/median": 4.2599801546480925e-23, "token_full_update_term/min": -2.428889274597168e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.1742607714259066e-12, "token_full_update_term/p85": 2.1645973902195692e-10, "token_full_update_term/p90": 5.966285243630409e-09, "token_full_update_term/p95": 1.6160192899405956e-06, "token_full_update_term/p99": 0.014404296875, "token_full_update_term/var": 3.61272286681924e-05, "token_hessian_coeff": 5922.04931640625, "token_hessian_coeff/max": 70254592.0, "token_hessian_coeff/median": -4.5656634029001e-10, "token_hessian_coeff/min": -9699328.0, "token_hessian_coeff/p25": -0.00018024444580078125, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.083282470703125, "token_hessian_coeff/var": 3558693666816.0, "token_hessian_coeff_abs": 159382.40625, "token_hessian_coeff_abs/max": 70254592.0, "token_hessian_coeff_abs/median": 9.918585419654846e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.000583648681640625, "token_hessian_coeff_abs/p99": 4620288.0, "token_hessian_coeff_abs/var": 3533325991936.0 }, { "accuracy_reward": 0.53125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25164473056793213, "adam_stats/lm_head/lr_effective_max": 6.59797151456587e-05, "adam_stats/lm_head/lr_effective_mean": 3.3338758836931603e-11, "adam_stats/lm_head/lr_effective_min": -6.658214260824025e-05, "adam_stats/lm_head/lr_effective_std": 1.5633290786354337e-06, "adam_stats/lr_effective_max": 7.96550593804568e-05, "adam_stats/lr_effective_mean": -2.400008247693819e-10, "adam_stats/lr_effective_min": -8.220854215323925e-05, "adam_stats/m_t_max": 0.004107215441763401, "adam_stats/m_t_mean": -2.222273060015212e-11, "adam_stats/m_t_min": -0.002792404731735587, "adam_stats/v_t_max": 2.6222438464174047e-05, "adam_stats/v_t_mean": 1.7320227283651457e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.53125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25164473056793213, "all_logprobs": -0.015583057887852192, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.53125, "all_logprobs/p1": -0.38671875, "all_logprobs/p10": -3.621575888246298e-05, "all_logprobs/p25": -2.384185791015625e-07, "all_logprobs/p5": -0.003173828125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.024077987298369408, "clip_ratio": 0.0, "completion_length": 603.1458740234375, "completion_length/correct": 521.5490112304688, "completion_length/correct/max": 1021.0, "completion_length/correct/median": 472.0, "completion_length/correct/min": 292.0, "completion_length/correct/p25": 408.0, "completion_length/correct/p75": 601.0, "completion_length/correct/var": 26163.4921875, "completion_length/incorrect": 695.6222534179688, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 622.0, "completion_length/incorrect/min": 307.0, "completion_length/incorrect/p25": 451.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 75291.8359375, "completion_length/max": 1024.0, "completion_length/median": 549.0, "completion_length/min": 292.0, "completion_length/p25": 414.0, "completion_length/p75": 769.75, "completion_length/var": 56267.48046875, "curvature_clip_ratio_token_fisher": 0.013885530643165112, "curvature_clip_ratio_token_hessian": 0.009930572472512722, "curvature_clip_ratio_total_fisher": 0.013885530643165112, "curvature_clip_ratio_total_full": 0.013885530643165112, "curvature_clip_ratio_total_hessian": 0.009930572472512722, "epoch": 0.0368, "feature_vector_variance/max_squared_error": 55219.3828125, "feature_vector_variance/metric": 27764.962890625, "generated_tokens/total": 1440955.0, "global_fisher_curvature": 77312.0, "global_fisher_curvature/max": 77312.0, "global_fisher_curvature/median": 77312.0, "global_fisher_curvature/min": 77312.0, "global_fisher_curvature/p25": 77312.0, "global_fisher_curvature/p75": 77312.0, "global_fisher_curvature/p85": 77312.0, "global_fisher_curvature/p90": 77312.0, "global_fisher_curvature/p95": 77312.0, "global_fisher_curvature/p99": 77312.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.987022399902344e-06, "global_fisher_kl_divergence/max": 7.987022399902344e-06, "global_fisher_kl_divergence/median": 7.987022399902344e-06, "global_fisher_kl_divergence/min": 7.987022399902344e-06, "global_fisher_kl_divergence/p25": 7.987022399902344e-06, "global_fisher_kl_divergence/p75": 7.987022399902344e-06, "global_fisher_kl_divergence/p85": 7.987022399902344e-06, "global_fisher_kl_divergence/p90": 7.987022399902344e-06, "global_fisher_kl_divergence/p95": 7.987022399902344e-06, "global_fisher_kl_divergence/p99": 7.987022399902344e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0732421875, "global_full_update_term/max": 0.0732421875, "global_full_update_term/median": 0.0732421875, "global_full_update_term/min": 0.0732421875, "global_full_update_term/p25": 0.0732421875, "global_full_update_term/p75": 0.0732421875, "global_full_update_term/p85": 0.0732421875, "global_full_update_term/p90": 0.0732421875, "global_full_update_term/p95": 0.0732421875, "global_full_update_term/p99": 0.0732421875, "global_full_update_term/var": NaN, "global_hessian_coeff": 17280.0, "global_hessian_coeff/max": 17280.0, "global_hessian_coeff/median": 17280.0, "global_hessian_coeff/min": 17280.0, "global_hessian_coeff/p25": 17280.0, "global_hessian_coeff/p75": 17280.0, "global_hessian_coeff/p99": 17280.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17280.0, "global_hessian_coeff_abs/max": 17280.0, "global_hessian_coeff_abs/median": 17280.0, "global_hessian_coeff_abs/min": 17280.0, "global_hessian_coeff_abs/p25": 17280.0, "global_hessian_coeff_abs/p75": 17280.0, "global_hessian_coeff_abs/p99": 17280.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.02944144234061241, "learning_rate": 1.4240955347243754e-05, "loss": -0.5312, "masked_global_fisher_curvature": 560.0, "masked_global_fisher_curvature/max": 560.0, "masked_global_fisher_curvature/median": 560.0, "masked_global_fisher_curvature/min": 560.0, "masked_global_fisher_curvature/p25": 560.0, "masked_global_fisher_curvature/p75": 560.0, "masked_global_fisher_curvature/p85": 560.0, "masked_global_fisher_curvature/p90": 560.0, "masked_global_fisher_curvature/p95": 560.0, "masked_global_fisher_curvature/p99": 560.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/max": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/median": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/min": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/p25": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/p75": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/p85": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/p90": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/p95": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/p99": 5.774199962615967e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00183868408203125, "masked_global_full_update_term/max": 0.00183868408203125, "masked_global_full_update_term/median": 0.00183868408203125, "masked_global_full_update_term/min": 0.00183868408203125, "masked_global_full_update_term/p25": 0.00183868408203125, "masked_global_full_update_term/p75": 0.00183868408203125, "masked_global_full_update_term/p85": 0.00183868408203125, "masked_global_full_update_term/p90": 0.00183868408203125, "masked_global_full_update_term/p95": 0.00183868408203125, "masked_global_full_update_term/p99": 0.00183868408203125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1152.0, "masked_global_hessian_coeff/max": -1152.0, "masked_global_hessian_coeff/median": -1152.0, "masked_global_hessian_coeff/min": -1152.0, "masked_global_hessian_coeff/p25": -1152.0, "masked_global_hessian_coeff/p75": -1152.0, "masked_global_hessian_coeff/p99": -1152.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1152.0, "masked_global_hessian_coeff_abs/max": 1152.0, "masked_global_hessian_coeff_abs/median": 1152.0, "masked_global_hessian_coeff_abs/min": 1152.0, "masked_global_hessian_coeff_abs/p25": 1152.0, "masked_global_hessian_coeff_abs/p75": 1152.0, "masked_global_hessian_coeff_abs/p99": 1152.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.3544414043426514, "masked_per_sentence_gradient_norm/max": 10.3125, "masked_per_sentence_gradient_norm/median": 0.70703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.23828125, "masked_per_sentence_gradient_norm/var": 3.577237367630005, "masked_per_token_gradient_norm": 0.029772719368338585, "masked_per_token_gradient_norm/max": 10.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.0046870708465576e-08, "masked_per_token_gradient_norm/var": 0.13167941570281982, "masked_sentence_fisher_curvature": 260.1809997558594, "masked_sentence_fisher_curvature/max": 716.0, "masked_sentence_fisher_curvature/median": 246.0, "masked_sentence_fisher_curvature/min": 11.25, "masked_sentence_fisher_curvature/p25": 137.5, "masked_sentence_fisher_curvature/p75": 359.0, "masked_sentence_fisher_curvature/p85": 396.0, "masked_sentence_fisher_curvature/p90": 426.0, "masked_sentence_fisher_curvature/p95": 557.0, "masked_sentence_fisher_curvature/p99": 681.8001098632812, "masked_sentence_fisher_curvature/var": 25289.673828125, "masked_sentence_fisher_kl_divergence": 2.6787500218006244e-08, "masked_sentence_fisher_kl_divergence/max": 7.35744833946228e-08, "masked_sentence_fisher_kl_divergence/median": 2.537854015827179e-08, "masked_sentence_fisher_kl_divergence/min": 1.1568772606551647e-09, "masked_sentence_fisher_kl_divergence/p25": 1.4159013517200947e-08, "masked_sentence_fisher_kl_divergence/p75": 3.6903657019138336e-08, "masked_sentence_fisher_kl_divergence/p85": 4.0745362639427185e-08, "masked_sentence_fisher_kl_divergence/p90": 4.3888576328754425e-08, "masked_sentence_fisher_kl_divergence/p95": 5.7334545999765396e-08, "masked_sentence_fisher_kl_divergence/p99": 7.003546897976776e-08, "masked_sentence_fisher_kl_divergence/var": 2.6754451493401295e-16, "masked_sentence_full_gradient_variance/max_squared_error": 5.265111446380615, "masked_sentence_full_gradient_variance/metric": 5.265111446380615, "masked_sentence_full_gradient_variance/p75": 5.265111446380615, "masked_sentence_full_gradient_variance/p90": 5.265111446380615, "masked_sentence_full_gradient_variance/p95": 5.265111446380615, "masked_sentence_full_gradient_variance/p99": 5.265111446380615, "masked_sentence_full_update_term": 0.0009346107835881412, "masked_sentence_full_update_term/max": 0.0081787109375, "masked_sentence_full_update_term/median": 0.000446319580078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001560211181640625, "masked_sentence_full_update_term/p85": 0.00215911865234375, "masked_sentence_full_update_term/p90": 0.0024261474609375, "masked_sentence_full_update_term/p95": 0.0031585693359375, "masked_sentence_full_update_term/p99": 0.004090894479304552, "masked_sentence_full_update_term/var": 1.6744504591770237e-06, "masked_sentence_hessian_coeff": -5127.5, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -2384.0, "masked_sentence_hessian_coeff/min": -27264.0, "masked_sentence_hessian_coeff/p25": -9312.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 41234572.0, "masked_sentence_hessian_coeff_abs": 5127.5, "masked_sentence_hessian_coeff_abs/max": 27264.0, "masked_sentence_hessian_coeff_abs/median": 2272.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9312.0, "masked_sentence_hessian_coeff_abs/p99": 25804.8046875, "masked_sentence_hessian_coeff_abs/var": 41234572.0, "masked_token_fisher_curvature": 307.2123107910156, "masked_token_fisher_curvature/max": 94720.0, "masked_token_fisher_curvature/median": 1.7541523789077473e-14, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.1180834903756764e-19, "masked_token_fisher_curvature/p75": 8.039933163672686e-10, "masked_token_fisher_curvature/p85": 3.2223761081695557e-07, "masked_token_fisher_curvature/p90": 3.1948089599609375e-05, "masked_token_fisher_curvature/p95": 0.1376953125, "masked_token_fisher_curvature/p99": 5184.0, "masked_token_fisher_curvature/var": 13453529.0, "masked_token_fisher_kl_divergence": 3.1649403098299445e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 1.809457589959748e-24, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.1537090738857298e-29, "masked_token_fisher_kl_divergence/p75": 8.300922883092143e-20, "masked_token_fisher_kl_divergence/p85": 3.3176586478056436e-17, "masked_token_fisher_kl_divergence/p90": 3.2890357104520263e-15, "masked_token_fisher_kl_divergence/p95": 1.4154011296341196e-11, "masked_token_fisher_kl_divergence/p99": 5.327165126800537e-07, "masked_token_fisher_kl_divergence/var": 1.428251641304515e-13, "masked_token_full_update_term": 1.2617861102626193e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.1771917343139648e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.433953204596719e-15, "masked_token_full_update_term/p85": 7.673861546209082e-12, "masked_token_full_update_term/p90": 2.3283064365386963e-10, "masked_token_full_update_term/p95": 2.3865140974521637e-08, "masked_token_full_update_term/p99": 0.0001087188720703125, "masked_token_full_update_term/var": 2.5339421227954517e-08, "masked_token_hessian_coeff": -7068.94677734375, "masked_token_hessian_coeff/max": 2048.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2424832.0, "masked_token_hessian_coeff/p25": -1.5944242477416992e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.01904296875, "masked_token_hessian_coeff/var": 8224268800.0, "masked_token_hessian_coeff_abs": 7069.08935546875, "masked_token_hessian_coeff_abs/max": 2424832.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.3828277587890625e-05, "masked_token_hessian_coeff_abs/p99": 43776.0, "masked_token_hessian_coeff_abs/var": 8224267264.0, "mean_logprobs": -0.0152587890625, "mean_logprobs/var": 4.458427429199219e-05, "num_completions/total": 2208, "per_sentence_gradient_norm": 36.63932418823242, "per_sentence_gradient_norm/max": 183.0, "per_sentence_gradient_norm/median": 27.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 64.25, "per_sentence_gradient_norm/var": 2007.72314453125, "per_token_feature_norm": 181.59666442871094, "per_token_feature_norm/max": 256.0, "per_token_feature_norm/median": 183.0, "per_token_feature_norm/min": 92.5, "per_token_feature_norm/p25": 175.0, "per_token_feature_norm/p75": 189.0, "per_token_feature_norm/var": 169.95050048828125, "per_token_gradient_norm": 0.9429880976676941, "per_token_gradient_norm/max": 264.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.844049155712128e-08, "per_token_gradient_norm/var": 119.23322296142578, "per_token_policy_error_norm": 0.008884050883352757, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.008654218167066574, "policy_entropy": 0.016963038593530655, "policy_entropy/max": 3.59375, "policy_entropy/median": 2.7008354663848877e-08, "policy_entropy/min": 1.8211208365967457e-19, "policy_entropy/p25": 1.1823431123048067e-10, "policy_entropy/p75": 3.3192336559295654e-06, "policy_entropy/var": 0.009782967157661915, "policy_loss": -0.53125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25164473056793213, "policy_sharpness": 9.539824485778809, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.9252638816833496, "reward": 0.53125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25164473056793213, "rewards/accuracy_reward": 0.53125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25164473056793213, "sentence_fisher_curvature": 144888.59375, "sentence_fisher_curvature/max": 847872.0, "sentence_fisher_curvature/median": 104960.0, "sentence_fisher_curvature/min": 338.0, "sentence_fisher_curvature/p25": 1168.0, "sentence_fisher_curvature/p75": 249856.0, "sentence_fisher_curvature/p85": 322560.0, "sentence_fisher_curvature/p90": 355328.0, "sentence_fisher_curvature/p95": 392192.0, "sentence_fisher_curvature/p99": 637747.875, "sentence_fisher_curvature/var": 28288237568.0, "sentence_fisher_kl_divergence": 1.4919101886334829e-05, "sentence_fisher_kl_divergence/max": 8.726119995117188e-05, "sentence_fisher_kl_divergence/median": 1.0788440704345703e-05, "sentence_fisher_kl_divergence/min": 3.4924596548080444e-08, "sentence_fisher_kl_divergence/p25": 1.2025702744722366e-07, "sentence_fisher_kl_divergence/p75": 2.574920654296875e-05, "sentence_fisher_kl_divergence/p85": 3.325939178466797e-05, "sentence_fisher_kl_divergence/p90": 3.6597251892089844e-05, "sentence_fisher_kl_divergence/p95": 4.0411949157714844e-05, "sentence_fisher_kl_divergence/p99": 6.551749538630247e-05, "sentence_fisher_kl_divergence/var": 2.9970975701232305e-10, "sentence_full_gradient_variance/max_squared_error": 3278.74951171875, "sentence_full_gradient_variance/metric": 3278.74951171875, "sentence_full_gradient_variance/p75": 3278.74951171875, "sentence_full_gradient_variance/p90": 3278.74951171875, "sentence_full_gradient_variance/p95": 3278.74951171875, "sentence_full_gradient_variance/p99": 3278.74951171875, "sentence_full_update_term": 0.033804576843976974, "sentence_full_update_term/max": 0.1328125, "sentence_full_update_term/median": 0.028076171875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.05718994140625, "sentence_full_update_term/p85": 0.0731201171875, "sentence_full_update_term/p90": 0.0927734375, "sentence_full_update_term/p95": 0.1107177734375, "sentence_full_update_term/p99": 0.13095703721046448, "sentence_full_update_term/var": 0.0014655604027211666, "sentence_hessian_coeff": 13353.25, "sentence_hessian_coeff/max": 274432.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -149504.0, "sentence_hessian_coeff/p25": -94.0, "sentence_hessian_coeff/p75": 15008.0, "sentence_hessian_coeff/p99": 248166.484375, "sentence_hessian_coeff/var": 5787046912.0, "sentence_hessian_coeff_abs": 43243.41796875, "sentence_hessian_coeff_abs/max": 274432.0, "sentence_hessian_coeff_abs/median": 2208.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 72448.0, "sentence_hessian_coeff_abs/p99": 248166.484375, "sentence_hessian_coeff_abs/var": 4077555712.0, "step": 23, "token_fisher_curvature": 131868.625, "token_fisher_curvature/max": 73400320.0, "token_fisher_curvature/median": 2.3314683517128287e-14, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.3467823861343375e-19, "token_fisher_curvature/p75": 1.382431946694851e-09, "token_fisher_curvature/p85": 8.381903171539307e-07, "token_fisher_curvature/p90": 0.00018024444580078125, "token_fisher_curvature/p95": 6.5, "token_fisher_curvature/p99": 585728.0, "token_fisher_curvature/var": 4847631335424.0, "token_fisher_kl_divergence": 1.3583649888460059e-05, "token_fisher_kl_divergence/max": 0.007568359375, "token_fisher_kl_divergence/median": 2.4039936552322367e-24, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.3903673454520333e-29, "token_fisher_kl_divergence/p75": 1.4230153513872246e-19, "token_fisher_kl_divergence/p85": 8.630249292984615e-17, "token_fisher_kl_divergence/p90": 1.8540724511240114e-14, "token_fisher_kl_divergence/p95": 6.693881005048752e-10, "token_fisher_kl_divergence/p99": 6.031990051269531e-05, "token_fisher_kl_divergence/var": 5.143690984255045e-08, "token_full_update_term": 0.0004469057312235236, "token_full_update_term/max": 0.130859375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.1771917343139648e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 9.492406860545088e-15, "token_full_update_term/p85": 1.5234036254696548e-11, "token_full_update_term/p90": 5.784386303275824e-10, "token_full_update_term/p95": 1.4062970876693726e-07, "token_full_update_term/p99": 0.009459495544433594, "token_full_update_term/var": 2.8029708118992858e-05, "token_hessian_coeff": 3071.206787109375, "token_hessian_coeff/max": 71303168.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -10027008.0, "token_hessian_coeff/p25": -2.3990869522094727e-06, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.041748046875, "token_hessian_coeff/var": 3511031169024.0, "token_hessian_coeff_abs": 138275.796875, "token_hessian_coeff_abs/max": 71303168.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.238154411315918e-05, "token_hessian_coeff_abs/p99": 4161536.0, "token_hessian_coeff_abs/var": 3491920084992.0 }, { "accuracy_reward": 0.8541666865348816, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.12587721645832062, "adam_stats/lm_head/lr_effective_max": 6.93278998369351e-05, "adam_stats/lm_head/lr_effective_mean": 1.1739559845069802e-11, "adam_stats/lm_head/lr_effective_min": -6.626277172472328e-05, "adam_stats/lm_head/lr_effective_std": 1.5785959703862318e-06, "adam_stats/lr_effective_max": 8.23326117824763e-05, "adam_stats/lr_effective_mean": -2.541260812893853e-10, "adam_stats/lr_effective_min": -8.48115814733319e-05, "adam_stats/m_t_max": 0.003626303281635046, "adam_stats/m_t_mean": -2.0005264805833534e-11, "adam_stats/m_t_min": -0.0024923740420490503, "adam_stats/v_t_max": 2.619670885906089e-05, "adam_stats/v_t_mean": 1.7312133714433853e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8541666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.12587721645832062, "all_logprobs": -0.013030155561864376, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.6875, "all_logprobs/p1": -0.33716797828674316, "all_logprobs/p10": -2.1457672119140625e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.00150299072265625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01928088255226612, "clip_ratio": 0.0, "completion_length": 497.2708435058594, "completion_length/correct": 420.0121765136719, "completion_length/correct/max": 1009.0, "completion_length/correct/median": 416.0, "completion_length/correct/min": 178.0, "completion_length/correct/p25": 247.5, "completion_length/correct/p75": 554.75, "completion_length/correct/var": 37111.9609375, "completion_length/incorrect": 949.7857666015625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 376.0, "completion_length/incorrect/p25": 1018.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 32022.1796875, "completion_length/max": 1024.0, "completion_length/median": 420.0, "completion_length/min": 178.0, "completion_length/p25": 263.0, "completion_length/p75": 630.5, "completion_length/var": 71353.5078125, "curvature_clip_ratio_token_fisher": 0.020298294723033905, "curvature_clip_ratio_token_hessian": 0.014600527472794056, "curvature_clip_ratio_total_fisher": 0.020298294723033905, "curvature_clip_ratio_total_full": 0.020298294723033905, "curvature_clip_ratio_total_hessian": 0.014600527472794056, "epoch": 0.0384, "feature_vector_variance/max_squared_error": 58451.96875, "feature_vector_variance/metric": 28084.47265625, "generated_tokens/total": 1488693.0, "global_fisher_curvature": 95744.0, "global_fisher_curvature/max": 95744.0, "global_fisher_curvature/median": 95744.0, "global_fisher_curvature/min": 95744.0, "global_fisher_curvature/p25": 95744.0, "global_fisher_curvature/p75": 95744.0, "global_fisher_curvature/p85": 95744.0, "global_fisher_curvature/p90": 95744.0, "global_fisher_curvature/p95": 95744.0, "global_fisher_curvature/p99": 95744.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 9.715557098388672e-06, "global_fisher_kl_divergence/max": 9.715557098388672e-06, "global_fisher_kl_divergence/median": 9.715557098388672e-06, "global_fisher_kl_divergence/min": 9.715557098388672e-06, "global_fisher_kl_divergence/p25": 9.715557098388672e-06, "global_fisher_kl_divergence/p75": 9.715557098388672e-06, "global_fisher_kl_divergence/p85": 9.715557098388672e-06, "global_fisher_kl_divergence/p90": 9.715557098388672e-06, "global_fisher_kl_divergence/p95": 9.715557098388672e-06, "global_fisher_kl_divergence/p99": 9.715557098388672e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.283203125, "global_full_update_term/max": 0.283203125, "global_full_update_term/median": 0.283203125, "global_full_update_term/min": 0.283203125, "global_full_update_term/p25": 0.283203125, "global_full_update_term/p75": 0.283203125, "global_full_update_term/p85": 0.283203125, "global_full_update_term/p90": 0.283203125, "global_full_update_term/p95": 0.283203125, "global_full_update_term/p99": 0.283203125, "global_full_update_term/var": NaN, "global_hessian_coeff": 14784.0, "global_hessian_coeff/max": 14784.0, "global_hessian_coeff/median": 14784.0, "global_hessian_coeff/min": 14784.0, "global_hessian_coeff/p25": 14784.0, "global_hessian_coeff/p75": 14784.0, "global_hessian_coeff/p99": 14784.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 14784.0, "global_hessian_coeff_abs/max": 14784.0, "global_hessian_coeff_abs/median": 14784.0, "global_hessian_coeff_abs/min": 14784.0, "global_hessian_coeff_abs/p25": 14784.0, "global_hessian_coeff_abs/p75": 14784.0, "global_hessian_coeff_abs/p99": 14784.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05630405619740486, "learning_rate": 1.4122106946441953e-05, "loss": -0.8542, "masked_global_fisher_curvature": 131.0, "masked_global_fisher_curvature/max": 131.0, "masked_global_fisher_curvature/median": 131.0, "masked_global_fisher_curvature/min": 131.0, "masked_global_fisher_curvature/p25": 131.0, "masked_global_fisher_curvature/p75": 131.0, "masked_global_fisher_curvature/p85": 131.0, "masked_global_fisher_curvature/p90": 131.0, "masked_global_fisher_curvature/p95": 131.0, "masked_global_fisher_curvature/p99": 131.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/max": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/median": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/min": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p25": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p75": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p85": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p90": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p95": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p99": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00396728515625, "masked_global_full_update_term/max": 0.00396728515625, "masked_global_full_update_term/median": 0.00396728515625, "masked_global_full_update_term/min": 0.00396728515625, "masked_global_full_update_term/p25": 0.00396728515625, "masked_global_full_update_term/p75": 0.00396728515625, "masked_global_full_update_term/p85": 0.00396728515625, "masked_global_full_update_term/p90": 0.00396728515625, "masked_global_full_update_term/p95": 0.00396728515625, "masked_global_full_update_term/p99": 0.00396728515625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2592.0, "masked_global_hessian_coeff/max": -2592.0, "masked_global_hessian_coeff/median": -2592.0, "masked_global_hessian_coeff/min": -2592.0, "masked_global_hessian_coeff/p25": -2592.0, "masked_global_hessian_coeff/p75": -2592.0, "masked_global_hessian_coeff/p99": -2592.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2592.0, "masked_global_hessian_coeff_abs/max": 2592.0, "masked_global_hessian_coeff_abs/median": 2592.0, "masked_global_hessian_coeff_abs/min": 2592.0, "masked_global_hessian_coeff_abs/p25": 2592.0, "masked_global_hessian_coeff_abs/p75": 2592.0, "masked_global_hessian_coeff_abs/p99": 2592.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.3873190879821777, "masked_per_sentence_gradient_norm/max": 7.21875, "masked_per_sentence_gradient_norm/median": 1.7890625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.9765625, "masked_per_sentence_gradient_norm/p75": 4.078125, "masked_per_sentence_gradient_norm/var": 3.62709641456604, "masked_per_token_gradient_norm": 0.04901842027902603, "masked_per_token_gradient_norm/max": 9.4375, "masked_per_token_gradient_norm/median": 7.530616130679846e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.341104507446289e-06, "masked_per_token_gradient_norm/var": 0.2135729342699051, "masked_sentence_fisher_curvature": 237.193359375, "masked_sentence_fisher_curvature/max": 600.0, "masked_sentence_fisher_curvature/median": 227.0, "masked_sentence_fisher_curvature/min": 8.4375, "masked_sentence_fisher_curvature/p25": 114.375, "masked_sentence_fisher_curvature/p75": 331.0, "masked_sentence_fisher_curvature/p85": 402.0, "masked_sentence_fisher_curvature/p90": 425.0, "masked_sentence_fisher_curvature/p95": 490.5, "masked_sentence_fisher_curvature/p99": 554.400146484375, "masked_sentence_fisher_curvature/var": 20960.53515625, "masked_sentence_fisher_kl_divergence": 2.405397481197724e-08, "masked_sentence_fisher_kl_divergence/max": 6.100162863731384e-08, "masked_sentence_fisher_kl_divergence/median": 2.3050233721733093e-08, "masked_sentence_fisher_kl_divergence/min": 8.549250196665525e-10, "masked_sentence_fisher_kl_divergence/p25": 1.1597876437008381e-08, "masked_sentence_fisher_kl_divergence/p75": 3.3585820347070694e-08, "masked_sentence_fisher_kl_divergence/p85": 4.0745362639427185e-08, "masked_sentence_fisher_kl_divergence/p90": 4.307366907596588e-08, "masked_sentence_fisher_kl_divergence/p95": 4.96511347591877e-08, "masked_sentence_fisher_kl_divergence/p99": 5.6135483816888154e-08, "masked_sentence_fisher_kl_divergence/var": 2.1559038592052095e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.948762893676758, "masked_sentence_full_gradient_variance/metric": 8.948762893676758, "masked_sentence_full_gradient_variance/p75": 8.948762893676758, "masked_sentence_full_gradient_variance/p90": 8.948762893676758, "masked_sentence_full_gradient_variance/p95": 8.948762893676758, "masked_sentence_full_gradient_variance/p99": 8.948762893676758, "masked_sentence_full_update_term": 0.0016501247882843018, "masked_sentence_full_update_term/max": 0.0054931640625, "masked_sentence_full_update_term/median": 0.00118255615234375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.000583648681640625, "masked_sentence_full_update_term/p75": 0.002544403076171875, "masked_sentence_full_update_term/p85": 0.0032958984375, "masked_sentence_full_update_term/p90": 0.0038909912109375, "masked_sentence_full_update_term/p95": 0.004241943359375, "masked_sentence_full_update_term/p99": 0.0054641724564135075, "masked_sentence_full_update_term/var": 2.035995748883579e-06, "masked_sentence_hessian_coeff": -9892.5, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -9792.0, "masked_sentence_hessian_coeff/min": -24192.0, "masked_sentence_hessian_coeff/p25": -14352.0, "masked_sentence_hessian_coeff/p75": -4568.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 50091540.0, "masked_sentence_hessian_coeff_abs": 9892.5, "masked_sentence_hessian_coeff_abs/max": 24192.0, "masked_sentence_hessian_coeff_abs/median": 9792.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 4568.0, "masked_sentence_hessian_coeff_abs/p75": 14352.0, "masked_sentence_hessian_coeff_abs/p99": 24192.0, "masked_sentence_hessian_coeff_abs/var": 50091540.0, "masked_token_fisher_curvature": 295.723388671875, "masked_token_fisher_curvature/max": 94720.0, "masked_token_fisher_curvature/median": 1.6028844918025698e-15, "masked_token_fisher_curvature/min": 2.7550648847397363e-40, "masked_token_fisher_curvature/p25": 8.629148150153185e-21, "masked_token_fisher_curvature/p75": 1.609805622138083e-10, "masked_token_fisher_curvature/p85": 9.220093488693237e-08, "masked_token_fisher_curvature/p90": 8.428003638982773e-06, "masked_token_fisher_curvature/p95": 0.033593177795410156, "masked_token_fisher_curvature/p99": 4896.0, "masked_token_fisher_curvature/var": 12888189.0, "masked_token_fisher_kl_divergence": 2.9978306059774695e-08, "masked_token_fisher_kl_divergence/max": 9.59634780883789e-06, "masked_token_fisher_kl_divergence/median": 1.6236650695620953e-25, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 8.7514256672956e-31, "masked_token_fisher_kl_divergence/p75": 1.6305384234645282e-20, "masked_token_fisher_kl_divergence/p85": 9.324138683375338e-18, "masked_token_fisher_kl_divergence/p90": 8.555574868354676e-16, "masked_token_fisher_kl_divergence/p95": 3.4048874830716613e-12, "masked_token_fisher_kl_divergence/p99": 4.954636096954346e-07, "masked_token_fisher_kl_divergence/var": 1.3240500547091055e-13, "masked_token_full_update_term": 2.0746478185174055e-05, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 1.0503208545953324e-19, "masked_token_full_update_term/min": -5.62518835067749e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.0037305148434825e-12, "masked_token_full_update_term/p85": 2.177351632326463e-10, "masked_token_full_update_term/p90": 5.209585651755333e-09, "masked_token_full_update_term/p95": 4.76837158203125e-07, "masked_token_full_update_term/p99": 0.0005314648151397705, "masked_token_full_update_term/var": 4.067437942012475e-08, "masked_token_hessian_coeff": -12086.3095703125, "masked_token_hessian_coeff/max": 444.0, "masked_token_hessian_coeff/median": -1.9208528101444244e-09, "masked_token_hessian_coeff/min": -2555904.0, "masked_token_hessian_coeff/p25": -0.0002460479736328125, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.044189453125, "masked_token_hessian_coeff/var": 14455619584.0, "masked_token_hessian_coeff_abs": 12086.3916015625, "masked_token_hessian_coeff_abs/max": 2555904.0, "masked_token_hessian_coeff_abs/median": 1.816079020500183e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00084686279296875, "masked_token_hessian_coeff_abs/p99": 362496.0, "masked_token_hessian_coeff_abs/var": 14455619584.0, "mean_logprobs": -0.0130615234375, "mean_logprobs/var": 6.151199340820312e-05, "num_completions/total": 2304, "per_sentence_gradient_norm": 74.00260925292969, "per_sentence_gradient_norm/max": 292.0, "per_sentence_gradient_norm/median": 67.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 31.875, "per_sentence_gradient_norm/p75": 104.25, "per_sentence_gradient_norm/var": 3412.807861328125, "per_token_feature_norm": 182.87356567382812, "per_token_feature_norm/max": 246.0, "per_token_feature_norm/median": 184.0, "per_token_feature_norm/min": 91.5, "per_token_feature_norm/p25": 177.0, "per_token_feature_norm/p75": 190.0, "per_token_feature_norm/var": 167.19830322265625, "per_token_gradient_norm": 1.5155938863754272, "per_token_gradient_norm/max": 286.0, "per_token_gradient_norm/median": 1.0986695997416973e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.041459083557129e-06, "per_token_gradient_norm/var": 191.21212768554688, "per_token_policy_error_norm": 0.007661324925720692, "per_token_policy_error_norm/max": 1.96875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0071109323762357235, "policy_entropy": 0.013970781117677689, "policy_entropy/max": 3.1875, "policy_entropy/median": 1.0593794286251068e-08, "policy_entropy/min": 1.1434944787933055e-19, "policy_entropy/p25": 3.979039320256561e-11, "policy_entropy/p75": 1.6838312149047852e-06, "policy_entropy/var": 0.007650598883628845, "policy_loss": -0.8541666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.12587721645832062, "policy_sharpness": 9.595415115356445, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.559292793273926, "reward": 0.8541666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.12587721645832062, "rewards/accuracy_reward": 0.8541666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.12587721645832062, "sentence_fisher_curvature": 230976.546875, "sentence_fisher_curvature/max": 753664.0, "sentence_fisher_curvature/median": 209920.0, "sentence_fisher_curvature/min": 72.0, "sentence_fisher_curvature/p25": 59904.0, "sentence_fisher_curvature/p75": 317952.0, "sentence_fisher_curvature/p85": 413696.0, "sentence_fisher_curvature/p90": 503808.0, "sentence_fisher_curvature/p95": 608256.0, "sentence_fisher_curvature/p99": 741990.4375, "sentence_fisher_curvature/var": 35430039552.0, "sentence_fisher_kl_divergence": 2.342003062949516e-05, "sentence_fisher_kl_divergence/max": 7.62939453125e-05, "sentence_fisher_kl_divergence/median": 2.1338462829589844e-05, "sentence_fisher_kl_divergence/min": 7.30506144464016e-09, "sentence_fisher_kl_divergence/p25": 6.079673767089844e-06, "sentence_fisher_kl_divergence/p75": 3.224611282348633e-05, "sentence_fisher_kl_divergence/p85": 4.1961669921875e-05, "sentence_fisher_kl_divergence/p90": 5.1021575927734375e-05, "sentence_fisher_kl_divergence/p95": 6.16312026977539e-05, "sentence_fisher_kl_divergence/p99": 7.538795762229711e-05, "sentence_fisher_kl_divergence/var": 3.639507861752378e-10, "sentence_full_gradient_variance/max_squared_error": 8697.642578125, "sentence_full_gradient_variance/metric": 8697.642578125, "sentence_full_gradient_variance/p75": 8697.642578125, "sentence_full_gradient_variance/p90": 8697.642578125, "sentence_full_gradient_variance/p95": 8697.642578125, "sentence_full_gradient_variance/p99": 8697.642578125, "sentence_full_update_term": 0.06146176904439926, "sentence_full_update_term/max": 0.408203125, "sentence_full_update_term/median": 0.055908203125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.017333984375, "sentence_full_update_term/p75": 0.0780029296875, "sentence_full_update_term/p85": 0.1026611328125, "sentence_full_update_term/p90": 0.118896484375, "sentence_full_update_term/p95": 0.147216796875, "sentence_full_update_term/p99": 0.22915096580982208, "sentence_full_update_term/var": 0.003380489768460393, "sentence_hessian_coeff": 10604.75, "sentence_hessian_coeff/max": 573440.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -231424.0, "sentence_hessian_coeff/p25": -66048.0, "sentence_hessian_coeff/p75": 55232.0, "sentence_hessian_coeff/p99": 378880.625, "sentence_hessian_coeff/var": 18748618752.0, "sentence_hessian_coeff_abs": 94136.75, "sentence_hessian_coeff_abs/max": 573440.0, "sentence_hessian_coeff_abs/median": 61696.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 22880.0, "sentence_hessian_coeff_abs/p75": 121984.0, "sentence_hessian_coeff_abs/p99": 378880.625, "sentence_hessian_coeff_abs/var": 9907253248.0, "step": 24, "token_fisher_curvature": 211720.890625, "token_fisher_curvature/max": 80216064.0, "token_fisher_curvature/median": 2.5118795932144167e-15, "token_fisher_curvature/min": 2.7550648847397363e-40, "token_fisher_curvature/p25": 1.172611236355172e-20, "token_fisher_curvature/p75": 3.637978807091713e-10, "token_fisher_curvature/p85": 4.0978193283081055e-07, "token_fisher_curvature/p90": 9.202957153320312e-05, "token_fisher_curvature/p95": 7.46875, "token_fisher_curvature/p99": 2500672.0, "token_fisher_curvature/var": 7463839465472.0, "token_fisher_kl_divergence": 2.146626684407238e-05, "token_fisher_kl_divergence/max": 0.00811767578125, "token_fisher_kl_divergence/median": 2.552627671550359e-25, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.1863728457425373e-30, "token_fisher_kl_divergence/p75": 3.6845933205562065e-20, "token_fisher_kl_divergence/p85": 4.163336342344337e-17, "token_fisher_kl_divergence/p90": 9.325873406851315e-15, "token_fisher_kl_divergence/p95": 7.566995918750763e-10, "token_fisher_kl_divergence/p99": 0.00025296956300735474, "token_fisher_kl_divergence/var": 7.67067618312467e-08, "token_full_update_term": 0.0007102670497260988, "token_full_update_term/max": 0.1357421875, "token_full_update_term/median": 4.54009659728305e-19, "token_full_update_term/min": -5.62518835067749e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.751665644813329e-12, "token_full_update_term/p85": 5.966285243630409e-10, "token_full_update_term/p90": 2.10711732506752e-08, "token_full_update_term/p95": 8.302740752696991e-06, "token_full_update_term/p99": 0.0216064453125, "token_full_update_term/var": 4.4000207708450034e-05, "token_hessian_coeff": -248.658935546875, "token_hessian_coeff/max": 78643200.0, "token_hessian_coeff/median": -2.5902409106492996e-09, "token_hessian_coeff/min": -10158080.0, "token_hessian_coeff/p25": -0.0003757476806640625, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.19361114501953125, "token_hessian_coeff/var": 4831776342016.0, "token_hessian_coeff_abs": 196810.0625, "token_hessian_coeff_abs/max": 78643200.0, "token_hessian_coeff_abs/median": 2.60770320892334e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.0014095306396484375, "token_hessian_coeff_abs/p99": 5918848.0, "token_hessian_coeff_abs/var": 4793041420288.0 }, { "accuracy_reward": 0.8229166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.14725878834724426, "adam_stats/lm_head/lr_effective_max": 6.972911796765402e-05, "adam_stats/lm_head/lr_effective_mean": -1.907197143269368e-11, "adam_stats/lm_head/lr_effective_min": -6.781258707633242e-05, "adam_stats/lm_head/lr_effective_std": 1.577570060362632e-06, "adam_stats/lr_effective_max": 7.9239638580475e-05, "adam_stats/lr_effective_mean": -2.945776678142664e-10, "adam_stats/lr_effective_min": -7.832627306925133e-05, "adam_stats/m_t_max": 0.003161438973620534, "adam_stats/m_t_mean": -1.5777101847191943e-11, "adam_stats/m_t_min": -0.0022944516967982054, "adam_stats/v_t_max": 2.6171557692578062e-05, "adam_stats/v_t_mean": 1.729957865327647e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8229166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.14725878834724426, "all_logprobs": -0.012012380175292492, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.28125, "all_logprobs/p1": -0.25751960277557373, "all_logprobs/p10": -1.1026859283447266e-05, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.001068115234375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.019124740734696388, "clip_ratio": 0.0, "completion_length": 644.9583740234375, "completion_length/correct": 567.50634765625, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 547.0, "completion_length/correct/min": 222.0, "completion_length/correct/p25": 313.0, "completion_length/correct/p75": 754.0, "completion_length/correct/var": 58127.97265625, "completion_length/incorrect": 1004.8823852539062, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 699.0, "completion_length/incorrect/p25": 1024.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 6213.2353515625, "completion_length/max": 1024.0, "completion_length/median": 587.0, "completion_length/min": 222.0, "completion_length/p25": 403.75, "completion_length/p75": 936.25, "completion_length/var": 76942.84375, "curvature_clip_ratio_token_fisher": 0.016570838168263435, "curvature_clip_ratio_token_hessian": 0.011241036467254162, "curvature_clip_ratio_total_fisher": 0.016570838168263435, "curvature_clip_ratio_total_full": 0.016570838168263435, "curvature_clip_ratio_total_hessian": 0.011241036467254162, "epoch": 0.04, "feature_vector_variance/max_squared_error": 71137.21875, "feature_vector_variance/metric": 28227.953125, "generated_tokens/total": 1550609.0, "global_fisher_curvature": 83456.0, "global_fisher_curvature/max": 83456.0, "global_fisher_curvature/median": 83456.0, "global_fisher_curvature/min": 83456.0, "global_fisher_curvature/p25": 83456.0, "global_fisher_curvature/p75": 83456.0, "global_fisher_curvature/p85": 83456.0, "global_fisher_curvature/p90": 83456.0, "global_fisher_curvature/p95": 83456.0, "global_fisher_curvature/p99": 83456.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 8.344650268554688e-06, "global_fisher_kl_divergence/max": 8.344650268554688e-06, "global_fisher_kl_divergence/median": 8.344650268554688e-06, "global_fisher_kl_divergence/min": 8.344650268554688e-06, "global_fisher_kl_divergence/p25": 8.344650268554688e-06, "global_fisher_kl_divergence/p75": 8.344650268554688e-06, "global_fisher_kl_divergence/p85": 8.344650268554688e-06, "global_fisher_kl_divergence/p90": 8.344650268554688e-06, "global_fisher_kl_divergence/p95": 8.344650268554688e-06, "global_fisher_kl_divergence/p99": 8.344650268554688e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.115234375, "global_full_update_term/max": 0.115234375, "global_full_update_term/median": 0.115234375, "global_full_update_term/min": 0.115234375, "global_full_update_term/p25": 0.115234375, "global_full_update_term/p75": 0.115234375, "global_full_update_term/p85": 0.115234375, "global_full_update_term/p90": 0.115234375, "global_full_update_term/p95": 0.115234375, "global_full_update_term/p99": 0.115234375, "global_full_update_term/var": NaN, "global_hessian_coeff": 15104.0, "global_hessian_coeff/max": 15104.0, "global_hessian_coeff/median": 15104.0, "global_hessian_coeff/min": 15104.0, "global_hessian_coeff/p25": 15104.0, "global_hessian_coeff/p75": 15104.0, "global_hessian_coeff/p99": 15104.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 15104.0, "global_hessian_coeff_abs/max": 15104.0, "global_hessian_coeff_abs/median": 15104.0, "global_hessian_coeff_abs/min": 15104.0, "global_hessian_coeff_abs/p25": 15104.0, "global_hessian_coeff_abs/p75": 15104.0, "global_hessian_coeff_abs/p99": 15104.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04175562784075737, "learning_rate": 1.3995190528383292e-05, "loss": -0.8229, "masked_global_fisher_curvature": 312.0, "masked_global_fisher_curvature/max": 312.0, "masked_global_fisher_curvature/median": 312.0, "masked_global_fisher_curvature/min": 312.0, "masked_global_fisher_curvature/p25": 312.0, "masked_global_fisher_curvature/p75": 312.0, "masked_global_fisher_curvature/p85": 312.0, "masked_global_fisher_curvature/p90": 312.0, "masked_global_fisher_curvature/p95": 312.0, "masked_global_fisher_curvature/p99": 312.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/max": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/median": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/min": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/p25": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/p75": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/p85": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/p90": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/p95": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/p99": 3.119930624961853e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.005523681640625, "masked_global_full_update_term/max": 0.005523681640625, "masked_global_full_update_term/median": 0.005523681640625, "masked_global_full_update_term/min": 0.005523681640625, "masked_global_full_update_term/p25": 0.005523681640625, "masked_global_full_update_term/p75": 0.005523681640625, "masked_global_full_update_term/p85": 0.005523681640625, "masked_global_full_update_term/p90": 0.005523681640625, "masked_global_full_update_term/p95": 0.005523681640625, "masked_global_full_update_term/p99": 0.005523681640625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2544.0, "masked_global_hessian_coeff/max": -2544.0, "masked_global_hessian_coeff/median": -2544.0, "masked_global_hessian_coeff/min": -2544.0, "masked_global_hessian_coeff/p25": -2544.0, "masked_global_hessian_coeff/p75": -2544.0, "masked_global_hessian_coeff/p99": -2544.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2544.0, "masked_global_hessian_coeff_abs/max": 2544.0, "masked_global_hessian_coeff_abs/median": 2544.0, "masked_global_hessian_coeff_abs/min": 2544.0, "masked_global_hessian_coeff_abs/p25": 2544.0, "masked_global_hessian_coeff_abs/p75": 2544.0, "masked_global_hessian_coeff_abs/p99": 2544.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.15106201171875, "masked_per_sentence_gradient_norm/max": 9.25, "masked_per_sentence_gradient_norm/median": 1.3984375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.728515625, "masked_per_sentence_gradient_norm/p75": 2.73046875, "masked_per_sentence_gradient_norm/var": 4.679042816162109, "masked_per_token_gradient_norm": 0.04255068674683571, "masked_per_token_gradient_norm/max": 10.3125, "masked_per_token_gradient_norm/median": 2.764863893389702e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.6135315895080566e-07, "masked_per_token_gradient_norm/var": 0.196370467543602, "masked_sentence_fisher_curvature": 243.80078125, "masked_sentence_fisher_curvature/max": 764.0, "masked_sentence_fisher_curvature/median": 218.0, "masked_sentence_fisher_curvature/min": 25.625, "masked_sentence_fisher_curvature/p25": 129.0, "masked_sentence_fisher_curvature/p75": 319.5, "masked_sentence_fisher_curvature/p85": 397.5, "masked_sentence_fisher_curvature/p90": 431.0, "masked_sentence_fisher_curvature/p95": 568.0, "masked_sentence_fisher_curvature/p99": 726.0001220703125, "masked_sentence_fisher_curvature/var": 23837.744140625, "masked_sentence_fisher_kl_divergence": 2.431109713540991e-08, "masked_sentence_fisher_kl_divergence/max": 7.636845111846924e-08, "masked_sentence_fisher_kl_divergence/median": 2.176966518163681e-08, "masked_sentence_fisher_kl_divergence/min": 2.561137080192566e-09, "masked_sentence_fisher_kl_divergence/p25": 1.2863893061876297e-08, "masked_sentence_fisher_kl_divergence/p75": 3.1781382858753204e-08, "masked_sentence_fisher_kl_divergence/p85": 3.958120942115784e-08, "masked_sentence_fisher_kl_divergence/p90": 4.2957253754138947e-08, "masked_sentence_fisher_kl_divergence/p95": 5.657784640789032e-08, "masked_sentence_fisher_kl_divergence/p99": 7.238705990175731e-08, "masked_sentence_fisher_kl_divergence/var": 2.3706263142874806e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.896041870117188, "masked_sentence_full_gradient_variance/metric": 8.896041870117188, "masked_sentence_full_gradient_variance/p75": 8.896041870117188, "masked_sentence_full_gradient_variance/p90": 8.896041870117188, "masked_sentence_full_gradient_variance/p95": 8.896041870117188, "masked_sentence_full_gradient_variance/p99": 8.896041870117188, "masked_sentence_full_update_term": 0.0016303261509165168, "masked_sentence_full_update_term/max": 0.00714111328125, "masked_sentence_full_update_term/median": 0.001129150390625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00047588348388671875, "masked_sentence_full_update_term/p75": 0.002307891845703125, "masked_sentence_full_update_term/p85": 0.00302886962890625, "masked_sentence_full_update_term/p90": 0.00384521484375, "masked_sentence_full_update_term/p95": 0.00540924072265625, "masked_sentence_full_update_term/p99": 0.006329348310828209, "masked_sentence_full_update_term/var": 2.596531430754112e-06, "masked_sentence_hessian_coeff": -8585.25, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -8640.0, "masked_sentence_hessian_coeff/min": -27264.0, "masked_sentence_hessian_coeff/p25": -11568.0, "masked_sentence_hessian_coeff/p75": -3412.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 43040352.0, "masked_sentence_hessian_coeff_abs": 8585.25, "masked_sentence_hessian_coeff_abs/max": 27264.0, "masked_sentence_hessian_coeff_abs/median": 8576.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 3412.0, "masked_sentence_hessian_coeff_abs/p75": 11568.0, "masked_sentence_hessian_coeff_abs/p99": 27142.400390625, "masked_sentence_hessian_coeff_abs/var": 43040352.0, "masked_token_fisher_curvature": 295.8544921875, "masked_token_fisher_curvature/max": 99840.0, "masked_token_fisher_curvature/median": 1.951563910473908e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.105113298370845e-21, "masked_token_fisher_curvature/p75": 2.7853275241795927e-11, "masked_token_fisher_curvature/p85": 1.909211277961731e-08, "masked_token_fisher_curvature/p90": 2.9533402994275093e-06, "masked_token_fisher_curvature/p95": 0.01447153091430664, "masked_token_fisher_curvature/p99": 3936.0, "masked_token_fisher_curvature/var": 14856729.0, "masked_token_fisher_kl_divergence": 2.9495655695654932e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.9488019802579876e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.101631928189499e-31, "masked_token_fisher_kl_divergence/p75": 2.779326858178173e-21, "masked_token_fisher_kl_divergence/p85": 1.8973538018496328e-18, "masked_token_fisher_kl_divergence/p90": 2.9507239750550807e-16, "masked_token_fisher_kl_divergence/p95": 1.4431233985590097e-12, "masked_token_fisher_kl_divergence/p99": 3.9301812648773193e-07, "masked_token_fisher_kl_divergence/var": 1.4763506443835755e-13, "masked_token_full_update_term": 1.8300239389645867e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 1.7110065534536867e-19, "masked_token_full_update_term/min": -3.7997961044311523e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.755396159656812e-13, "masked_token_full_update_term/p85": 4.774847184307873e-11, "masked_token_full_update_term/p90": 1.025910023599863e-09, "masked_token_full_update_term/p95": 1.2014061212539673e-07, "masked_token_full_update_term/p99": 0.0004482269287109375, "masked_token_full_update_term/var": 3.8597473661639015e-08, "masked_token_hessian_coeff": -10854.4560546875, "masked_token_hessian_coeff/max": 1472.0, "masked_token_hessian_coeff/median": -1.127773430198431e-09, "masked_token_hessian_coeff/min": -2539520.0, "masked_token_hessian_coeff/p25": -6.330013275146484e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.011969566345214844, "masked_token_hessian_coeff/var": 13917021184.0, "masked_token_hessian_coeff_abs": 10854.6201171875, "masked_token_hessian_coeff_abs/max": 2539520.0, "masked_token_hessian_coeff_abs/median": 5.2852556109428406e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00019741058349609375, "masked_token_hessian_coeff_abs/p99": 276704.0, "masked_token_hessian_coeff_abs/var": 13917019136.0, "mean_logprobs": -0.011962890625, "mean_logprobs/var": 5.364418029785156e-05, "num_completions/total": 2400, "per_sentence_gradient_norm": 57.040367126464844, "per_sentence_gradient_norm/max": 191.0, "per_sentence_gradient_norm/median": 42.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 20.1875, "per_sentence_gradient_norm/p75": 77.375, "per_sentence_gradient_norm/var": 2389.32470703125, "per_token_feature_norm": 183.58294677734375, "per_token_feature_norm/max": 292.0, "per_token_feature_norm/median": 185.0, "per_token_feature_norm/min": 87.0, "per_token_feature_norm/p25": 178.0, "per_token_feature_norm/p75": 191.0, "per_token_feature_norm/var": 169.96734619140625, "per_token_gradient_norm": 1.1537704467773438, "per_token_gradient_norm/max": 280.0, "per_token_gradient_norm/median": 3.6925484891980886e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.103647708892822e-07, "per_token_gradient_norm/var": 141.4595184326172, "per_token_policy_error_norm": 0.006675911135971546, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006422416772693396, "policy_entropy": 0.013356692157685757, "policy_entropy/max": 2.96875, "policy_entropy/median": 3.8708094507455826e-09, "policy_entropy/min": 2.1281702799764296e-20, "policy_entropy/p25": 1.5916157281026244e-11, "policy_entropy/p75": 8.046627044677734e-07, "policy_entropy/var": 0.008011363446712494, "policy_loss": -0.8229166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.14725878834724426, "policy_sharpness": 9.621469497680664, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.426316261291504, "reward": 0.8229166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.14725878834724426, "rewards/accuracy_reward": 0.8229166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.14725878834724426, "sentence_fisher_curvature": 189725.375, "sentence_fisher_curvature/max": 757760.0, "sentence_fisher_curvature/median": 154624.0, "sentence_fisher_curvature/min": 219.0, "sentence_fisher_curvature/p25": 82432.0, "sentence_fisher_curvature/p75": 257280.0, "sentence_fisher_curvature/p85": 361984.0, "sentence_fisher_curvature/p90": 441344.0, "sentence_fisher_curvature/p95": 519168.0, "sentence_fisher_curvature/p99": 679936.25, "sentence_fisher_curvature/var": 27720398848.0, "sentence_fisher_kl_divergence": 1.890854400699027e-05, "sentence_fisher_kl_divergence/max": 7.534027099609375e-05, "sentence_fisher_kl_divergence/median": 1.537799835205078e-05, "sentence_fisher_kl_divergence/min": 2.1886080503463745e-08, "sentence_fisher_kl_divergence/p25": 8.225440979003906e-06, "sentence_fisher_kl_divergence/p75": 2.562999725341797e-05, "sentence_fisher_kl_divergence/p85": 3.606081008911133e-05, "sentence_fisher_kl_divergence/p90": 4.398822784423828e-05, "sentence_fisher_kl_divergence/p95": 5.179643630981445e-05, "sentence_fisher_kl_divergence/p99": 6.763937562936917e-05, "sentence_fisher_kl_divergence/var": 2.7512761513470707e-10, "sentence_full_gradient_variance/max_squared_error": 5520.5390625, "sentence_full_gradient_variance/metric": 5520.5390625, "sentence_full_gradient_variance/p75": 5520.5390625, "sentence_full_gradient_variance/p90": 5520.5390625, "sentence_full_gradient_variance/p95": 5520.5390625, "sentence_full_gradient_variance/p99": 5520.5390625, "sentence_full_update_term": 0.05035400390625, "sentence_full_update_term/max": 0.1552734375, "sentence_full_update_term/median": 0.044677734375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.022003173828125, "sentence_full_update_term/p75": 0.078125, "sentence_full_update_term/p85": 0.09619140625, "sentence_full_update_term/p90": 0.105224609375, "sentence_full_update_term/p95": 0.123291015625, "sentence_full_update_term/p99": 0.1478515863418579, "sentence_full_update_term/var": 0.0015113799599930644, "sentence_hessian_coeff": 4332.58349609375, "sentence_hessian_coeff/max": 419840.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -229376.0, "sentence_hessian_coeff/p25": -71168.0, "sentence_hessian_coeff/p75": 49408.0, "sentence_hessian_coeff/p99": 365363.375, "sentence_hessian_coeff/var": 14710391808.0, "sentence_hessian_coeff_abs": 85225.25, "sentence_hessian_coeff_abs/max": 419840.0, "sentence_hessian_coeff_abs/median": 63744.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 16240.0, "sentence_hessian_coeff_abs/p75": 125056.0, "sentence_hessian_coeff_abs/p99": 365363.375, "sentence_hessian_coeff_abs/var": 7389561344.0, "step": 25, "token_fisher_curvature": 163956.75, "token_fisher_curvature/max": 78118912.0, "token_fisher_curvature/median": 2.8102520310824275e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.4095157637903592e-21, "token_fisher_curvature/p75": 5.6843418860808015e-11, "token_fisher_curvature/p85": 6.938353180885315e-08, "token_fisher_curvature/p90": 1.9729137420654297e-05, "token_fisher_curvature/p95": 1.7421875, "token_fisher_curvature/p99": 1013376.0, "token_fisher_curvature/var": 5998739718144.0, "token_fisher_kl_divergence": 1.6345464246114716e-05, "token_fisher_kl_divergence/max": 0.007781982421875, "token_fisher_kl_divergence/median": 2.8070826451384484e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.4020769995139077e-31, "token_fisher_kl_divergence/p75": 5.6645328347631335e-21, "token_fisher_kl_divergence/p85": 6.911788849595091e-18, "token_fisher_kl_divergence/p90": 1.9637069748057456e-15, "token_fisher_kl_divergence/p95": 1.737134880386293e-10, "token_fisher_kl_divergence/p99": 0.00010087713599205017, "token_fisher_kl_divergence/var": 5.963658367136304e-08, "token_full_update_term": 0.0005496621015481651, "token_full_update_term/max": 0.1318359375, "token_full_update_term/median": 4.743384504624082e-19, "token_full_update_term/min": -3.7997961044311523e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 9.379164112033322e-13, "token_full_update_term/p85": 1.0265921446261927e-10, "token_full_update_term/p90": 3.448803909122944e-09, "token_full_update_term/p95": 1.1846423149108887e-06, "token_full_update_term/p99": 0.0135498046875, "token_full_update_term/var": 3.366101373103447e-05, "token_hessian_coeff": -7214.98974609375, "token_hessian_coeff/max": 76546048.0, "token_hessian_coeff/median": -1.4915713109076023e-09, "token_hessian_coeff/min": -10485760.0, "token_hessian_coeff/p25": -9.21487808227539e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.033374786376953125, "token_hessian_coeff/var": 4092864495616.0, "token_hessian_coeff_abs": 168737.09375, "token_hessian_coeff_abs/max": 76546048.0, "token_hessian_coeff_abs/median": 6.612390279769897e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.0002880096435546875, "token_hessian_coeff_abs/p99": 5242880.0, "token_hessian_coeff_abs/var": 4064443891712.0 }, { "accuracy_reward": 0.8541666865348816, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.12587718665599823, "adam_stats/lm_head/lr_effective_max": 6.952504918444902e-05, "adam_stats/lm_head/lr_effective_mean": 1.0703281143331633e-11, "adam_stats/lm_head/lr_effective_min": -7.341931632254273e-05, "adam_stats/lm_head/lr_effective_std": 1.5834800706215901e-06, "adam_stats/lr_effective_max": 7.946069672470912e-05, "adam_stats/lr_effective_mean": -3.1480426598839983e-10, "adam_stats/lr_effective_min": -8.013558544917032e-05, "adam_stats/m_t_max": 0.0026751593686640263, "adam_stats/m_t_mean": -1.5920934709479084e-11, "adam_stats/m_t_min": -0.002014511963352561, "adam_stats/v_t_max": 2.6148280085180886e-05, "adam_stats/v_t_mean": 1.730154539601736e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8541666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.12587718665599823, "all_logprobs": -0.02667071484029293, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.0, "all_logprobs/p1": -0.912109375, "all_logprobs/p10": -3.814697265625e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.0067138671875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.04673554375767708, "clip_ratio": 0.0, "completion_length": 519.28125, "completion_length/correct": 457.0243835449219, "completion_length/correct/max": 948.0, "completion_length/correct/median": 416.0, "completion_length/correct/min": 205.0, "completion_length/correct/p25": 380.0, "completion_length/correct/p75": 590.5, "completion_length/correct/var": 22743.58203125, "completion_length/incorrect": 883.9285888671875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 988.0, "completion_length/incorrect/min": 345.0, "completion_length/incorrect/p25": 877.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 48795.4609375, "completion_length/max": 1024.0, "completion_length/median": 428.0, "completion_length/min": 205.0, "completion_length/p25": 389.0, "completion_length/p75": 638.0, "completion_length/var": 49009.9296875, "curvature_clip_ratio_token_fisher": 0.02966841123998165, "curvature_clip_ratio_token_hessian": 0.02324928343296051, "curvature_clip_ratio_total_fisher": 0.02966841123998165, "curvature_clip_ratio_total_full": 0.02966841123998165, "curvature_clip_ratio_total_hessian": 0.02324928343296051, "epoch": 0.0416, "feature_vector_variance/max_squared_error": 55220.5390625, "feature_vector_variance/metric": 29012.419921875, "generated_tokens/total": 1600460.0, "global_fisher_curvature": 117760.0, "global_fisher_curvature/max": 117760.0, "global_fisher_curvature/median": 117760.0, "global_fisher_curvature/min": 117760.0, "global_fisher_curvature/p25": 117760.0, "global_fisher_curvature/p75": 117760.0, "global_fisher_curvature/p85": 117760.0, "global_fisher_curvature/p90": 117760.0, "global_fisher_curvature/p95": 117760.0, "global_fisher_curvature/p99": 117760.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.150369644165039e-05, "global_fisher_kl_divergence/max": 1.150369644165039e-05, "global_fisher_kl_divergence/median": 1.150369644165039e-05, "global_fisher_kl_divergence/min": 1.150369644165039e-05, "global_fisher_kl_divergence/p25": 1.150369644165039e-05, "global_fisher_kl_divergence/p75": 1.150369644165039e-05, "global_fisher_kl_divergence/p85": 1.150369644165039e-05, "global_fisher_kl_divergence/p90": 1.150369644165039e-05, "global_fisher_kl_divergence/p95": 1.150369644165039e-05, "global_fisher_kl_divergence/p99": 1.150369644165039e-05, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.14453125, "global_full_update_term/max": 0.14453125, "global_full_update_term/median": 0.14453125, "global_full_update_term/min": 0.14453125, "global_full_update_term/p25": 0.14453125, "global_full_update_term/p75": 0.14453125, "global_full_update_term/p85": 0.14453125, "global_full_update_term/p90": 0.14453125, "global_full_update_term/p95": 0.14453125, "global_full_update_term/p99": 0.14453125, "global_full_update_term/var": NaN, "global_hessian_coeff": 34304.0, "global_hessian_coeff/max": 34304.0, "global_hessian_coeff/median": 34304.0, "global_hessian_coeff/min": 34304.0, "global_hessian_coeff/p25": 34304.0, "global_hessian_coeff/p75": 34304.0, "global_hessian_coeff/p99": 34304.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 34304.0, "global_hessian_coeff_abs/max": 34304.0, "global_hessian_coeff_abs/median": 34304.0, "global_hessian_coeff_abs/min": 34304.0, "global_hessian_coeff_abs/p25": 34304.0, "global_hessian_coeff_abs/p75": 34304.0, "global_hessian_coeff_abs/p99": 34304.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.07133883237838745, "learning_rate": 1.3860360721173195e-05, "loss": -0.8542, "masked_global_fisher_curvature": 266.0, "masked_global_fisher_curvature/max": 266.0, "masked_global_fisher_curvature/median": 266.0, "masked_global_fisher_curvature/min": 266.0, "masked_global_fisher_curvature/p25": 266.0, "masked_global_fisher_curvature/p75": 266.0, "masked_global_fisher_curvature/p85": 266.0, "masked_global_fisher_curvature/p90": 266.0, "masked_global_fisher_curvature/p95": 266.0, "masked_global_fisher_curvature/p99": 266.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/max": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/median": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/min": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p25": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p75": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p85": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p90": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p95": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p99": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.004241943359375, "masked_global_full_update_term/max": 0.004241943359375, "masked_global_full_update_term/median": 0.004241943359375, "masked_global_full_update_term/min": 0.004241943359375, "masked_global_full_update_term/p25": 0.004241943359375, "masked_global_full_update_term/p75": 0.004241943359375, "masked_global_full_update_term/p85": 0.004241943359375, "masked_global_full_update_term/p90": 0.004241943359375, "masked_global_full_update_term/p95": 0.004241943359375, "masked_global_full_update_term/p99": 0.004241943359375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2912.0, "masked_global_hessian_coeff/max": -2912.0, "masked_global_hessian_coeff/median": -2912.0, "masked_global_hessian_coeff/min": -2912.0, "masked_global_hessian_coeff/p25": -2912.0, "masked_global_hessian_coeff/p75": -2912.0, "masked_global_hessian_coeff/p99": -2912.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2912.0, "masked_global_hessian_coeff_abs/max": 2912.0, "masked_global_hessian_coeff_abs/median": 2912.0, "masked_global_hessian_coeff_abs/min": 2912.0, "masked_global_hessian_coeff_abs/p25": 2912.0, "masked_global_hessian_coeff_abs/p75": 2912.0, "masked_global_hessian_coeff_abs/p99": 2912.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.2774658203125, "masked_per_sentence_gradient_norm/max": 7.09375, "masked_per_sentence_gradient_norm/median": 1.6875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 1.037109375, "masked_per_sentence_gradient_norm/p75": 3.03125, "masked_per_sentence_gradient_norm/var": 3.6429035663604736, "masked_per_token_gradient_norm": 0.05792699009180069, "masked_per_token_gradient_norm/max": 10.4375, "masked_per_token_gradient_norm/median": 8.076312951743603e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.98377799987793e-07, "masked_per_token_gradient_norm/var": 0.27330127358436584, "masked_sentence_fisher_curvature": 302.640625, "masked_sentence_fisher_curvature/max": 840.0, "masked_sentence_fisher_curvature/median": 268.0, "masked_sentence_fisher_curvature/min": 34.25, "masked_sentence_fisher_curvature/p25": 177.5, "masked_sentence_fisher_curvature/p75": 418.0, "masked_sentence_fisher_curvature/p85": 483.0, "masked_sentence_fisher_curvature/p90": 592.0, "masked_sentence_fisher_curvature/p95": 639.0, "masked_sentence_fisher_curvature/p99": 798.2001342773438, "masked_sentence_fisher_curvature/var": 34262.640625, "masked_sentence_fisher_kl_divergence": 2.9646042065678557e-08, "masked_sentence_fisher_kl_divergence/max": 8.242204785346985e-08, "masked_sentence_fisher_kl_divergence/median": 2.6193447411060333e-08, "masked_sentence_fisher_kl_divergence/min": 3.346940502524376e-09, "masked_sentence_fisher_kl_divergence/p25": 1.7345882952213287e-08, "masked_sentence_fisher_kl_divergence/p75": 4.0978193283081055e-08, "masked_sentence_fisher_kl_divergence/p85": 4.738103598356247e-08, "masked_sentence_fisher_kl_divergence/p90": 5.797483026981354e-08, "masked_sentence_fisher_kl_divergence/p95": 6.263144314289093e-08, "masked_sentence_fisher_kl_divergence/p99": 7.799827983490104e-08, "masked_sentence_fisher_kl_divergence/var": 3.2894389510744785e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.430479049682617, "masked_sentence_full_gradient_variance/metric": 8.430479049682617, "masked_sentence_full_gradient_variance/p75": 8.430479049682617, "masked_sentence_full_gradient_variance/p90": 8.430479049682617, "masked_sentence_full_gradient_variance/p95": 8.430479049682617, "masked_sentence_full_gradient_variance/p99": 8.430479049682617, "masked_sentence_full_update_term": 0.0018005173187702894, "masked_sentence_full_update_term/max": 0.006622314453125, "masked_sentence_full_update_term/median": 0.001556396484375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0005064010620117188, "masked_sentence_full_update_term/p75": 0.002582550048828125, "masked_sentence_full_update_term/p85": 0.003383636474609375, "masked_sentence_full_update_term/p90": 0.00405120849609375, "masked_sentence_full_update_term/p95": 0.004852294921875, "masked_sentence_full_update_term/p99": 0.005955507513135672, "masked_sentence_full_update_term/var": 2.418523763481062e-06, "masked_sentence_hessian_coeff": -11616.833984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -11648.0, "masked_sentence_hessian_coeff/min": -35072.0, "masked_sentence_hessian_coeff/p25": -16992.0, "masked_sentence_hessian_coeff/p75": -4912.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 68174728.0, "masked_sentence_hessian_coeff_abs": 11616.833984375, "masked_sentence_hessian_coeff_abs/max": 35072.0, "masked_sentence_hessian_coeff_abs/median": 11392.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 4912.0, "masked_sentence_hessian_coeff_abs/p75": 16992.0, "masked_sentence_hessian_coeff_abs/p99": 30329.615234375, "masked_sentence_hessian_coeff_abs/var": 68174728.0, "masked_token_fisher_curvature": 384.5079345703125, "masked_token_fisher_curvature/max": 99840.0, "masked_token_fisher_curvature/median": 5.967448757360216e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.064642808932357e-21, "masked_token_fisher_curvature/p75": 7.867129170335829e-11, "masked_token_fisher_curvature/p85": 4.905814421363175e-08, "masked_token_fisher_curvature/p90": 7.063150405883789e-06, "masked_token_fisher_curvature/p95": 0.06989479064941406, "masked_token_fisher_curvature/p99": 8311.5, "masked_token_fisher_curvature/var": 18812804.0, "masked_token_fisher_kl_divergence": 3.764683143003822e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 5.856503360360792e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.018374581717823e-31, "masked_token_fisher_kl_divergence/p75": 7.72917564369549e-21, "masked_token_fisher_kl_divergence/p85": 4.807123733904968e-18, "masked_token_fisher_kl_divergence/p90": 6.904199434387692e-16, "masked_token_fisher_kl_divergence/p95": 6.847966638190428e-12, "masked_token_fisher_kl_divergence/p99": 8.137722034007311e-07, "masked_token_fisher_kl_divergence/var": 1.803100041237321e-13, "masked_token_full_update_term": 2.521421447454486e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 1.666960840196463e-18, "masked_token_full_update_term/min": -1.296401023864746e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.5916157281026244e-12, "masked_token_full_update_term/p85": 1.509761204943061e-10, "masked_token_full_update_term/p90": 3.448803909122944e-09, "masked_token_full_update_term/p95": 5.157635314390063e-07, "masked_token_full_update_term/p99": 0.00083160400390625, "masked_token_full_update_term/var": 5.476357145539623e-08, "masked_token_hessian_coeff": -15371.955078125, "masked_token_hessian_coeff/max": 239.0, "masked_token_hessian_coeff/median": -3.1141098588705063e-09, "masked_token_hessian_coeff/min": -2637824.0, "masked_token_hessian_coeff/p25": -0.00017070770263671875, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.04025077819824219, "masked_token_hessian_coeff/var": 21012531200.0, "masked_token_hessian_coeff_abs": 15371.998046875, "masked_token_hessian_coeff_abs/max": 2637824.0, "masked_token_hessian_coeff_abs/median": 1.7229467630386353e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.0006265640258789062, "masked_token_hessian_coeff_abs/p99": 509088.0, "masked_token_hessian_coeff_abs/var": 21012531200.0, "mean_logprobs": -0.0230712890625, "mean_logprobs/var": 0.002044677734375, "num_completions/total": 2496, "per_sentence_gradient_norm": 59.75260543823242, "per_sentence_gradient_norm/max": 177.0, "per_sentence_gradient_norm/median": 63.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 18.5, "per_sentence_gradient_norm/p75": 85.625, "per_sentence_gradient_norm/var": 2260.541015625, "per_token_feature_norm": 184.8432159423828, "per_token_feature_norm/max": 253.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 93.0, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 192.0, "per_token_feature_norm/var": 162.51242065429688, "per_token_gradient_norm": 2.7332661151885986, "per_token_gradient_norm/max": 266.0, "per_token_gradient_norm/median": 1.3533281162381172e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 9.513874063560301e-18, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.8328428268432617e-06, "per_token_gradient_norm/var": 371.86688232421875, "per_token_policy_error_norm": 0.01398879662156105, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.012813858687877655, "policy_entropy": 0.028518468141555786, "policy_entropy/max": 3.03125, "policy_entropy/median": 7.799826562404633e-09, "policy_entropy/min": 7.093900933254765e-21, "policy_entropy/p25": 2.262368070660159e-11, "policy_entropy/p75": 1.5050172805786133e-06, "policy_entropy/var": 0.030089590698480606, "policy_loss": -0.8541666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.12587718665599823, "policy_sharpness": 9.487606048583984, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 3.4857702255249023, "reward": 0.8541666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.12587718665599823, "rewards/accuracy_reward": 0.8541666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.12587718665599823, "sentence_fisher_curvature": 273008.21875, "sentence_fisher_curvature/max": 933888.0, "sentence_fisher_curvature/median": 252928.0, "sentence_fisher_curvature/min": 139.0, "sentence_fisher_curvature/p25": 121216.0, "sentence_fisher_curvature/p75": 385024.0, "sentence_fisher_curvature/p85": 499200.0, "sentence_fisher_curvature/p90": 557056.0, "sentence_fisher_curvature/p95": 645120.0, "sentence_fisher_curvature/p99": 809370.0, "sentence_fisher_curvature/var": 43678785536.0, "sentence_fisher_kl_divergence": 2.6737601729109883e-05, "sentence_fisher_kl_divergence/max": 9.1552734375e-05, "sentence_fisher_kl_divergence/median": 2.47955322265625e-05, "sentence_fisher_kl_divergence/min": 1.3620592653751373e-08, "sentence_fisher_kl_divergence/p25": 1.1876225471496582e-05, "sentence_fisher_kl_divergence/p75": 3.7670135498046875e-05, "sentence_fisher_kl_divergence/p85": 4.881620407104492e-05, "sentence_fisher_kl_divergence/p90": 5.459785461425781e-05, "sentence_fisher_kl_divergence/p95": 6.306171417236328e-05, "sentence_fisher_kl_divergence/p99": 7.932190055726096e-05, "sentence_fisher_kl_divergence/var": 4.1893988278474126e-10, "sentence_full_gradient_variance/max_squared_error": 5698.8671875, "sentence_full_gradient_variance/metric": 5698.8671875, "sentence_full_gradient_variance/p75": 5698.8671875, "sentence_full_gradient_variance/p90": 5698.8671875, "sentence_full_gradient_variance/p95": 5698.8671875, "sentence_full_gradient_variance/p99": 5698.8671875, "sentence_full_update_term": 0.05674394220113754, "sentence_full_update_term/max": 0.2060546875, "sentence_full_update_term/median": 0.046142578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.01788330078125, "sentence_full_update_term/p75": 0.0849609375, "sentence_full_update_term/p85": 0.103271484375, "sentence_full_update_term/p90": 0.12060546875, "sentence_full_update_term/p95": 0.14013671875, "sentence_full_update_term/p99": 0.17636728286743164, "sentence_full_update_term/var": 0.0022637243382632732, "sentence_hessian_coeff": 42654.0859375, "sentence_hessian_coeff/max": 499712.0, "sentence_hessian_coeff/median": 24064.0, "sentence_hessian_coeff/min": -220160.0, "sentence_hessian_coeff/p25": -33056.0, "sentence_hessian_coeff/p75": 95872.0, "sentence_hessian_coeff/p99": 416051.46875, "sentence_hessian_coeff/var": 16160137216.0, "sentence_hessian_coeff_abs": 94584.75, "sentence_hessian_coeff_abs/max": 499712.0, "sentence_hessian_coeff_abs/median": 77312.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 25472.0, "sentence_hessian_coeff_abs/p75": 121856.0, "sentence_hessian_coeff_abs/p99": 416051.46875, "sentence_hessian_coeff_abs/var": 8958213120.0, "step": 26, "token_fisher_curvature": 555583.3125, "token_fisher_curvature/max": 83886080.0, "token_fisher_curvature/median": 1.1102230246251565e-15, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 3.3087224502121107e-21, "token_fisher_curvature/p75": 2.7466739993542433e-10, "token_fisher_curvature/p85": 5.103647708892822e-07, "token_fisher_curvature/p90": 0.0004253387451171875, "token_fisher_curvature/p95": 612.0, "token_fisher_curvature/p99": 20971520.0, "token_fisher_curvature/var": 24919488331776.0, "token_fisher_kl_divergence": 5.4404063121182844e-05, "token_fisher_kl_divergence/max": 0.00823974609375, "token_fisher_kl_divergence/median": 1.0905213153775267e-25, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.2355623065705562e-31, "token_fisher_kl_divergence/p75": 2.6893296075324036e-20, "token_fisher_kl_divergence/p85": 4.9873299934333204e-17, "token_fisher_kl_divergence/p90": 4.1744385725905886e-14, "token_fisher_kl_divergence/p95": 6.007030606269836e-08, "token_fisher_kl_divergence/p99": 0.0020599365234375, "token_fisher_kl_divergence/var": 2.389543851677445e-07, "token_full_update_term": 0.0014623124152421951, "token_full_update_term/max": 0.1357421875, "token_full_update_term/median": 6.830473686658678e-18, "token_full_update_term/min": -1.296401023864746e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.050093593832571e-12, "token_full_update_term/p85": 6.439222488552332e-10, "token_full_update_term/p90": 3.585591912269592e-08, "token_full_update_term/p95": 5.042552947998047e-05, "token_full_update_term/p99": 0.064453125, "token_full_update_term/var": 0.00011321314377710223, "token_hessian_coeff": 217800.609375, "token_hessian_coeff/max": 83361792.0, "token_hessian_coeff/median": -3.754394128918648e-09, "token_hessian_coeff/min": -10747904.0, "token_hessian_coeff/p25": -0.000255584716796875, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 3686400.0, "token_hessian_coeff/var": 17067975639040.0, "token_hessian_coeff_abs": 466662.875, "token_hessian_coeff_abs/max": 83361792.0, "token_hessian_coeff_abs/median": 2.8312206268310547e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 2.7538735181131813e-17, "token_hessian_coeff_abs/p75": 0.0013275146484375, "token_hessian_coeff_abs/p99": 9830400.0, "token_hessian_coeff_abs/var": 16897635516416.0 }, { "accuracy_reward": 0.7395833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.19462722539901733, "adam_stats/lm_head/lr_effective_max": 7.240094419103116e-05, "adam_stats/lm_head/lr_effective_mean": -2.715602395730876e-11, "adam_stats/lm_head/lr_effective_min": -7.64549695304595e-05, "adam_stats/lm_head/lr_effective_std": 1.536059244244825e-06, "adam_stats/lr_effective_max": 7.983062823768705e-05, "adam_stats/lr_effective_mean": -2.884493199850624e-10, "adam_stats/lr_effective_min": -7.862566417315975e-05, "adam_stats/m_t_max": 0.00234050489962101, "adam_stats/m_t_mean": -1.1328219612360968e-11, "adam_stats/m_t_min": -0.00184434128459543, "adam_stats/v_t_max": 2.6122583221876994e-05, "adam_stats/v_t_mean": 1.7289517257115805e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7395833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.19462722539901733, "all_logprobs": -0.013867865316569805, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.75, "all_logprobs/p1": -0.38671875, "all_logprobs/p10": -1.4781951904296875e-05, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0012527462095022202, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.021649224683642387, "clip_ratio": 0.0, "completion_length": 628.3021240234375, "completion_length/correct": 570.3943481445312, "completion_length/correct/max": 1014.0, "completion_length/correct/median": 514.0, "completion_length/correct/min": 258.0, "completion_length/correct/p25": 451.0, "completion_length/correct/p75": 680.0, "completion_length/correct/var": 39995.296875, "completion_length/incorrect": 792.760009765625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 730.0, "completion_length/incorrect/min": 354.0, "completion_length/incorrect/p25": 624.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 56382.35546875, "completion_length/max": 1024.0, "completion_length/median": 577.0, "completion_length/min": 258.0, "completion_length/p25": 465.25, "completion_length/p75": 836.25, "completion_length/var": 53337.81640625, "curvature_clip_ratio_token_fisher": 0.0162972304970026, "curvature_clip_ratio_token_hessian": 0.011787721887230873, "curvature_clip_ratio_total_fisher": 0.0162972304970026, "curvature_clip_ratio_total_full": 0.0162972304970026, "curvature_clip_ratio_total_hessian": 0.011787721887230873, "epoch": 0.0432, "feature_vector_variance/max_squared_error": 66598.03125, "feature_vector_variance/metric": 28803.830078125, "generated_tokens/total": 1660777.0, "global_fisher_curvature": 93184.0, "global_fisher_curvature/max": 93184.0, "global_fisher_curvature/median": 93184.0, "global_fisher_curvature/min": 93184.0, "global_fisher_curvature/p25": 93184.0, "global_fisher_curvature/p75": 93184.0, "global_fisher_curvature/p85": 93184.0, "global_fisher_curvature/p90": 93184.0, "global_fisher_curvature/p95": 93184.0, "global_fisher_curvature/p99": 93184.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 8.940696716308594e-06, "global_fisher_kl_divergence/max": 8.940696716308594e-06, "global_fisher_kl_divergence/median": 8.940696716308594e-06, "global_fisher_kl_divergence/min": 8.940696716308594e-06, "global_fisher_kl_divergence/p25": 8.940696716308594e-06, "global_fisher_kl_divergence/p75": 8.940696716308594e-06, "global_fisher_kl_divergence/p85": 8.940696716308594e-06, "global_fisher_kl_divergence/p90": 8.940696716308594e-06, "global_fisher_kl_divergence/p95": 8.940696716308594e-06, "global_fisher_kl_divergence/p99": 8.940696716308594e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.12255859375, "global_full_update_term/max": 0.12255859375, "global_full_update_term/median": 0.12255859375, "global_full_update_term/min": 0.12255859375, "global_full_update_term/p25": 0.12255859375, "global_full_update_term/p75": 0.12255859375, "global_full_update_term/p85": 0.12255859375, "global_full_update_term/p90": 0.12255859375, "global_full_update_term/p95": 0.12255859375, "global_full_update_term/p99": 0.12255859375, "global_full_update_term/var": NaN, "global_hessian_coeff": 25216.0, "global_hessian_coeff/max": 25216.0, "global_hessian_coeff/median": 25216.0, "global_hessian_coeff/min": 25216.0, "global_hessian_coeff/p25": 25216.0, "global_hessian_coeff/p75": 25216.0, "global_hessian_coeff/p99": 25216.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 25216.0, "global_hessian_coeff_abs/max": 25216.0, "global_hessian_coeff_abs/median": 25216.0, "global_hessian_coeff_abs/min": 25216.0, "global_hessian_coeff_abs/p25": 25216.0, "global_hessian_coeff_abs/p75": 25216.0, "global_hessian_coeff_abs/p99": 25216.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04182526469230652, "learning_rate": 1.3717781794162813e-05, "loss": -0.7396, "masked_global_fisher_curvature": 252.0, "masked_global_fisher_curvature/max": 252.0, "masked_global_fisher_curvature/median": 252.0, "masked_global_fisher_curvature/min": 252.0, "masked_global_fisher_curvature/p25": 252.0, "masked_global_fisher_curvature/p75": 252.0, "masked_global_fisher_curvature/p85": 252.0, "masked_global_fisher_curvature/p90": 252.0, "masked_global_fisher_curvature/p95": 252.0, "masked_global_fisher_curvature/p99": 252.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/max": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/median": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/min": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/p25": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/p75": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/p85": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/p90": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/p95": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/p99": 2.421438694000244e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00543212890625, "masked_global_full_update_term/max": 0.00543212890625, "masked_global_full_update_term/median": 0.00543212890625, "masked_global_full_update_term/min": 0.00543212890625, "masked_global_full_update_term/p25": 0.00543212890625, "masked_global_full_update_term/p75": 0.00543212890625, "masked_global_full_update_term/p85": 0.00543212890625, "masked_global_full_update_term/p90": 0.00543212890625, "masked_global_full_update_term/p95": 0.00543212890625, "masked_global_full_update_term/p99": 0.00543212890625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2320.0, "masked_global_hessian_coeff/max": -2320.0, "masked_global_hessian_coeff/median": -2320.0, "masked_global_hessian_coeff/min": -2320.0, "masked_global_hessian_coeff/p25": -2320.0, "masked_global_hessian_coeff/p75": -2320.0, "masked_global_hessian_coeff/p99": -2320.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2320.0, "masked_global_hessian_coeff_abs/max": 2320.0, "masked_global_hessian_coeff_abs/median": 2320.0, "masked_global_hessian_coeff_abs/min": 2320.0, "masked_global_hessian_coeff_abs/p25": 2320.0, "masked_global_hessian_coeff_abs/p75": 2320.0, "masked_global_hessian_coeff_abs/p99": 2320.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.2226054668426514, "masked_per_sentence_gradient_norm/max": 8.6875, "masked_per_sentence_gradient_norm/median": 1.3515625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.46875, "masked_per_sentence_gradient_norm/var": 5.800905227661133, "masked_per_token_gradient_norm": 0.04178384318947792, "masked_per_token_gradient_norm/max": 10.0, "masked_per_token_gradient_norm/median": 1.2823875294998288e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.757886588573456e-07, "masked_per_token_gradient_norm/var": 0.18805885314941406, "masked_sentence_fisher_curvature": 246.8739471435547, "masked_sentence_fisher_curvature/max": 792.0, "masked_sentence_fisher_curvature/median": 217.0, "masked_sentence_fisher_curvature/min": 0.55859375, "masked_sentence_fisher_curvature/p25": 130.0, "masked_sentence_fisher_curvature/p75": 354.0, "masked_sentence_fisher_curvature/p85": 382.0, "masked_sentence_fisher_curvature/p90": 484.0, "masked_sentence_fisher_curvature/p95": 599.0, "masked_sentence_fisher_curvature/p99": 704.6002807617188, "masked_sentence_fisher_curvature/var": 30763.599609375, "masked_sentence_fisher_kl_divergence": 2.3704179952233062e-08, "masked_sentence_fisher_kl_divergence/max": 7.59027898311615e-08, "masked_sentence_fisher_kl_divergence/median": 2.0838342607021332e-08, "masked_sentence_fisher_kl_divergence/min": 5.3660187404602766e-11, "masked_sentence_fisher_kl_divergence/p25": 1.2514647096395493e-08, "masked_sentence_fisher_kl_divergence/p75": 3.3993273973464966e-08, "masked_sentence_fisher_kl_divergence/p85": 3.67872416973114e-08, "masked_sentence_fisher_kl_divergence/p90": 4.644971340894699e-08, "masked_sentence_fisher_kl_divergence/p95": 5.75091689825058e-08, "masked_sentence_fisher_kl_divergence/p99": 6.749763059588076e-08, "masked_sentence_fisher_kl_divergence/var": 2.833736548940645e-16, "masked_sentence_full_gradient_variance/max_squared_error": 10.397250175476074, "masked_sentence_full_gradient_variance/metric": 10.397250175476074, "masked_sentence_full_gradient_variance/p75": 10.397250175476074, "masked_sentence_full_gradient_variance/p90": 10.397250175476074, "masked_sentence_full_gradient_variance/p95": 10.397250175476074, "masked_sentence_full_gradient_variance/p99": 10.397250175476074, "masked_sentence_full_update_term": 0.001649151323363185, "masked_sentence_full_update_term/max": 0.006103515625, "masked_sentence_full_update_term/median": 0.0010986328125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002651214599609375, "masked_sentence_full_update_term/p85": 0.003662109375, "masked_sentence_full_update_term/p90": 0.004241943359375, "masked_sentence_full_update_term/p95": 0.00537872314453125, "masked_sentence_full_update_term/p99": 0.005929565988481045, "masked_sentence_full_update_term/var": 3.0715821139892796e-06, "masked_sentence_hessian_coeff": -7990.4169921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7648.0, "masked_sentence_hessian_coeff/min": -33792.0, "masked_sentence_hessian_coeff/p25": -12192.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 58966000.0, "masked_sentence_hessian_coeff_abs": 7990.4169921875, "masked_sentence_hessian_coeff_abs/max": 33792.0, "masked_sentence_hessian_coeff_abs/median": 7584.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 12192.0, "masked_sentence_hessian_coeff_abs/p99": 28441.6171875, "masked_sentence_hessian_coeff_abs/var": 58966000.0, "masked_token_fisher_curvature": 314.8551330566406, "masked_token_fisher_curvature/max": 103936.0, "masked_token_fisher_curvature/median": 4.753142324176451e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.5998900258307764e-21, "masked_token_fisher_curvature/p75": 6.184563972055912e-11, "masked_token_fisher_curvature/p85": 4.330649971961975e-08, "masked_token_fisher_curvature/p90": 6.389571353793144e-06, "masked_token_fisher_curvature/p95": 0.028564453125, "masked_token_fisher_curvature/p99": 5024.5, "masked_token_fisher_curvature/var": 14715046.0, "masked_token_fisher_kl_divergence": 3.023620820385986e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.564033653246686e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.4512664603419266e-31, "masked_token_fisher_kl_divergence/p75": 5.929230630780102e-21, "masked_token_fisher_kl_divergence/p85": 4.1470733097570545e-18, "masked_token_fisher_kl_divergence/p90": 6.130485659047724e-16, "masked_token_fisher_kl_divergence/p95": 2.7426949600339867e-12, "masked_token_fisher_kl_divergence/p99": 4.818430170416832e-07, "masked_token_fisher_kl_divergence/var": 1.3566300589417518e-13, "masked_token_full_update_term": 1.7870735973701812e-05, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.1920928955078125e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.588240815588506e-13, "masked_token_full_update_term/p85": 5.206857167650014e-11, "masked_token_full_update_term/p90": 1.2078089639544487e-09, "masked_token_full_update_term/p95": 1.6391277313232422e-07, "masked_token_full_update_term/p99": 0.00043487548828125, "masked_token_full_update_term/var": 3.657242331200905e-08, "masked_token_hessian_coeff": -11151.0810546875, "masked_token_hessian_coeff/max": 90.0, "masked_token_hessian_coeff/median": -2.580691216280684e-11, "masked_token_hessian_coeff/min": -2801664.0, "masked_token_hessian_coeff/p25": -4.2438507080078125e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.016317367553710938, "masked_token_hessian_coeff/var": 14739973120.0, "masked_token_hessian_coeff_abs": 11151.1044921875, "masked_token_hessian_coeff_abs/max": 2801664.0, "masked_token_hessian_coeff_abs/median": 2.9453076422214508e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.000186920166015625, "masked_token_hessian_coeff_abs/p99": 275808.0, "masked_token_hessian_coeff_abs/var": 14739973120.0, "mean_logprobs": -0.013671875, "mean_logprobs/var": 7.963180541992188e-05, "num_completions/total": 2592, "per_sentence_gradient_norm": 60.46354293823242, "per_sentence_gradient_norm/max": 207.0, "per_sentence_gradient_norm/median": 47.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 89.375, "per_sentence_gradient_norm/var": 3067.76318359375, "per_token_feature_norm": 185.32469177246094, "per_token_feature_norm/max": 278.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 85.5, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 192.0, "per_token_feature_norm/var": 163.1877899169922, "per_token_gradient_norm": 1.252681851387024, "per_token_gradient_norm/max": 282.0, "per_token_gradient_norm/median": 1.7007550923153758e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.252659320831299e-07, "per_token_gradient_norm/var": 159.62657165527344, "per_token_policy_error_norm": 0.007865475490689278, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.007353511173278093, "policy_entropy": 0.015066894702613354, "policy_entropy/max": 2.296875, "policy_entropy/median": 5.966285243630409e-09, "policy_entropy/min": 5.971582278142817e-20, "policy_entropy/p25": 2.7853275241795927e-11, "policy_entropy/p75": 1.1622905731201172e-06, "policy_entropy/var": 0.009603165090084076, "policy_loss": -0.7395833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.19462722539901733, "policy_sharpness": 9.604570388793945, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.556589365005493, "reward": 0.7395833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.19462722539901733, "rewards/accuracy_reward": 0.7395833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.19462722539901733, "sentence_fisher_curvature": 202235.21875, "sentence_fisher_curvature/max": 700416.0, "sentence_fisher_curvature/median": 200704.0, "sentence_fisher_curvature/min": 43.75, "sentence_fisher_curvature/p25": 4544.0, "sentence_fisher_curvature/p75": 294912.0, "sentence_fisher_curvature/p85": 398848.0, "sentence_fisher_curvature/p90": 419840.0, "sentence_fisher_curvature/p95": 478208.0, "sentence_fisher_curvature/p99": 669286.5, "sentence_fisher_curvature/var": 29356177408.0, "sentence_fisher_kl_divergence": 1.942012931976933e-05, "sentence_fisher_kl_divergence/max": 6.723403930664062e-05, "sentence_fisher_kl_divergence/median": 1.9311904907226562e-05, "sentence_fisher_kl_divergence/min": 4.190951585769653e-09, "sentence_fisher_kl_divergence/p25": 4.367902874946594e-07, "sentence_fisher_kl_divergence/p75": 2.8342008590698242e-05, "sentence_fisher_kl_divergence/p85": 3.832578659057617e-05, "sentence_fisher_kl_divergence/p90": 4.029273986816406e-05, "sentence_fisher_kl_divergence/p95": 4.589557647705078e-05, "sentence_fisher_kl_divergence/p99": 6.40630823909305e-05, "sentence_fisher_kl_divergence/var": 2.704749757498348e-10, "sentence_full_gradient_variance/max_squared_error": 6572.6474609375, "sentence_full_gradient_variance/metric": 6572.6474609375, "sentence_full_gradient_variance/p75": 6572.6474609375, "sentence_full_gradient_variance/p90": 6572.6474609375, "sentence_full_gradient_variance/p95": 6572.6474609375, "sentence_full_gradient_variance/p99": 6572.6474609375, "sentence_full_update_term": 0.051278434693813324, "sentence_full_update_term/max": 0.1787109375, "sentence_full_update_term/median": 0.044921875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0770263671875, "sentence_full_update_term/p85": 0.102294921875, "sentence_full_update_term/p90": 0.113037109375, "sentence_full_update_term/p95": 0.138427734375, "sentence_full_update_term/p99": 0.1573731154203415, "sentence_full_update_term/var": 0.0019194434862583876, "sentence_hessian_coeff": 14556.0, "sentence_hessian_coeff/max": 354304.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -219136.0, "sentence_hessian_coeff/p25": -25888.0, "sentence_hessian_coeff/p75": 55488.0, "sentence_hessian_coeff/p99": 344576.03125, "sentence_hessian_coeff/var": 10589580288.0, "sentence_hessian_coeff_abs": 67550.671875, "sentence_hessian_coeff_abs/max": 354304.0, "sentence_hessian_coeff_abs/median": 42496.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 98048.0, "sentence_hessian_coeff_abs/p99": 344576.03125, "sentence_hessian_coeff_abs/var": 6192562176.0, "step": 27, "token_fisher_curvature": 189053.578125, "token_fisher_curvature/max": 83886080.0, "token_fisher_curvature/median": 6.800116025829084e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 4.473392752686774e-21, "token_fisher_curvature/p75": 1.227817847393453e-10, "token_fisher_curvature/p85": 1.4156103134155273e-07, "token_fisher_curvature/p90": 4.220008850097656e-05, "token_fisher_curvature/p95": 2.46875, "token_fisher_curvature/p99": 1294336.0, "token_fisher_curvature/var": 7250943934464.0, "token_fisher_kl_divergence": 1.8160890249419026e-05, "token_fisher_kl_divergence/max": 0.008056640625, "token_fisher_kl_divergence/median": 6.54312789226516e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.2832681963172125e-31, "token_fisher_kl_divergence/p75": 1.1805521702356811e-20, "token_fisher_kl_divergence/p85": 1.360673726469308e-17, "token_fisher_kl_divergence/p90": 4.052314039881821e-15, "token_fisher_kl_divergence/p95": 2.3646862246096134e-10, "token_fisher_kl_divergence/p99": 0.0001239776611328125, "token_fisher_kl_divergence/var": 6.692376075534412e-08, "token_full_update_term": 0.0005925309960730374, "token_full_update_term/max": 0.134765625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.1920928955078125e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.968558980384842e-13, "token_full_update_term/p85": 1.1004885891452432e-10, "token_full_update_term/p90": 3.585569174902048e-09, "token_full_update_term/p95": 1.2740492820739746e-06, "token_full_update_term/p99": 0.014892578125, "token_full_update_term/var": 3.738148370757699e-05, "token_hessian_coeff": 1329.3111572265625, "token_hessian_coeff/max": 82837504.0, "token_hessian_coeff/median": -4.3655745685100555e-11, "token_hessian_coeff/min": -11010048.0, "token_hessian_coeff/p25": -6.151199340820312e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0439453125, "token_hessian_coeff/var": 4834686664704.0, "token_hessian_coeff_abs": 182862.96875, "token_hessian_coeff_abs/max": 82837504.0, "token_hessian_coeff_abs/median": 3.864988684654236e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.0002803802490234375, "token_hessian_coeff_abs/p99": 5636096.0, "token_hessian_coeff_abs/var": 4801249148928.0 }, { "accuracy_reward": 0.5104166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25252193212509155, "adam_stats/lm_head/lr_effective_max": 7.158552034525201e-05, "adam_stats/lm_head/lr_effective_mean": -2.098981578824155e-11, "adam_stats/lm_head/lr_effective_min": -7.63103598728776e-05, "adam_stats/lm_head/lr_effective_std": 1.4497759366349783e-06, "adam_stats/lr_effective_max": 7.561968232039362e-05, "adam_stats/lr_effective_mean": -2.8594682177640607e-10, "adam_stats/lr_effective_min": -7.63103598728776e-05, "adam_stats/m_t_max": 0.002912315307185054, "adam_stats/m_t_mean": 7.874027818655094e-12, "adam_stats/m_t_min": -0.002242811257019639, "adam_stats/v_t_max": 2.6105773940798827e-05, "adam_stats/v_t_mean": 1.7431743982304626e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5104166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25252193212509155, "all_logprobs": -0.012818166986107826, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.34375, "all_logprobs/p1": -0.3125, "all_logprobs/p10": -1.1324882507324219e-05, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.000911712646484375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.020434856414794922, "clip_ratio": 0.0, "completion_length": 570.3229370117188, "completion_length/correct": 453.16326904296875, "completion_length/correct/max": 826.0, "completion_length/correct/median": 411.0, "completion_length/correct/min": 281.0, "completion_length/correct/p25": 371.0, "completion_length/correct/p75": 497.0, "completion_length/correct/var": 14105.5556640625, "completion_length/incorrect": 692.4680786132812, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 617.0, "completion_length/incorrect/min": 358.0, "completion_length/incorrect/p25": 463.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 65279.1640625, "completion_length/max": 1024.0, "completion_length/median": 464.0, "completion_length/min": 281.0, "completion_length/p25": 372.75, "completion_length/p75": 660.25, "completion_length/var": 53197.00390625, "curvature_clip_ratio_token_fisher": 0.010191594250500202, "curvature_clip_ratio_token_hessian": 0.006611751392483711, "curvature_clip_ratio_total_fisher": 0.010191594250500202, "curvature_clip_ratio_total_full": 0.010191594250500202, "curvature_clip_ratio_total_hessian": 0.006611751392483711, "epoch": 0.0448, "feature_vector_variance/max_squared_error": 65428.73046875, "feature_vector_variance/metric": 28425.55859375, "generated_tokens/total": 1715528.0, "global_fisher_curvature": 64512.0, "global_fisher_curvature/max": 64512.0, "global_fisher_curvature/median": 64512.0, "global_fisher_curvature/min": 64512.0, "global_fisher_curvature/p25": 64512.0, "global_fisher_curvature/p75": 64512.0, "global_fisher_curvature/p85": 64512.0, "global_fisher_curvature/p90": 64512.0, "global_fisher_curvature/p95": 64512.0, "global_fisher_curvature/p99": 64512.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.079673767089844e-06, "global_fisher_kl_divergence/max": 6.079673767089844e-06, "global_fisher_kl_divergence/median": 6.079673767089844e-06, "global_fisher_kl_divergence/min": 6.079673767089844e-06, "global_fisher_kl_divergence/p25": 6.079673767089844e-06, "global_fisher_kl_divergence/p75": 6.079673767089844e-06, "global_fisher_kl_divergence/p85": 6.079673767089844e-06, "global_fisher_kl_divergence/p90": 6.079673767089844e-06, "global_fisher_kl_divergence/p95": 6.079673767089844e-06, "global_fisher_kl_divergence/p99": 6.079673767089844e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.115234375, "global_full_update_term/max": 0.115234375, "global_full_update_term/median": 0.115234375, "global_full_update_term/min": 0.115234375, "global_full_update_term/p25": 0.115234375, "global_full_update_term/p75": 0.115234375, "global_full_update_term/p85": 0.115234375, "global_full_update_term/p90": 0.115234375, "global_full_update_term/p95": 0.115234375, "global_full_update_term/p99": 0.115234375, "global_full_update_term/var": NaN, "global_hessian_coeff": 19840.0, "global_hessian_coeff/max": 19840.0, "global_hessian_coeff/median": 19840.0, "global_hessian_coeff/min": 19840.0, "global_hessian_coeff/p25": 19840.0, "global_hessian_coeff/p75": 19840.0, "global_hessian_coeff/p99": 19840.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 19840.0, "global_hessian_coeff_abs/max": 19840.0, "global_hessian_coeff_abs/median": 19840.0, "global_hessian_coeff_abs/min": 19840.0, "global_hessian_coeff_abs/p25": 19840.0, "global_hessian_coeff_abs/p75": 19840.0, "global_hessian_coeff_abs/p99": 19840.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.17544032633304596, "learning_rate": 1.3567627457812107e-05, "loss": -0.5104, "masked_global_fisher_curvature": 170.0, "masked_global_fisher_curvature/max": 170.0, "masked_global_fisher_curvature/median": 170.0, "masked_global_fisher_curvature/min": 170.0, "masked_global_fisher_curvature/p25": 170.0, "masked_global_fisher_curvature/p75": 170.0, "masked_global_fisher_curvature/p85": 170.0, "masked_global_fisher_curvature/p90": 170.0, "masked_global_fisher_curvature/p95": 170.0, "masked_global_fisher_curvature/p99": 170.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/max": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/median": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/min": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/p25": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/p75": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/p85": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/p90": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/p95": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/p99": 1.594889909029007e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0032196044921875, "masked_global_full_update_term/max": 0.0032196044921875, "masked_global_full_update_term/median": 0.0032196044921875, "masked_global_full_update_term/min": 0.0032196044921875, "masked_global_full_update_term/p25": 0.0032196044921875, "masked_global_full_update_term/p75": 0.0032196044921875, "masked_global_full_update_term/p85": 0.0032196044921875, "masked_global_full_update_term/p90": 0.0032196044921875, "masked_global_full_update_term/p95": 0.0032196044921875, "masked_global_full_update_term/p99": 0.0032196044921875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1376.0, "masked_global_hessian_coeff/max": -1376.0, "masked_global_hessian_coeff/median": -1376.0, "masked_global_hessian_coeff/min": -1376.0, "masked_global_hessian_coeff/p25": -1376.0, "masked_global_hessian_coeff/p75": -1376.0, "masked_global_hessian_coeff/p99": -1376.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1376.0, "masked_global_hessian_coeff_abs/max": 1376.0, "masked_global_hessian_coeff_abs/median": 1376.0, "masked_global_hessian_coeff_abs/min": 1376.0, "masked_global_hessian_coeff_abs/p25": 1376.0, "masked_global_hessian_coeff_abs/p75": 1376.0, "masked_global_hessian_coeff_abs/p99": 1376.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.3433024883270264, "masked_per_sentence_gradient_norm/max": 7.03125, "masked_per_sentence_gradient_norm/median": 0.3046875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.296875, "masked_per_sentence_gradient_norm/var": 3.1285009384155273, "masked_per_token_gradient_norm": 0.02144358865916729, "masked_per_token_gradient_norm/max": 12.1875, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.3760443329811096e-09, "masked_per_token_gradient_norm/var": 0.09491202235221863, "masked_sentence_fisher_curvature": 219.4479217529297, "masked_sentence_fisher_curvature/max": 788.0, "masked_sentence_fisher_curvature/median": 167.0, "masked_sentence_fisher_curvature/min": 12.8125, "masked_sentence_fisher_curvature/p25": 120.875, "masked_sentence_fisher_curvature/p75": 281.0, "masked_sentence_fisher_curvature/p85": 362.5, "masked_sentence_fisher_curvature/p90": 401.0, "masked_sentence_fisher_curvature/p95": 460.0, "masked_sentence_fisher_curvature/p99": 753.8001098632812, "masked_sentence_fisher_curvature/var": 22169.009765625, "masked_sentence_fisher_kl_divergence": 2.0653564192230078e-08, "masked_sentence_fisher_kl_divergence/max": 7.404014468193054e-08, "masked_sentence_fisher_kl_divergence/median": 1.57160684466362e-08, "masked_sentence_fisher_kl_divergence/min": 1.2078089639544487e-09, "masked_sentence_fisher_kl_divergence/p25": 1.1365045793354511e-08, "masked_sentence_fisher_kl_divergence/p75": 2.6426278054714203e-08, "masked_sentence_fisher_kl_divergence/p85": 3.41096892952919e-08, "masked_sentence_fisher_kl_divergence/p90": 3.771856427192688e-08, "masked_sentence_fisher_kl_divergence/p95": 4.330649971961975e-08, "masked_sentence_fisher_kl_divergence/p99": 7.094350706893238e-08, "masked_sentence_fisher_kl_divergence/var": 1.9621272054195572e-16, "masked_sentence_full_gradient_variance/max_squared_error": 4.697248458862305, "masked_sentence_full_gradient_variance/metric": 4.697248458862305, "masked_sentence_full_gradient_variance/p75": 4.697248458862305, "masked_sentence_full_gradient_variance/p90": 4.697248458862305, "masked_sentence_full_gradient_variance/p95": 4.697248458862305, "masked_sentence_full_gradient_variance/p99": 4.697248458862305, "masked_sentence_full_update_term": 0.0009108086815103889, "masked_sentence_full_update_term/max": 0.00469970703125, "masked_sentence_full_update_term/median": 0.0001850128173828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00170135498046875, "masked_sentence_full_update_term/p85": 0.002544403076171875, "masked_sentence_full_update_term/p90": 0.002685546875, "masked_sentence_full_update_term/p95": 0.003063201904296875, "masked_sentence_full_update_term/p99": 0.004177858121693134, "masked_sentence_full_update_term/var": 1.4129819874142413e-06, "masked_sentence_hessian_coeff": -4824.75, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -2080.0, "masked_sentence_hessian_coeff/min": -28032.0, "masked_sentence_hessian_coeff/p25": -8384.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 36868396.0, "masked_sentence_hessian_coeff_abs": 4824.75, "masked_sentence_hessian_coeff_abs/max": 28032.0, "masked_sentence_hessian_coeff_abs/median": 1960.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 8384.0, "masked_sentence_hessian_coeff_abs/p99": 20979.22265625, "masked_sentence_hessian_coeff_abs/var": 36868396.0, "masked_token_fisher_curvature": 255.18148803710938, "masked_token_fisher_curvature/max": 104960.0, "masked_token_fisher_curvature/median": 1.1726730697603216e-15, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.711620769102046e-21, "masked_token_fisher_curvature/p75": 1.014086592476815e-10, "masked_token_fisher_curvature/p85": 4.6100467443466187e-08, "masked_token_fisher_curvature/p90": 4.500150680541992e-06, "masked_token_fisher_curvature/p95": 0.018529891967773438, "masked_token_fisher_curvature/p99": 3696.0, "masked_token_fisher_curvature/var": 11339179.0, "masked_token_fisher_kl_divergence": 2.4017735356096637e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.106677186716453e-25, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.437342591868191e-31, "masked_token_fisher_kl_divergence/p75": 9.529120656610879e-21, "masked_token_fisher_kl_divergence/p85": 4.336808689942018e-18, "masked_token_fisher_kl_divergence/p90": 4.2327252813834093e-16, "masked_token_fisher_kl_divergence/p95": 1.742217481393027e-12, "masked_token_fisher_kl_divergence/p99": 3.4831464290618896e-07, "masked_token_fisher_kl_divergence/var": 1.0047350321520931e-13, "masked_token_full_update_term": 9.089435479836538e-06, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.2993812561035156e-05, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.3527187142935446e-17, "masked_token_full_update_term/p85": 9.89097692638552e-13, "masked_token_full_update_term/p90": 6.275513442233205e-11, "masked_token_full_update_term/p95": 6.210711944731884e-09, "masked_token_full_update_term/p99": 2.468610182404518e-05, "masked_token_full_update_term/var": 1.7831487397756973e-08, "masked_token_hessian_coeff": -5767.55908203125, "masked_token_hessian_coeff/max": 53.75, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2785280.0, "masked_token_hessian_coeff/p25": -1.909211277961731e-08, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.003220856189727783, "masked_token_hessian_coeff/var": 7479168000.0, "masked_token_hessian_coeff_abs": 5767.5791015625, "masked_token_hessian_coeff_abs/max": 2785280.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.0505318641662598e-06, "masked_token_hessian_coeff_abs/p99": 6368.0, "masked_token_hessian_coeff_abs/var": 7479166976.0, "mean_logprobs": -0.012451171875, "mean_logprobs/var": 7.82012939453125e-05, "num_completions/total": 2688, "per_sentence_gradient_norm": 39.106773376464844, "per_sentence_gradient_norm/max": 219.0, "per_sentence_gradient_norm/median": 19.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 64.5, "per_sentence_gradient_norm/var": 2443.11767578125, "per_token_feature_norm": 184.06576538085938, "per_token_feature_norm/max": 276.0, "per_token_feature_norm/median": 185.0, "per_token_feature_norm/min": 93.5, "per_token_feature_norm/p25": 178.0, "per_token_feature_norm/p75": 191.0, "per_token_feature_norm/var": 163.72225952148438, "per_token_gradient_norm": 0.744531512260437, "per_token_gradient_norm/max": 276.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.122274160385132e-09, "per_token_gradient_norm/var": 101.37564086914062, "per_token_policy_error_norm": 0.00729485135525465, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006951694376766682, "policy_entropy": 0.013915340416133404, "policy_entropy/max": 2.703125, "policy_entropy/median": 8.032657206058502e-09, "policy_entropy/min": 7.369186641112413e-20, "policy_entropy/p25": 3.1604940886609256e-11, "policy_entropy/p75": 1.1399388313293457e-06, "policy_entropy/var": 0.008431218564510345, "policy_loss": -0.5104166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25252193212509155, "policy_sharpness": 9.626957893371582, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.421879768371582, "reward": 0.5104166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25252193212509155, "rewards/accuracy_reward": 0.5104166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25252193212509155, "sentence_fisher_curvature": 150592.40625, "sentence_fisher_curvature/max": 1335296.0, "sentence_fisher_curvature/median": 79872.0, "sentence_fisher_curvature/min": 80.0, "sentence_fisher_curvature/p25": 736.0, "sentence_fisher_curvature/p75": 264704.0, "sentence_fisher_curvature/p85": 346112.0, "sentence_fisher_curvature/p90": 390144.0, "sentence_fisher_curvature/p95": 484352.0, "sentence_fisher_curvature/p99": 751617.875, "sentence_fisher_curvature/var": 45278842880.0, "sentence_fisher_kl_divergence": 1.4170769645716064e-05, "sentence_fisher_kl_divergence/max": 0.000125885009765625, "sentence_fisher_kl_divergence/median": 7.510185241699219e-06, "sentence_fisher_kl_divergence/min": 7.508788257837296e-09, "sentence_fisher_kl_divergence/p25": 6.938353180885315e-08, "sentence_fisher_kl_divergence/p75": 2.4944543838500977e-05, "sentence_fisher_kl_divergence/p85": 3.254413604736328e-05, "sentence_fisher_kl_divergence/p90": 3.6716461181640625e-05, "sentence_fisher_kl_divergence/p95": 4.553794860839844e-05, "sentence_fisher_kl_divergence/p99": 7.06197606632486e-05, "sentence_fisher_kl_divergence/var": 4.013998300411714e-10, "sentence_full_gradient_variance/max_squared_error": 3883.00830078125, "sentence_full_gradient_variance/metric": 3883.00830078125, "sentence_full_gradient_variance/p75": 3883.00830078125, "sentence_full_gradient_variance/p90": 3883.00830078125, "sentence_full_gradient_variance/p95": 3883.00830078125, "sentence_full_gradient_variance/p99": 3883.00830078125, "sentence_full_update_term": 0.0317433699965477, "sentence_full_update_term/max": 0.1611328125, "sentence_full_update_term/median": 0.0224609375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0511474609375, "sentence_full_update_term/p85": 0.068359375, "sentence_full_update_term/p90": 0.085205078125, "sentence_full_update_term/p95": 0.101318359375, "sentence_full_update_term/p99": 0.13515633344650269, "sentence_full_update_term/var": 0.0014468070585280657, "sentence_hessian_coeff": 30909.0, "sentence_hessian_coeff/max": 577536.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -164864.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 32736.0, "sentence_hessian_coeff/p99": 390759.0, "sentence_hessian_coeff/var": 11250699264.0, "sentence_hessian_coeff_abs": 56800.3359375, "sentence_hessian_coeff_abs/max": 577536.0, "sentence_hessian_coeff_abs/median": 7328.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 89984.0, "sentence_hessian_coeff_abs/p99": 390759.0, "sentence_hessian_coeff_abs/var": 8955883520.0, "step": 28, "token_fisher_curvature": 124912.40625, "token_fisher_curvature/max": 85983232.0, "token_fisher_curvature/median": 1.457167719820518e-15, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.3336605897419224e-21, "token_fisher_curvature/p75": 1.5370460459962487e-10, "token_fisher_curvature/p85": 8.754432201385498e-08, "token_fisher_curvature/p90": 1.3887882232666016e-05, "token_fisher_curvature/p95": 0.2333984375, "token_fisher_curvature/p99": 116736.0, "token_fisher_curvature/var": 5306585710592.0, "token_fisher_kl_divergence": 1.1754538718378171e-05, "token_fisher_kl_divergence/max": 0.00811767578125, "token_fisher_kl_divergence/median": 1.3732490638087374e-25, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.022825294961911e-31, "token_fisher_kl_divergence/p75": 1.4505439221729893e-20, "token_fisher_kl_divergence/p85": 8.239936510889834e-18, "token_fisher_kl_divergence/p90": 1.304512053934559e-15, "token_fisher_kl_divergence/p95": 2.1941559680271894e-11, "token_fisher_kl_divergence/p99": 1.0967254638671875e-05, "token_fisher_kl_divergence/var": 4.699524680518152e-08, "token_full_update_term": 0.0003567113890312612, "token_full_update_term/max": 0.134765625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.2993812561035156e-05, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.9764879717084654e-17, "token_full_update_term/p85": 1.9326762412674725e-12, "token_full_update_term/p90": 1.255102688446641e-10, "token_full_update_term/p95": 1.979060471057892e-08, "token_full_update_term/p99": 0.0023345947265625, "token_full_update_term/var": 2.4363602278754115e-05, "token_hessian_coeff": 16880.08203125, "token_hessian_coeff/max": 87556096.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -11075584.0, "token_hessian_coeff/p25": -2.922024577856064e-08, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.00933837890625, "token_hessian_coeff/var": 3624397963264.0, "token_hessian_coeff_abs": 114376.2734375, "token_hessian_coeff_abs/max": 87556096.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.5348196029663086e-06, "token_hessian_coeff_abs/p99": 1363968.0, "token_hessian_coeff_abs/var": 3611600355328.0 }, { "accuracy_reward": 0.7604166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.18410086631774902, "adam_stats/lm_head/lr_effective_max": 6.474166002590209e-05, "adam_stats/lm_head/lr_effective_mean": -5.1076979451503135e-11, "adam_stats/lm_head/lr_effective_min": -6.699474761262536e-05, "adam_stats/lm_head/lr_effective_std": 1.4335358855532832e-06, "adam_stats/lr_effective_max": 7.395201828330755e-05, "adam_stats/lr_effective_mean": -1.4585367635877589e-10, "adam_stats/lr_effective_min": -7.291070505743846e-05, "adam_stats/m_t_max": 0.0025051168631762266, "adam_stats/m_t_mean": 6.659266887920623e-12, "adam_stats/m_t_min": -0.0019536803010851145, "adam_stats/v_t_max": 2.6079738745465875e-05, "adam_stats/v_t_mean": 1.741741625059523e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7604166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.18410086631774902, "all_logprobs": -0.00967944785952568, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.875, "all_logprobs/p1": -0.20650392770767212, "all_logprobs/p10": -6.9141387939453125e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0003057480789721012, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013331043533980846, "clip_ratio": 0.0, "completion_length": 629.7396240234375, "completion_length/correct": 560.5205688476562, "completion_length/correct/max": 1000.0, "completion_length/correct/median": 576.0, "completion_length/correct/min": 265.0, "completion_length/correct/p25": 323.0, "completion_length/correct/p75": 757.0, "completion_length/correct/var": 46664.86328125, "completion_length/incorrect": 849.434814453125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 483.0, "completion_length/incorrect/p25": 569.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 53253.7109375, "completion_length/max": 1024.0, "completion_length/median": 582.0, "completion_length/min": 265.0, "completion_length/p25": 410.5, "completion_length/p75": 812.25, "completion_length/var": 63066.6640625, "curvature_clip_ratio_token_fisher": 0.013811926357448101, "curvature_clip_ratio_token_hessian": 0.009842031635344028, "curvature_clip_ratio_total_fisher": 0.013811926357448101, "curvature_clip_ratio_total_full": 0.013811926357448101, "curvature_clip_ratio_total_hessian": 0.009842031635344028, "epoch": 0.0464, "feature_vector_variance/max_squared_error": 68645.21875, "feature_vector_variance/metric": 28733.001953125, "generated_tokens/total": 1775983.0, "global_fisher_curvature": 73728.0, "global_fisher_curvature/max": 73728.0, "global_fisher_curvature/median": 73728.0, "global_fisher_curvature/min": 73728.0, "global_fisher_curvature/p25": 73728.0, "global_fisher_curvature/p75": 73728.0, "global_fisher_curvature/p85": 73728.0, "global_fisher_curvature/p90": 73728.0, "global_fisher_curvature/p95": 73728.0, "global_fisher_curvature/p99": 73728.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.794929504394531e-06, "global_fisher_kl_divergence/max": 6.794929504394531e-06, "global_fisher_kl_divergence/median": 6.794929504394531e-06, "global_fisher_kl_divergence/min": 6.794929504394531e-06, "global_fisher_kl_divergence/p25": 6.794929504394531e-06, "global_fisher_kl_divergence/p75": 6.794929504394531e-06, "global_fisher_kl_divergence/p85": 6.794929504394531e-06, "global_fisher_kl_divergence/p90": 6.794929504394531e-06, "global_fisher_kl_divergence/p95": 6.794929504394531e-06, "global_fisher_kl_divergence/p99": 6.794929504394531e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.08935546875, "global_full_update_term/max": 0.08935546875, "global_full_update_term/median": 0.08935546875, "global_full_update_term/min": 0.08935546875, "global_full_update_term/p25": 0.08935546875, "global_full_update_term/p75": 0.08935546875, "global_full_update_term/p85": 0.08935546875, "global_full_update_term/p90": 0.08935546875, "global_full_update_term/p95": 0.08935546875, "global_full_update_term/p99": 0.08935546875, "global_full_update_term/var": NaN, "global_hessian_coeff": 16768.0, "global_hessian_coeff/max": 16768.0, "global_hessian_coeff/median": 16768.0, "global_hessian_coeff/min": 16768.0, "global_hessian_coeff/p25": 16768.0, "global_hessian_coeff/p75": 16768.0, "global_hessian_coeff/p99": 16768.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 16768.0, "global_hessian_coeff_abs/max": 16768.0, "global_hessian_coeff_abs/median": 16768.0, "global_hessian_coeff_abs/min": 16768.0, "global_hessian_coeff_abs/p25": 16768.0, "global_hessian_coeff_abs/p75": 16768.0, "global_hessian_coeff_abs/p99": 16768.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.046630725264549255, "learning_rate": 1.3410080652050414e-05, "loss": -0.7604, "masked_global_fisher_curvature": 676.0, "masked_global_fisher_curvature/max": 676.0, "masked_global_fisher_curvature/median": 676.0, "masked_global_fisher_curvature/min": 676.0, "masked_global_fisher_curvature/p25": 676.0, "masked_global_fisher_curvature/p75": 676.0, "masked_global_fisher_curvature/p85": 676.0, "masked_global_fisher_curvature/p90": 676.0, "masked_global_fisher_curvature/p95": 676.0, "masked_global_fisher_curvature/p99": 676.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/max": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/median": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/min": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/p25": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/p75": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/p85": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/p90": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/p95": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/p99": 6.239861249923706e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00188446044921875, "masked_global_full_update_term/max": 0.00188446044921875, "masked_global_full_update_term/median": 0.00188446044921875, "masked_global_full_update_term/min": 0.00188446044921875, "masked_global_full_update_term/p25": 0.00188446044921875, "masked_global_full_update_term/p75": 0.00188446044921875, "masked_global_full_update_term/p85": 0.00188446044921875, "masked_global_full_update_term/p90": 0.00188446044921875, "masked_global_full_update_term/p95": 0.00188446044921875, "masked_global_full_update_term/p99": 0.00188446044921875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1240.0, "masked_global_hessian_coeff/max": -1240.0, "masked_global_hessian_coeff/median": -1240.0, "masked_global_hessian_coeff/min": -1240.0, "masked_global_hessian_coeff/p25": -1240.0, "masked_global_hessian_coeff/p75": -1240.0, "masked_global_hessian_coeff/p99": -1240.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1240.0, "masked_global_hessian_coeff_abs/max": 1240.0, "masked_global_hessian_coeff_abs/median": 1240.0, "masked_global_hessian_coeff_abs/min": 1240.0, "masked_global_hessian_coeff_abs/p25": 1240.0, "masked_global_hessian_coeff_abs/p75": 1240.0, "masked_global_hessian_coeff_abs/p99": 1240.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.6387532949447632, "masked_per_sentence_gradient_norm/max": 5.96875, "masked_per_sentence_gradient_norm/median": 1.6875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.16064453125, "masked_per_sentence_gradient_norm/p75": 2.3671875, "masked_per_sentence_gradient_norm/var": 1.9191949367523193, "masked_per_token_gradient_norm": 0.03634347394108772, "masked_per_token_gradient_norm/max": 10.1875, "masked_per_token_gradient_norm/median": 4.5702108764089644e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.7066638469696045e-07, "masked_per_token_gradient_norm/var": 0.164062038064003, "masked_sentence_fisher_curvature": 195.50033569335938, "masked_sentence_fisher_curvature/max": 808.0, "masked_sentence_fisher_curvature/median": 165.0, "masked_sentence_fisher_curvature/min": 1.28125, "masked_sentence_fisher_curvature/p25": 88.125, "masked_sentence_fisher_curvature/p75": 252.0, "masked_sentence_fisher_curvature/p85": 309.0, "masked_sentence_fisher_curvature/p90": 350.0, "masked_sentence_fisher_curvature/p95": 458.5, "masked_sentence_fisher_curvature/p99": 644.6005249023438, "masked_sentence_fisher_curvature/var": 19316.119140625, "masked_sentence_fisher_kl_divergence": 1.798921900331152e-08, "masked_sentence_fisher_kl_divergence/max": 7.450580596923828e-08, "masked_sentence_fisher_kl_divergence/median": 1.5133991837501526e-08, "masked_sentence_fisher_kl_divergence/min": 1.1823431123048067e-10, "masked_sentence_fisher_kl_divergence/p25": 8.13452061265707e-09, "masked_sentence_fisher_kl_divergence/p75": 2.3166649043560028e-08, "masked_sentence_fisher_kl_divergence/p85": 2.843444235622883e-08, "masked_sentence_fisher_kl_divergence/p90": 3.2247044146060944e-08, "masked_sentence_fisher_kl_divergence/p95": 4.21423465013504e-08, "masked_sentence_fisher_kl_divergence/p99": 5.9243806305175895e-08, "masked_sentence_fisher_kl_divergence/var": 1.6368418044299782e-16, "masked_sentence_full_gradient_variance/max_squared_error": 4.439208030700684, "masked_sentence_full_gradient_variance/metric": 4.439208030700684, "masked_sentence_full_gradient_variance/p75": 4.439208030700684, "masked_sentence_full_gradient_variance/p90": 4.439208030700684, "masked_sentence_full_gradient_variance/p95": 4.439208030700684, "masked_sentence_full_gradient_variance/p99": 4.439208030700684, "masked_sentence_full_update_term": 0.001136119244620204, "masked_sentence_full_update_term/max": 0.003936767578125, "masked_sentence_full_update_term/median": 0.00104522705078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 5.4717063903808594e-05, "masked_sentence_full_update_term/p75": 0.00182342529296875, "masked_sentence_full_update_term/p85": 0.002147674560546875, "masked_sentence_full_update_term/p90": 0.0023345947265625, "masked_sentence_full_update_term/p95": 0.00299835205078125, "masked_sentence_full_update_term/p99": 0.0034729018807411194, "masked_sentence_full_update_term/var": 9.285158171223884e-07, "masked_sentence_hessian_coeff": -6393.1669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6464.0, "masked_sentence_hessian_coeff/min": -23040.0, "masked_sentence_hessian_coeff/p25": -9280.0, "masked_sentence_hessian_coeff/p75": -1792.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 25900208.0, "masked_sentence_hessian_coeff_abs": 6393.1669921875, "masked_sentence_hessian_coeff_abs/max": 23040.0, "masked_sentence_hessian_coeff_abs/median": 6464.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 1792.0, "masked_sentence_hessian_coeff_abs/p75": 9280.0, "masked_sentence_hessian_coeff_abs/p99": 19148.8125, "masked_sentence_hessian_coeff_abs/var": 25900208.0, "masked_token_fisher_curvature": 268.545654296875, "masked_token_fisher_curvature/max": 107520.0, "masked_token_fisher_curvature/median": 2.220446049250313e-16, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 7.858215819253763e-22, "masked_token_fisher_curvature/p75": 2.1373125491663814e-11, "masked_token_fisher_curvature/p85": 1.5366822481155396e-08, "masked_token_fisher_curvature/p90": 2.0563602447509766e-06, "masked_token_fisher_curvature/p95": 0.004150390625, "masked_token_fisher_curvature/p99": 2752.0, "masked_token_fisher_curvature/var": 14141301.0, "masked_token_fisher_kl_divergence": 2.472311955159512e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 2.039678756539448e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 7.231866941174071e-32, "masked_token_fisher_kl_divergence/p75": 1.971998580326418e-21, "masked_token_fisher_kl_divergence/p85": 1.4162390878091902e-18, "masked_token_fisher_kl_divergence/p90": 1.8908485888147197e-16, "masked_token_fisher_kl_divergence/p95": 3.8191672047105385e-13, "masked_token_fisher_kl_divergence/p99": 2.5331974029541016e-07, "masked_token_fisher_kl_divergence/var": 1.1991296356480413e-13, "masked_token_full_update_term": 1.5450352293555625e-05, "masked_token_full_update_term/max": 0.00408935546875, "masked_token_full_update_term/median": 5.426304818347862e-22, "masked_token_full_update_term/min": -6.034970283508301e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.948752353404416e-13, "masked_token_full_update_term/p85": 4.482725302068502e-11, "masked_token_full_update_term/p90": 1.2885550404462265e-09, "masked_token_full_update_term/p95": 1.0663643479347229e-07, "masked_token_full_update_term/p99": 0.0003814697265625, "masked_token_full_update_term/var": 3.1269898670416296e-08, "masked_token_hessian_coeff": -9892.2080078125, "masked_token_hessian_coeff/max": 338.0, "masked_token_hessian_coeff/median": -2.5920599000528455e-11, "masked_token_hessian_coeff/min": -2719744.0, "masked_token_hessian_coeff/p25": -3.314018249511719e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.016845703125, "masked_token_hessian_coeff/var": 13111180288.0, "masked_token_hessian_coeff_abs": 9892.248046875, "masked_token_hessian_coeff_abs/max": 2719744.0, "masked_token_hessian_coeff_abs/median": 9.371433407068253e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00014781951904296875, "masked_token_hessian_coeff_abs/p99": 232256.0, "masked_token_hessian_coeff_abs/var": 13111179264.0, "mean_logprobs": -0.00946044921875, "mean_logprobs/var": 3.2901763916015625e-05, "num_completions/total": 2784, "per_sentence_gradient_norm": 57.6845703125, "per_sentence_gradient_norm/max": 225.0, "per_sentence_gradient_norm/median": 47.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 7.53125, "per_sentence_gradient_norm/p75": 99.25, "per_sentence_gradient_norm/var": 3168.097412109375, "per_token_feature_norm": 184.9296417236328, "per_token_feature_norm/max": 276.0, "per_token_feature_norm/median": 185.0, "per_token_feature_norm/min": 97.0, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 192.0, "per_token_feature_norm/var": 161.70956420898438, "per_token_gradient_norm": 1.0055452585220337, "per_token_gradient_norm/max": 354.0, "per_token_gradient_norm/median": 5.866240826435387e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.066394805908203e-07, "per_token_gradient_norm/var": 125.99801635742188, "per_token_policy_error_norm": 0.0057493047788739204, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005337280686944723, "policy_entropy": 0.010862224735319614, "policy_entropy/max": 2.546875, "policy_entropy/median": 4.016328603029251e-09, "policy_entropy/min": 1.0672615135404184e-19, "policy_entropy/p25": 1.2818190953112207e-11, "policy_entropy/p75": 7.040798664093018e-07, "policy_entropy/var": 0.005872008856385946, "policy_loss": -0.7604166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.18410086631774902, "policy_sharpness": 9.686188697814941, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.979123830795288, "reward": 0.7604166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.18410086631774902, "rewards/accuracy_reward": 0.7604166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.18410086631774902, "sentence_fisher_curvature": 155988.234375, "sentence_fisher_curvature/max": 589824.0, "sentence_fisher_curvature/median": 140288.0, "sentence_fisher_curvature/min": 66.0, "sentence_fisher_curvature/p25": 3568.0, "sentence_fisher_curvature/p75": 228352.0, "sentence_fisher_curvature/p85": 323072.0, "sentence_fisher_curvature/p90": 358400.0, "sentence_fisher_curvature/p95": 468992.0, "sentence_fisher_curvature/p99": 550912.125, "sentence_fisher_curvature/var": 22349750272.0, "sentence_fisher_kl_divergence": 1.4361862668010872e-05, "sentence_fisher_kl_divergence/max": 5.435943603515625e-05, "sentence_fisher_kl_divergence/median": 1.2934207916259766e-05, "sentence_fisher_kl_divergence/min": 6.082700565457344e-09, "sentence_fisher_kl_divergence/p25": 3.2782554626464844e-07, "sentence_fisher_kl_divergence/p75": 2.09808349609375e-05, "sentence_fisher_kl_divergence/p85": 2.9742717742919922e-05, "sentence_fisher_kl_divergence/p90": 3.3020973205566406e-05, "sentence_fisher_kl_divergence/p95": 4.32133674621582e-05, "sentence_fisher_kl_divergence/p99": 5.073548527434468e-05, "sentence_fisher_kl_divergence/var": 1.8958315473049936e-10, "sentence_full_gradient_variance/max_squared_error": 6373.6064453125, "sentence_full_gradient_variance/metric": 6373.6064453125, "sentence_full_gradient_variance/p75": 6373.6064453125, "sentence_full_gradient_variance/p90": 6373.6064453125, "sentence_full_gradient_variance/p95": 6373.6064453125, "sentence_full_gradient_variance/p99": 6373.6064453125, "sentence_full_update_term": 0.04708798974752426, "sentence_full_update_term/max": 0.1865234375, "sentence_full_update_term/median": 0.03369140625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.006103515625, "sentence_full_update_term/p75": 0.078857421875, "sentence_full_update_term/p85": 0.0909423828125, "sentence_full_update_term/p90": 0.09912109375, "sentence_full_update_term/p95": 0.1204833984375, "sentence_full_update_term/p99": 0.1772461235523224, "sentence_full_update_term/var": 0.0019908458925783634, "sentence_hessian_coeff": 9966.6669921875, "sentence_hessian_coeff/max": 344064.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -146432.0, "sentence_hessian_coeff/p25": -41728.0, "sentence_hessian_coeff/p75": 33472.0, "sentence_hessian_coeff/p99": 285696.1875, "sentence_hessian_coeff/var": 6754444288.0, "sentence_hessian_coeff_abs": 54358.66796875, "sentence_hessian_coeff_abs/max": 344064.0, "sentence_hessian_coeff_abs/median": 41472.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 7328.0, "sentence_hessian_coeff_abs/p75": 74496.0, "sentence_hessian_coeff_abs/p99": 285696.1875, "sentence_hessian_coeff_abs/var": 3868856064.0, "step": 29, "token_fisher_curvature": 151088.4375, "token_fisher_curvature/max": 90701824.0, "token_fisher_curvature/median": 3.0531133177191805e-16, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 9.529120656610879e-22, "token_fisher_curvature/p75": 3.7061909097246826e-11, "token_fisher_curvature/p85": 4.1676685214042664e-08, "token_fisher_curvature/p90": 9.298324584960938e-06, "token_fisher_curvature/p95": 0.10297966003417969, "token_fisher_curvature/p99": 548864.0, "token_fisher_curvature/var": 5771727208448.0, "token_fisher_kl_divergence": 1.3907402717450168e-05, "token_fisher_kl_divergence/max": 0.00836181640625, "token_fisher_kl_divergence/median": 2.8070826451384484e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 8.782240546405795e-32, "token_fisher_kl_divergence/p75": 3.414601568618898e-21, "token_fisher_kl_divergence/p85": 3.848917712323541e-18, "token_fisher_kl_divergence/p90": 8.569533971325427e-16, "token_fisher_kl_divergence/p95": 9.470202400052585e-12, "token_fisher_kl_divergence/p99": 5.054473876953125e-05, "token_fisher_kl_divergence/var": 4.890281957159459e-08, "token_full_update_term": 0.0004708764608949423, "token_full_update_term/max": 0.13671875, "token_full_update_term/median": 6.299807545203859e-21, "token_full_update_term/min": -6.034970283508301e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.867217739956686e-13, "token_full_update_term/p85": 8.36735125631094e-11, "token_full_update_term/p90": 3.259629011154175e-09, "token_full_update_term/p95": 5.103647708892822e-07, "token_full_update_term/p99": 0.009183406829833984, "token_full_update_term/var": 2.8598878998309374e-05, "token_hessian_coeff": -5281.44873046875, "token_hessian_coeff/max": 90177536.0, "token_hessian_coeff/median": -3.3651303965598345e-11, "token_hessian_coeff/min": -11141120.0, "token_hessian_coeff/p25": -4.5418739318847656e-05, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.039775848388671875, "token_hessian_coeff/var": 3861365915648.0, "token_hessian_coeff_abs": 152288.953125, "token_hessian_coeff_abs/max": 90177536.0, "token_hessian_coeff_abs/median": 1.2456439435482025e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00020313262939453125, "token_hessian_coeff_abs/p99": 4653056.0, "token_hessian_coeff_abs/var": 3838201823232.0 }, { "accuracy_reward": 0.5, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 0.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25263160467147827, "adam_stats/lm_head/lr_effective_max": 6.301431858446449e-05, "adam_stats/lm_head/lr_effective_mean": -8.804375978277434e-11, "adam_stats/lm_head/lr_effective_min": -6.495702837128192e-05, "adam_stats/lm_head/lr_effective_std": 1.3738297184318071e-06, "adam_stats/lr_effective_max": 6.761062104487792e-05, "adam_stats/lr_effective_mean": -1.5798186370208356e-10, "adam_stats/lr_effective_min": -6.82617537677288e-05, "adam_stats/m_t_max": 0.002282070927321911, "adam_stats/m_t_mean": 7.506116388167339e-12, "adam_stats/m_t_min": -0.0018006553873419762, "adam_stats/v_t_max": 2.6053736291942187e-05, "adam_stats/v_t_mean": 1.7406801911326597e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5, "advantages/max": 1.0, "advantages/median": 0.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25263160467147827, "all_logprobs": -0.011238460429012775, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.2381349802017212, "all_logprobs/p10": -6.794929504394531e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0004596710205078125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01761896163225174, "clip_ratio": 0.0, "completion_length": 601.4166870117188, "completion_length/correct": 489.5833435058594, "completion_length/correct/max": 998.0, "completion_length/correct/median": 453.0, "completion_length/correct/min": 278.0, "completion_length/correct/p25": 365.75, "completion_length/correct/p75": 507.0, "completion_length/correct/var": 35217.18359375, "completion_length/incorrect": 713.25, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 623.0, "completion_length/incorrect/min": 398.0, "completion_length/incorrect/p25": 514.75, "completion_length/incorrect/p75": 858.25, "completion_length/incorrect/var": 48002.7890625, "completion_length/max": 1024.0, "completion_length/median": 511.0, "completion_length/min": 278.0, "completion_length/p25": 441.5, "completion_length/p75": 811.5, "completion_length/var": 53810.33203125, "curvature_clip_ratio_token_fisher": 0.007239850237965584, "curvature_clip_ratio_token_hessian": 0.004832340404391289, "curvature_clip_ratio_total_fisher": 0.007239850237965584, "curvature_clip_ratio_total_full": 0.007239850237965584, "curvature_clip_ratio_total_hessian": 0.004832340404391289, "epoch": 0.048, "feature_vector_variance/max_squared_error": 61278.48828125, "feature_vector_variance/metric": 28923.669921875, "generated_tokens/total": 1833719.0, "global_fisher_curvature": 51712.0, "global_fisher_curvature/max": 51712.0, "global_fisher_curvature/median": 51712.0, "global_fisher_curvature/min": 51712.0, "global_fisher_curvature/p25": 51712.0, "global_fisher_curvature/p75": 51712.0, "global_fisher_curvature/p85": 51712.0, "global_fisher_curvature/p90": 51712.0, "global_fisher_curvature/p95": 51712.0, "global_fisher_curvature/p99": 51712.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.649162292480469e-06, "global_fisher_kl_divergence/max": 4.649162292480469e-06, "global_fisher_kl_divergence/median": 4.649162292480469e-06, "global_fisher_kl_divergence/min": 4.649162292480469e-06, "global_fisher_kl_divergence/p25": 4.649162292480469e-06, "global_fisher_kl_divergence/p75": 4.649162292480469e-06, "global_fisher_kl_divergence/p85": 4.649162292480469e-06, "global_fisher_kl_divergence/p90": 4.649162292480469e-06, "global_fisher_kl_divergence/p95": 4.649162292480469e-06, "global_fisher_kl_divergence/p99": 4.649162292480469e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.033935546875, "global_full_update_term/max": 0.033935546875, "global_full_update_term/median": 0.033935546875, "global_full_update_term/min": 0.033935546875, "global_full_update_term/p25": 0.033935546875, "global_full_update_term/p75": 0.033935546875, "global_full_update_term/p85": 0.033935546875, "global_full_update_term/p90": 0.033935546875, "global_full_update_term/p95": 0.033935546875, "global_full_update_term/p99": 0.033935546875, "global_full_update_term/var": NaN, "global_hessian_coeff": 14848.0, "global_hessian_coeff/max": 14848.0, "global_hessian_coeff/median": 14848.0, "global_hessian_coeff/min": 14848.0, "global_hessian_coeff/p25": 14848.0, "global_hessian_coeff/p75": 14848.0, "global_hessian_coeff/p99": 14848.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 14848.0, "global_hessian_coeff_abs/max": 14848.0, "global_hessian_coeff_abs/median": 14848.0, "global_hessian_coeff_abs/min": 14848.0, "global_hessian_coeff_abs/p25": 14848.0, "global_hessian_coeff_abs/p75": 14848.0, "global_hessian_coeff_abs/p99": 14848.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04562995210289955, "learning_rate": 1.3245333323392335e-05, "loss": -0.5, "masked_global_fisher_curvature": 192.0, "masked_global_fisher_curvature/max": 192.0, "masked_global_fisher_curvature/median": 192.0, "masked_global_fisher_curvature/min": 192.0, "masked_global_fisher_curvature/p25": 192.0, "masked_global_fisher_curvature/p75": 192.0, "masked_global_fisher_curvature/p85": 192.0, "masked_global_fisher_curvature/p90": 192.0, "masked_global_fisher_curvature/p95": 192.0, "masked_global_fisher_curvature/p99": 192.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/max": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/median": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/min": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/p25": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/p75": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/p85": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/p90": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/p95": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/p99": 1.7229467630386353e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00147247314453125, "masked_global_full_update_term/max": 0.00147247314453125, "masked_global_full_update_term/median": 0.00147247314453125, "masked_global_full_update_term/min": 0.00147247314453125, "masked_global_full_update_term/p25": 0.00147247314453125, "masked_global_full_update_term/p75": 0.00147247314453125, "masked_global_full_update_term/p85": 0.00147247314453125, "masked_global_full_update_term/p90": 0.00147247314453125, "masked_global_full_update_term/p95": 0.00147247314453125, "masked_global_full_update_term/p99": 0.00147247314453125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1088.0, "masked_global_hessian_coeff/max": -1088.0, "masked_global_hessian_coeff/median": -1088.0, "masked_global_hessian_coeff/min": -1088.0, "masked_global_hessian_coeff/p25": -1088.0, "masked_global_hessian_coeff/p75": -1088.0, "masked_global_hessian_coeff/p99": -1088.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1088.0, "masked_global_hessian_coeff_abs/max": 1088.0, "masked_global_hessian_coeff_abs/median": 1088.0, "masked_global_hessian_coeff_abs/min": 1088.0, "masked_global_hessian_coeff_abs/p25": 1088.0, "masked_global_hessian_coeff_abs/p75": 1088.0, "masked_global_hessian_coeff_abs/p99": 1088.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.4698283672332764, "masked_per_sentence_gradient_norm/max": 6.21875, "masked_per_sentence_gradient_norm/median": 0.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.64453125, "masked_per_sentence_gradient_norm/var": 3.4697179794311523, "masked_per_token_gradient_norm": 0.019865702837705612, "masked_per_token_gradient_norm/max": 9.8125, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.4260876923799515e-09, "masked_per_token_gradient_norm/var": 0.08602757006883621, "masked_sentence_fisher_curvature": 236.85336303710938, "masked_sentence_fisher_curvature/max": 720.0, "masked_sentence_fisher_curvature/median": 196.0, "masked_sentence_fisher_curvature/min": 3.984375, "masked_sentence_fisher_curvature/p25": 106.5, "masked_sentence_fisher_curvature/p75": 311.0, "masked_sentence_fisher_curvature/p85": 410.5, "masked_sentence_fisher_curvature/p90": 580.0, "masked_sentence_fisher_curvature/p95": 616.0, "masked_sentence_fisher_curvature/p99": 720.0, "masked_sentence_fisher_curvature/var": 33869.5546875, "masked_sentence_fisher_kl_divergence": 2.1294928487236575e-08, "masked_sentence_fisher_kl_divergence/max": 6.472691893577576e-08, "masked_sentence_fisher_kl_divergence/median": 1.7578713595867157e-08, "masked_sentence_fisher_kl_divergence/min": 3.583409124985337e-10, "masked_sentence_fisher_kl_divergence/p25": 9.589712135493755e-09, "masked_sentence_fisher_kl_divergence/p75": 2.796878106892109e-08, "masked_sentence_fisher_kl_divergence/p85": 3.6903657019138336e-08, "masked_sentence_fisher_kl_divergence/p90": 5.21540641784668e-08, "masked_sentence_fisher_kl_divergence/p95": 5.5355485528707504e-08, "masked_sentence_fisher_kl_divergence/p99": 6.472691893577576e-08, "masked_sentence_fisher_kl_divergence/var": 2.739018677800709e-16, "masked_sentence_full_gradient_variance/max_squared_error": 5.33029842376709, "masked_sentence_full_gradient_variance/metric": 5.33029842376709, "masked_sentence_full_gradient_variance/p75": 5.33029842376709, "masked_sentence_full_gradient_variance/p90": 5.33029842376709, "masked_sentence_full_gradient_variance/p95": 5.33029842376709, "masked_sentence_full_gradient_variance/p99": 5.33029842376709, "masked_sentence_full_update_term": 0.0009221633663401008, "masked_sentence_full_update_term/max": 0.00518798828125, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00140380859375, "masked_sentence_full_update_term/p85": 0.002277374267578125, "masked_sentence_full_update_term/p90": 0.00311279296875, "masked_sentence_full_update_term/p95": 0.0036773681640625, "masked_sentence_full_update_term/p99": 0.0040863072499632835, "masked_sentence_full_update_term/var": 1.548999648548488e-06, "masked_sentence_hessian_coeff": -4696.25, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -720.0, "masked_sentence_hessian_coeff/min": -23040.0, "masked_sentence_hessian_coeff/p25": -8272.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 36635280.0, "masked_sentence_hessian_coeff_abs": 4696.25, "masked_sentence_hessian_coeff_abs/max": 23040.0, "masked_sentence_hessian_coeff_abs/median": 0.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 8272.0, "masked_sentence_hessian_coeff_abs/p99": 22796.80078125, "masked_sentence_hessian_coeff_abs/var": 36635280.0, "masked_token_fisher_curvature": 258.26470947265625, "masked_token_fisher_curvature/max": 110080.0, "masked_token_fisher_curvature/median": 1.2750217548429532e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.525566491854225e-22, "masked_token_fisher_curvature/p75": 1.5006662579253316e-11, "masked_token_fisher_curvature/p85": 8.672941476106644e-09, "masked_token_fisher_curvature/p90": 1.330627128481865e-06, "masked_token_fisher_curvature/p95": 0.005950927734375, "masked_token_fisher_curvature/p99": 2570.625, "masked_token_fisher_curvature/var": 13206662.0, "masked_token_fisher_kl_divergence": 2.3216479405618884e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.146057123105086e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.968899256519069e-32, "masked_token_fisher_kl_divergence/p75": 1.3499587596865412e-21, "masked_token_fisher_kl_divergence/p85": 7.792703114739563e-19, "masked_token_fisher_kl_divergence/p90": 1.193435541363419e-16, "masked_token_fisher_kl_divergence/p95": 5.364597654988756e-13, "masked_token_fisher_kl_divergence/p99": 2.3143002181313932e-07, "masked_token_fisher_kl_divergence/var": 1.0672944461814077e-13, "masked_token_full_update_term": 8.300541594508104e-06, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -9.5367431640625e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.1358782797964437e-17, "masked_token_full_update_term/p85": 2.877698079828406e-13, "masked_token_full_update_term/p90": 1.3602452497707418e-11, "masked_token_full_update_term/p95": 1.4479155652225018e-09, "masked_token_full_update_term/p99": 2.193450927734375e-05, "masked_token_full_update_term/var": 1.608775157535547e-08, "masked_token_hessian_coeff": -5459.7626953125, "masked_token_hessian_coeff/max": 59.25, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2670592.0, "masked_token_hessian_coeff/p25": -1.2034433893859386e-08, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0013134777545928955, "masked_token_hessian_coeff/var": 7378333696.0, "masked_token_hessian_coeff_abs": 5459.78125, "masked_token_hessian_coeff_abs/max": 2670592.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.0547380447387695e-07, "masked_token_hessian_coeff_abs/p99": 6720.0, "masked_token_hessian_coeff_abs/var": 7378333696.0, "mean_logprobs": -0.01165771484375, "mean_logprobs/var": 8.344650268554688e-05, "num_completions/total": 2880, "per_sentence_gradient_norm": 21.52278709411621, "per_sentence_gradient_norm/max": 185.0, "per_sentence_gradient_norm/median": 0.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 28.625, "per_sentence_gradient_norm/var": 1101.5738525390625, "per_token_feature_norm": 185.48452758789062, "per_token_feature_norm/max": 260.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 102.0, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 193.0, "per_token_feature_norm/var": 169.6968536376953, "per_token_gradient_norm": 0.4883517324924469, "per_token_gradient_norm/max": 292.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.862645149230957e-09, "per_token_gradient_norm/var": 66.28086853027344, "per_token_policy_error_norm": 0.0066509973257780075, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006792886648327112, "policy_entropy": 0.01119737233966589, "policy_entropy/max": 2.515625, "policy_entropy/median": 3.2887328416109085e-09, "policy_entropy/min": 4.171637265227429e-20, "policy_entropy/p25": 9.379164112033322e-12, "policy_entropy/p75": 5.550682544708252e-07, "policy_entropy/var": 0.006081982981413603, "policy_loss": -0.5, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25263160467147827, "policy_sharpness": 9.667791366577148, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.0738682746887207, "reward": 0.5, "reward/max": 1.0, "reward/median": 0.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25263160467147827, "rewards/accuracy_reward": 0.5, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 0.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25263160467147827, "sentence_fisher_curvature": 110328.046875, "sentence_fisher_curvature/max": 1056768.0, "sentence_fisher_curvature/median": 5728.0, "sentence_fisher_curvature/min": 17.375, "sentence_fisher_curvature/p25": 667.0, "sentence_fisher_curvature/p75": 174080.0, "sentence_fisher_curvature/p85": 268800.0, "sentence_fisher_curvature/p90": 353280.0, "sentence_fisher_curvature/p95": 438272.0, "sentence_fisher_curvature/p99": 690996.375, "sentence_fisher_curvature/var": 31887927296.0, "sentence_fisher_kl_divergence": 9.919885997078381e-06, "sentence_fisher_kl_divergence/max": 9.489059448242188e-05, "sentence_fisher_kl_divergence/median": 5.140900611877441e-07, "sentence_fisher_kl_divergence/min": 1.5643308870494366e-09, "sentence_fisher_kl_divergence/p25": 5.995389074087143e-08, "sentence_fisher_kl_divergence/p75": 1.5616416931152344e-05, "sentence_fisher_kl_divergence/p85": 2.41696834564209e-05, "sentence_fisher_kl_divergence/p90": 3.1828880310058594e-05, "sentence_fisher_kl_divergence/p95": 3.9458274841308594e-05, "sentence_fisher_kl_divergence/p99": 6.204853707458824e-05, "sentence_fisher_kl_divergence/var": 2.5757662669434467e-10, "sentence_full_gradient_variance/max_squared_error": 1532.4544677734375, "sentence_full_gradient_variance/metric": 1532.4544677734375, "sentence_full_gradient_variance/p75": 1532.4544677734375, "sentence_full_gradient_variance/p90": 1532.4544677734375, "sentence_full_gradient_variance/p95": 1532.4544677734375, "sentence_full_gradient_variance/p99": 1532.4544677734375, "sentence_full_update_term": 0.017170270904898643, "sentence_full_update_term/max": 0.11767578125, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0238037109375, "sentence_full_update_term/p85": 0.037841796875, "sentence_full_update_term/p90": 0.052978515625, "sentence_full_update_term/p95": 0.0738525390625, "sentence_full_update_term/p99": 0.09680182486772537, "sentence_full_update_term/var": 0.0006591925630345941, "sentence_hessian_coeff": 21361.5, "sentence_hessian_coeff/max": 401408.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -176128.0, "sentence_hessian_coeff/p25": -860.0, "sentence_hessian_coeff/p75": 2816.0, "sentence_hessian_coeff/p99": 354713.75, "sentence_hessian_coeff/var": 9230049280.0, "sentence_hessian_coeff_abs": 49110.5, "sentence_hessian_coeff_abs/max": 401408.0, "sentence_hessian_coeff_abs/median": 0.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 57344.0, "sentence_hessian_coeff_abs/p99": 354713.75, "sentence_hessian_coeff_abs/var": 7253937664.0, "step": 30, "token_fisher_curvature": 83211.7421875, "token_fisher_curvature/max": 88604672.0, "token_fisher_curvature/median": 1.48318857196017e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.187310981896647e-22, "token_fisher_curvature/p75": 2.0463630789890885e-11, "token_fisher_curvature/p85": 1.4551915228366852e-08, "token_fisher_curvature/p90": 2.9355287551879883e-06, "token_fisher_curvature/p95": 0.03802490234375, "token_fisher_curvature/p99": 30592.0, "token_fisher_curvature/var": 3798645604352.0, "token_fisher_kl_divergence": 7.482024102500873e-06, "token_fisher_kl_divergence/max": 0.00799560546875, "token_fisher_kl_divergence/median": 1.3328593854614215e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.546678239835239e-32, "token_fisher_kl_divergence/p75": 1.8396496823179335e-21, "token_fisher_kl_divergence/p85": 1.3078188705606397e-18, "token_fisher_kl_divergence/p90": 2.636779683484747e-16, "token_fisher_kl_divergence/p95": 3.4141578453272814e-12, "token_fisher_kl_divergence/p99": 2.7567148208618164e-06, "token_fisher_kl_divergence/var": 3.0711504450664506e-08, "token_full_update_term": 0.00023185614554677159, "token_full_update_term/max": 0.1337890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -9.5367431640625e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.366447745567491e-17, "token_full_update_term/p85": 4.227729277772596e-13, "token_full_update_term/p90": 1.978150976356119e-11, "token_full_update_term/p95": 3.856257535517216e-09, "token_full_update_term/p99": 0.0007946193218231201, "token_full_update_term/var": 1.5538180377916433e-05, "token_hessian_coeff": 8233.287109375, "token_hessian_coeff/max": 87031808.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -11403264.0, "token_hessian_coeff/p25": -1.6065314412117004e-08, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.002227783203125, "token_hessian_coeff/var": 2689223884800.0, "token_hessian_coeff_abs": 84345.296875, "token_hessian_coeff_abs/max": 87031808.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 4.3213367462158203e-07, "token_hessian_coeff_abs/p99": 468992.0, "token_hessian_coeff_abs/var": 2682177454080.0 }, { "accuracy_reward": 0.8958333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.09429825097322464, "adam_stats/lm_head/lr_effective_max": 5.659412272507325e-05, "adam_stats/lm_head/lr_effective_mean": -1.3205447846331708e-10, "adam_stats/lm_head/lr_effective_min": -5.868949301657267e-05, "adam_stats/lm_head/lr_effective_std": 1.4336573030959698e-06, "adam_stats/lr_effective_max": 7.102303061401471e-05, "adam_stats/lr_effective_mean": -2.572594637317849e-10, "adam_stats/lr_effective_min": -6.99527226970531e-05, "adam_stats/m_t_max": 0.0018539736047387123, "adam_stats/m_t_mean": 4.028860307436072e-13, "adam_stats/m_t_min": -0.001544677303172648, "adam_stats/v_t_max": 2.6031262677861378e-05, "adam_stats/v_t_mean": 1.7456320677150527e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8958333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.09429825097322464, "all_logprobs": -0.011170555837452412, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.4375, "all_logprobs/p1": -0.24647483229637146, "all_logprobs/p10": -1.33514404296875e-05, "all_logprobs/p25": -1.1920928955078125e-07, "all_logprobs/p5": -0.000713348388671875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.019466964527964592, "clip_ratio": 0.0, "completion_length": 516.1666870117188, "completion_length/correct": 485.8953552246094, "completion_length/correct/max": 1023.0, "completion_length/correct/median": 433.0, "completion_length/correct/min": 183.0, "completion_length/correct/p25": 294.0, "completion_length/correct/p75": 693.25, "completion_length/correct/var": 63747.1484375, "completion_length/incorrect": 776.5, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 649.0, "completion_length/incorrect/min": 405.0, "completion_length/incorrect/p25": 605.25, "completion_length/incorrect/p75": 1001.0, "completion_length/incorrect/var": 51856.5, "completion_length/max": 1024.0, "completion_length/median": 448.0, "completion_length/min": 183.0, "completion_length/p25": 294.75, "completion_length/p75": 720.0, "completion_length/var": 69913.234375, "curvature_clip_ratio_token_fisher": 0.014510009437799454, "curvature_clip_ratio_token_hessian": 0.010009686462581158, "curvature_clip_ratio_total_fisher": 0.014510009437799454, "curvature_clip_ratio_total_full": 0.014510009437799454, "curvature_clip_ratio_total_hessian": 0.010009686462581158, "epoch": 0.0496, "feature_vector_variance/max_squared_error": 54469.73828125, "feature_vector_variance/metric": 28691.60546875, "generated_tokens/total": 1883271.0, "global_fisher_curvature": 90624.0, "global_fisher_curvature/max": 90624.0, "global_fisher_curvature/median": 90624.0, "global_fisher_curvature/min": 90624.0, "global_fisher_curvature/p25": 90624.0, "global_fisher_curvature/p75": 90624.0, "global_fisher_curvature/p85": 90624.0, "global_fisher_curvature/p90": 90624.0, "global_fisher_curvature/p95": 90624.0, "global_fisher_curvature/p99": 90624.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.927417755126953e-06, "global_fisher_kl_divergence/max": 7.927417755126953e-06, "global_fisher_kl_divergence/median": 7.927417755126953e-06, "global_fisher_kl_divergence/min": 7.927417755126953e-06, "global_fisher_kl_divergence/p25": 7.927417755126953e-06, "global_fisher_kl_divergence/p75": 7.927417755126953e-06, "global_fisher_kl_divergence/p85": 7.927417755126953e-06, "global_fisher_kl_divergence/p90": 7.927417755126953e-06, "global_fisher_kl_divergence/p95": 7.927417755126953e-06, "global_fisher_kl_divergence/p99": 7.927417755126953e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.1376953125, "global_full_update_term/max": 0.1376953125, "global_full_update_term/median": 0.1376953125, "global_full_update_term/min": 0.1376953125, "global_full_update_term/p25": 0.1376953125, "global_full_update_term/p75": 0.1376953125, "global_full_update_term/p85": 0.1376953125, "global_full_update_term/p90": 0.1376953125, "global_full_update_term/p95": 0.1376953125, "global_full_update_term/p99": 0.1376953125, "global_full_update_term/var": NaN, "global_hessian_coeff": 26112.0, "global_hessian_coeff/max": 26112.0, "global_hessian_coeff/median": 26112.0, "global_hessian_coeff/min": 26112.0, "global_hessian_coeff/p25": 26112.0, "global_hessian_coeff/p75": 26112.0, "global_hessian_coeff/p99": 26112.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 26112.0, "global_hessian_coeff_abs/max": 26112.0, "global_hessian_coeff_abs/median": 26112.0, "global_hessian_coeff_abs/min": 26112.0, "global_hessian_coeff_abs/p25": 26112.0, "global_hessian_coeff_abs/p75": 26112.0, "global_hessian_coeff_abs/p99": 26112.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.11858095228672028, "learning_rate": 1.3073586191080456e-05, "loss": -0.8958, "masked_global_fisher_curvature": 200.0, "masked_global_fisher_curvature/max": 200.0, "masked_global_fisher_curvature/median": 200.0, "masked_global_fisher_curvature/min": 200.0, "masked_global_fisher_curvature/p25": 200.0, "masked_global_fisher_curvature/p75": 200.0, "masked_global_fisher_curvature/p85": 200.0, "masked_global_fisher_curvature/p90": 200.0, "masked_global_fisher_curvature/p95": 200.0, "masked_global_fisher_curvature/p99": 200.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/max": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/median": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/min": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/p25": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/p75": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/p85": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/p90": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/p95": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/p99": 1.7578713595867157e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0019989013671875, "masked_global_full_update_term/max": 0.0019989013671875, "masked_global_full_update_term/median": 0.0019989013671875, "masked_global_full_update_term/min": 0.0019989013671875, "masked_global_full_update_term/p25": 0.0019989013671875, "masked_global_full_update_term/p75": 0.0019989013671875, "masked_global_full_update_term/p85": 0.0019989013671875, "masked_global_full_update_term/p90": 0.0019989013671875, "masked_global_full_update_term/p95": 0.0019989013671875, "masked_global_full_update_term/p99": 0.0019989013671875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2128.0, "masked_global_hessian_coeff/max": -2128.0, "masked_global_hessian_coeff/median": -2128.0, "masked_global_hessian_coeff/min": -2128.0, "masked_global_hessian_coeff/p25": -2128.0, "masked_global_hessian_coeff/p75": -2128.0, "masked_global_hessian_coeff/p99": -2128.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2128.0, "masked_global_hessian_coeff_abs/max": 2128.0, "masked_global_hessian_coeff_abs/median": 2128.0, "masked_global_hessian_coeff_abs/min": 2128.0, "masked_global_hessian_coeff_abs/p25": 2128.0, "masked_global_hessian_coeff_abs/p75": 2128.0, "masked_global_hessian_coeff_abs/p99": 2128.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.4128825664520264, "masked_per_sentence_gradient_norm/max": 7.15625, "masked_per_sentence_gradient_norm/median": 1.125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.35546875, "masked_per_sentence_gradient_norm/p75": 1.4765625, "masked_per_sentence_gradient_norm/var": 2.310720920562744, "masked_per_token_gradient_norm": 0.045023493468761444, "masked_per_token_gradient_norm/max": 10.4375, "masked_per_token_gradient_norm/median": 3.841705620288849e-09, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 5.400124791776761e-13, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.1904706954956055e-06, "masked_per_token_gradient_norm/var": 0.20020727813243866, "masked_sentence_fisher_curvature": 183.2350311279297, "masked_sentence_fisher_curvature/max": 896.0, "masked_sentence_fisher_curvature/median": 109.5, "masked_sentence_fisher_curvature/min": 13.1875, "masked_sentence_fisher_curvature/p25": 62.75, "masked_sentence_fisher_curvature/p75": 270.5, "masked_sentence_fisher_curvature/p85": 329.5, "masked_sentence_fisher_curvature/p90": 468.0, "masked_sentence_fisher_curvature/p95": 531.0, "masked_sentence_fisher_curvature/p99": 611.0009155273438, "masked_sentence_fisher_curvature/var": 30514.119140625, "masked_sentence_fisher_kl_divergence": 1.6073576247777055e-08, "masked_sentence_fisher_kl_divergence/max": 7.869675755500793e-08, "masked_sentence_fisher_kl_divergence/median": 9.604264050722122e-09, "masked_sentence_fisher_kl_divergence/min": 1.1568772606551647e-09, "masked_sentence_fisher_kl_divergence/p25": 5.50062395632267e-09, "masked_sentence_fisher_kl_divergence/p75": 2.3690517991781235e-08, "masked_sentence_fisher_kl_divergence/p85": 2.89292074739933e-08, "masked_sentence_fisher_kl_divergence/p90": 4.109460860490799e-08, "masked_sentence_fisher_kl_divergence/p95": 4.650792106986046e-08, "masked_sentence_fisher_kl_divergence/p99": 5.370246825009417e-08, "masked_sentence_fisher_kl_divergence/var": 2.350412667094645e-16, "masked_sentence_full_gradient_variance/max_squared_error": 4.1246843338012695, "masked_sentence_full_gradient_variance/metric": 4.1246843338012695, "masked_sentence_full_gradient_variance/p75": 4.1246843338012695, "masked_sentence_full_gradient_variance/p90": 4.1246843338012695, "masked_sentence_full_gradient_variance/p95": 4.1246843338012695, "masked_sentence_full_gradient_variance/p99": 4.1246843338012695, "masked_sentence_full_update_term": 0.0009574443101882935, "masked_sentence_full_update_term/max": 0.00726318359375, "masked_sentence_full_update_term/median": 0.000743865966796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00020813941955566406, "masked_sentence_full_update_term/p75": 0.0009098052978515625, "masked_sentence_full_update_term/p85": 0.00153350830078125, "masked_sentence_full_update_term/p90": 0.00218963623046875, "masked_sentence_full_update_term/p95": 0.0035858154296875, "masked_sentence_full_update_term/p99": 0.006364443805068731, "masked_sentence_full_update_term/var": 1.5196216054391698e-06, "masked_sentence_hessian_coeff": -7423.75, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -5664.0, "masked_sentence_hessian_coeff/min": -38656.0, "masked_sentence_hessian_coeff/p25": -7880.0, "masked_sentence_hessian_coeff/p75": -2942.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 52317856.0, "masked_sentence_hessian_coeff_abs": 7423.75, "masked_sentence_hessian_coeff_abs/max": 38656.0, "masked_sentence_hessian_coeff_abs/median": 5664.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 2942.0, "masked_sentence_hessian_coeff_abs/p75": 7880.0, "masked_sentence_hessian_coeff_abs/p99": 26496.0390625, "masked_sentence_hessian_coeff_abs/var": 52317856.0, "masked_token_fisher_curvature": 312.2488708496094, "masked_token_fisher_curvature/max": 112640.0, "masked_token_fisher_curvature/median": 6.106226635438361e-16, "masked_token_fisher_curvature/min": 3.6734198463196485e-40, "masked_token_fisher_curvature/p25": 1.5087774372967225e-21, "masked_token_fisher_curvature/p75": 1.0504663805477321e-10, "masked_token_fisher_curvature/p85": 4.0605300455354154e-08, "masked_token_fisher_curvature/p90": 6.3478946685791016e-06, "masked_token_fisher_curvature/p95": 0.04155158996582031, "masked_token_fisher_curvature/p99": 4395.5, "masked_token_fisher_curvature/var": 16145777.0, "masked_token_fisher_kl_divergence": 2.739246340865975e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 5.371827220193002e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.3250398017384183e-31, "masked_token_fisher_kl_divergence/p75": 9.211483301390516e-21, "masked_token_fisher_kl_divergence/p85": 3.561561784967519e-18, "masked_token_fisher_kl_divergence/p90": 5.551115123125783e-16, "masked_token_fisher_kl_divergence/p95": 3.6520786395044524e-12, "masked_token_fisher_kl_divergence/p99": 3.8564030546694994e-07, "masked_token_fisher_kl_divergence/var": 1.242707651193109e-13, "masked_token_full_update_term": 1.9378092474653386e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 4.5319650809894085e-17, "masked_token_full_update_term/min": -6.020069122314453e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.945377440890297e-12, "masked_token_full_update_term/p85": 3.3651303965598345e-10, "masked_token_full_update_term/p90": 4.1269458961323835e-09, "masked_token_full_update_term/p95": 4.6640343498438597e-07, "masked_token_full_update_term/p99": 0.00046096742153167725, "masked_token_full_update_term/var": 3.995679520585327e-08, "masked_token_hessian_coeff": -12909.7197265625, "masked_token_hessian_coeff/max": 170.0, "masked_token_hessian_coeff/median": -3.329478204250336e-08, "masked_token_hessian_coeff/min": -2916352.0, "masked_token_hessian_coeff/p25": -0.0003910064697265625, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0712890625, "masked_token_hessian_coeff/var": 18318254080.0, "masked_token_hessian_coeff_abs": 12909.8271484375, "masked_token_hessian_coeff_abs/max": 2916352.0, "masked_token_hessian_coeff_abs/median": 8.605420589447021e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 7.185008144006133e-11, "masked_token_hessian_coeff_abs/p75": 0.001251220703125, "masked_token_hessian_coeff_abs/p99": 329072.0, "masked_token_hessian_coeff_abs/var": 18318252032.0, "mean_logprobs": -0.0101318359375, "mean_logprobs/var": 8.869171142578125e-05, "num_completions/total": 2976, "per_sentence_gradient_norm": 42.58837890625, "per_sentence_gradient_norm/max": 198.0, "per_sentence_gradient_norm/median": 35.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 17.5, "per_sentence_gradient_norm/p75": 55.6875, "per_sentence_gradient_norm/var": 1592.4215087890625, "per_token_feature_norm": 183.85972595214844, "per_token_feature_norm/max": 247.0, "per_token_feature_norm/median": 185.0, "per_token_feature_norm/min": 89.5, "per_token_feature_norm/p25": 177.0, "per_token_feature_norm/p75": 192.0, "per_token_feature_norm/var": 176.940673828125, "per_token_gradient_norm": 1.1003574132919312, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 4.6566128730773926e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 6.430411758628907e-13, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.86102294921875e-06, "per_token_gradient_norm/var": 144.18934631347656, "per_token_policy_error_norm": 0.0062569607980549335, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0061198994517326355, "policy_entropy": 0.011876598000526428, "policy_entropy/max": 3.765625, "policy_entropy/median": 7.916241884231567e-09, "policy_entropy/min": 1.3806637040245096e-19, "policy_entropy/p25": 2.0122570276726037e-11, "policy_entropy/p75": 1.4156103134155273e-06, "policy_entropy/var": 0.007536637596786022, "policy_loss": -0.8958333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.09429825097322464, "policy_sharpness": 9.63588809967041, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.2991104125976562, "reward": 0.8958333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.09429825097322464, "rewards/accuracy_reward": 0.8958333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.09429825097322464, "sentence_fisher_curvature": 191770.625, "sentence_fisher_curvature/max": 946176.0, "sentence_fisher_curvature/median": 164864.0, "sentence_fisher_curvature/min": 182.0, "sentence_fisher_curvature/p25": 33536.0, "sentence_fisher_curvature/p75": 280576.0, "sentence_fisher_curvature/p85": 361472.0, "sentence_fisher_curvature/p90": 466944.0, "sentence_fisher_curvature/p95": 526336.0, "sentence_fisher_curvature/p99": 708813.5625, "sentence_fisher_curvature/var": 35292409856.0, "sentence_fisher_kl_divergence": 1.6823061741888523e-05, "sentence_fisher_kl_divergence/max": 8.296966552734375e-05, "sentence_fisher_kl_divergence/median": 1.4483928680419922e-05, "sentence_fisher_kl_divergence/min": 1.594889909029007e-08, "sentence_fisher_kl_divergence/p25": 2.9355287551879883e-06, "sentence_fisher_kl_divergence/p75": 2.4616718292236328e-05, "sentence_fisher_kl_divergence/p85": 3.17692756652832e-05, "sentence_fisher_kl_divergence/p90": 4.100799560546875e-05, "sentence_fisher_kl_divergence/p95": 4.607439041137695e-05, "sentence_fisher_kl_divergence/p99": 6.213194865267724e-05, "sentence_fisher_kl_divergence/var": 2.7160385052127367e-10, "sentence_full_gradient_variance/max_squared_error": 3324.103515625, "sentence_full_gradient_variance/metric": 3324.103515625, "sentence_full_gradient_variance/p75": 3324.103515625, "sentence_full_gradient_variance/p90": 3324.103515625, "sentence_full_gradient_variance/p95": 3324.103515625, "sentence_full_gradient_variance/p99": 3324.103515625, "sentence_full_update_term": 0.03192440792918205, "sentence_full_update_term/max": 0.1416015625, "sentence_full_update_term/median": 0.0252685546875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0104217529296875, "sentence_full_update_term/p75": 0.0361328125, "sentence_full_update_term/p85": 0.0576171875, "sentence_full_update_term/p90": 0.071044921875, "sentence_full_update_term/p95": 0.1015625, "sentence_full_update_term/p99": 0.13789063692092896, "sentence_full_update_term/var": 0.0009526555659249425, "sentence_hessian_coeff": 31466.75, "sentence_hessian_coeff/max": 464896.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -178176.0, "sentence_hessian_coeff/p25": -41344.0, "sentence_hessian_coeff/p75": 71168.0, "sentence_hessian_coeff/p99": 385126.65625, "sentence_hessian_coeff/var": 16443884544.0, "sentence_hessian_coeff_abs": 90695.921875, "sentence_hessian_coeff_abs/max": 464896.0, "sentence_hessian_coeff_abs/median": 55040.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 28800.0, "sentence_hessian_coeff_abs/p75": 120832.0, "sentence_hessian_coeff_abs/p99": 385126.65625, "sentence_hessian_coeff_abs/var": 9132126208.0, "step": 31, "token_fisher_curvature": 184343.0625, "token_fisher_curvature/max": 85983232.0, "token_fisher_curvature/median": 8.465450562766819e-16, "token_fisher_curvature/min": 3.6734198463196485e-40, "token_fisher_curvature/p25": 1.826414792517085e-21, "token_fisher_curvature/p75": 1.8826540326699615e-10, "token_fisher_curvature/p85": 1.0617077350616455e-07, "token_fisher_curvature/p90": 4.506111145019531e-05, "token_fisher_curvature/p95": 1.1484375, "token_fisher_curvature/p99": 640928.0, "token_fisher_curvature/var": 8291455336448.0, "token_fisher_kl_divergence": 1.6173478798009455e-05, "token_fisher_kl_divergence/max": 0.007537841796875, "token_fisher_kl_divergence/median": 7.431700815906108e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.6023737137301802e-31, "token_fisher_kl_divergence/p75": 1.6517142471458857e-20, "token_fisher_kl_divergence/p85": 9.324138683375338e-18, "token_fisher_kl_divergence/p90": 3.941291737419306e-15, "token_fisher_kl_divergence/p95": 1.0095391189679503e-10, "token_fisher_kl_divergence/p99": 5.6259334087371826e-05, "token_fisher_kl_divergence/var": 6.384227191347236e-08, "token_full_update_term": 0.0005183356697671115, "token_full_update_term/max": 0.130859375, "token_full_update_term/median": 6.938893903907228e-17, "token_full_update_term/min": -6.020069122314453e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 7.560174708487466e-12, "token_full_update_term/p85": 5.602487362921238e-10, "token_full_update_term/p90": 1.0704297892516479e-08, "token_full_update_term/p95": 2.8214999474585056e-06, "token_full_update_term/p99": 0.009947776794433594, "token_full_update_term/var": 3.3577289286768064e-05, "token_hessian_coeff": 10653.0390625, "token_hessian_coeff/max": 85983232.0, "token_hessian_coeff/median": -4.0279701352119446e-08, "token_hessian_coeff/min": -11927552.0, "token_hessian_coeff/p25": -0.00052642822265625, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.1748046875, "token_hessian_coeff/var": 5666742206464.0, "token_hessian_coeff_abs": 185523.90625, "token_hessian_coeff_abs/max": 85983232.0, "token_hessian_coeff_abs/median": 1.0654330253601074e-06, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 8.355982572538778e-11, "token_hessian_coeff_abs/p75": 0.0017242431640625, "token_hessian_coeff_abs/p99": 4849664.0, "token_hessian_coeff_abs/var": 5632436469760.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166667461395264, "adam_stats/lm_head/lr_effective_max": 5.872522888239473e-05, "adam_stats/lm_head/lr_effective_mean": -8.49681575099126e-11, "adam_stats/lm_head/lr_effective_min": -6.18519916315563e-05, "adam_stats/lm_head/lr_effective_std": 1.3772072406936786e-06, "adam_stats/lr_effective_max": 7.152014586608857e-05, "adam_stats/lr_effective_mean": -1.9012087737468875e-10, "adam_stats/lr_effective_min": -6.90181041136384e-05, "adam_stats/m_t_max": 0.0017257966101169586, "adam_stats/m_t_mean": 1.5439118733473478e-11, "adam_stats/m_t_min": -0.0015000728890299797, "adam_stats/v_t_max": 2.600833795440849e-05, "adam_stats/v_t_mean": 1.7560985218073588e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166667461395264, "all_logprobs": -0.010784918442368507, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.875, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -5.9604644775390625e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.000553131103515625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.018848173320293427, "clip_ratio": 0.0, "completion_length": 618.71875, "completion_length/correct": 543.8103637695312, "completion_length/correct/max": 992.0, "completion_length/correct/median": 508.0, "completion_length/correct/min": 351.0, "completion_length/correct/p25": 410.5, "completion_length/correct/p75": 634.0, "completion_length/correct/var": 24889.173828125, "completion_length/incorrect": 733.0526123046875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 648.0, "completion_length/incorrect/min": 342.0, "completion_length/incorrect/p25": 536.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 77465.734375, "completion_length/max": 1024.0, "completion_length/median": 551.0, "completion_length/min": 342.0, "completion_length/p25": 413.75, "completion_length/p75": 777.0, "completion_length/var": 53759.09375, "curvature_clip_ratio_token_fisher": 0.010791790671646595, "curvature_clip_ratio_token_hessian": 0.007239422760903835, "curvature_clip_ratio_total_fisher": 0.010791790671646595, "curvature_clip_ratio_total_full": 0.010791790671646595, "curvature_clip_ratio_total_hessian": 0.007239422760903835, "epoch": 0.0512, "feature_vector_variance/max_squared_error": 70082.9296875, "feature_vector_variance/metric": 29440.49609375, "generated_tokens/total": 1942668.0, "global_fisher_curvature": 69632.0, "global_fisher_curvature/max": 69632.0, "global_fisher_curvature/median": 69632.0, "global_fisher_curvature/min": 69632.0, "global_fisher_curvature/p25": 69632.0, "global_fisher_curvature/p75": 69632.0, "global_fisher_curvature/p85": 69632.0, "global_fisher_curvature/p90": 69632.0, "global_fisher_curvature/p95": 69632.0, "global_fisher_curvature/p99": 69632.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.9604644775390625e-06, "global_fisher_kl_divergence/max": 5.9604644775390625e-06, "global_fisher_kl_divergence/median": 5.9604644775390625e-06, "global_fisher_kl_divergence/min": 5.9604644775390625e-06, "global_fisher_kl_divergence/p25": 5.9604644775390625e-06, "global_fisher_kl_divergence/p75": 5.9604644775390625e-06, "global_fisher_kl_divergence/p85": 5.9604644775390625e-06, "global_fisher_kl_divergence/p90": 5.9604644775390625e-06, "global_fisher_kl_divergence/p95": 5.9604644775390625e-06, "global_fisher_kl_divergence/p99": 5.9604644775390625e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0751953125, "global_full_update_term/max": 0.0751953125, "global_full_update_term/median": 0.0751953125, "global_full_update_term/min": 0.0751953125, "global_full_update_term/p25": 0.0751953125, "global_full_update_term/p75": 0.0751953125, "global_full_update_term/p85": 0.0751953125, "global_full_update_term/p90": 0.0751953125, "global_full_update_term/p95": 0.0751953125, "global_full_update_term/p99": 0.0751953125, "global_full_update_term/var": NaN, "global_hessian_coeff": 16640.0, "global_hessian_coeff/max": 16640.0, "global_hessian_coeff/median": 16640.0, "global_hessian_coeff/min": 16640.0, "global_hessian_coeff/p25": 16640.0, "global_hessian_coeff/p75": 16640.0, "global_hessian_coeff/p99": 16640.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 16640.0, "global_hessian_coeff_abs/max": 16640.0, "global_hessian_coeff_abs/median": 16640.0, "global_hessian_coeff_abs/min": 16640.0, "global_hessian_coeff_abs/p25": 16640.0, "global_hessian_coeff_abs/p75": 16640.0, "global_hessian_coeff_abs/p99": 16640.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.15438076853752136, "learning_rate": 1.2895048502539883e-05, "loss": -0.6042, "masked_global_fisher_curvature": 620.0, "masked_global_fisher_curvature/max": 620.0, "masked_global_fisher_curvature/median": 620.0, "masked_global_fisher_curvature/min": 620.0, "masked_global_fisher_curvature/p25": 620.0, "masked_global_fisher_curvature/p75": 620.0, "masked_global_fisher_curvature/p85": 620.0, "masked_global_fisher_curvature/p90": 620.0, "masked_global_fisher_curvature/p95": 620.0, "masked_global_fisher_curvature/p99": 620.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/max": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/median": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/min": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/p25": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/p75": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/p85": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/p90": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/p95": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/p99": 5.3085386753082275e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0023956298828125, "masked_global_full_update_term/max": 0.0023956298828125, "masked_global_full_update_term/median": 0.0023956298828125, "masked_global_full_update_term/min": 0.0023956298828125, "masked_global_full_update_term/p25": 0.0023956298828125, "masked_global_full_update_term/p75": 0.0023956298828125, "masked_global_full_update_term/p85": 0.0023956298828125, "masked_global_full_update_term/p90": 0.0023956298828125, "masked_global_full_update_term/p95": 0.0023956298828125, "masked_global_full_update_term/p99": 0.0023956298828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1624.0, "masked_global_hessian_coeff/max": -1624.0, "masked_global_hessian_coeff/median": -1624.0, "masked_global_hessian_coeff/min": -1624.0, "masked_global_hessian_coeff/p25": -1624.0, "masked_global_hessian_coeff/p75": -1624.0, "masked_global_hessian_coeff/p99": -1624.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1624.0, "masked_global_hessian_coeff_abs/max": 1624.0, "masked_global_hessian_coeff_abs/median": 1624.0, "masked_global_hessian_coeff_abs/min": 1624.0, "masked_global_hessian_coeff_abs/p25": 1624.0, "masked_global_hessian_coeff_abs/p75": 1624.0, "masked_global_hessian_coeff_abs/p99": 1624.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.7464702129364014, "masked_per_sentence_gradient_norm/max": 8.0625, "masked_per_sentence_gradient_norm/median": 0.84375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.578125, "masked_per_sentence_gradient_norm/var": 5.6199116706848145, "masked_per_token_gradient_norm": 0.03081863932311535, "masked_per_token_gradient_norm/max": 10.1875, "masked_per_token_gradient_norm/median": 5.412337245047638e-16, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.618172973394394e-08, "masked_per_token_gradient_norm/var": 0.1392928659915924, "masked_sentence_fisher_curvature": 251.18359375, "masked_sentence_fisher_curvature/max": 1272.0, "masked_sentence_fisher_curvature/median": 243.0, "masked_sentence_fisher_curvature/min": 1.71875, "masked_sentence_fisher_curvature/p25": 117.875, "masked_sentence_fisher_curvature/p75": 336.0, "masked_sentence_fisher_curvature/p85": 403.0, "masked_sentence_fisher_curvature/p90": 437.0, "masked_sentence_fisher_curvature/p95": 549.0, "masked_sentence_fisher_curvature/p99": 690.6018676757812, "masked_sentence_fisher_curvature/var": 34131.625, "masked_sentence_fisher_kl_divergence": 2.146548716552843e-08, "masked_sentence_fisher_kl_divergence/max": 1.0849907994270325e-07, "masked_sentence_fisher_kl_divergence/median": 2.0721927285194397e-08, "masked_sentence_fisher_kl_divergence/min": 1.4642864698544145e-10, "masked_sentence_fisher_kl_divergence/p25": 1.0069925338029861e-08, "masked_sentence_fisher_kl_divergence/p75": 2.87545844912529e-08, "masked_sentence_fisher_kl_divergence/p85": 3.440072759985924e-08, "masked_sentence_fisher_kl_divergence/p90": 3.7369318306446075e-08, "masked_sentence_fisher_kl_divergence/p95": 4.69735823571682e-08, "masked_sentence_fisher_kl_divergence/p99": 5.89528781347326e-08, "masked_sentence_fisher_kl_divergence/var": 2.4895701218122777e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.091997146606445, "masked_sentence_full_gradient_variance/metric": 8.091997146606445, "masked_sentence_full_gradient_variance/p75": 8.091997146606445, "masked_sentence_full_gradient_variance/p90": 8.091997146606445, "masked_sentence_full_gradient_variance/p95": 8.091997146606445, "masked_sentence_full_gradient_variance/p99": 8.091997146606445, "masked_sentence_full_update_term": 0.0012033333769068122, "masked_sentence_full_update_term/max": 0.00592041015625, "masked_sentence_full_update_term/median": 0.000518798828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0017547607421875, "masked_sentence_full_update_term/p85": 0.00267791748046875, "masked_sentence_full_update_term/p90": 0.004302978515625, "masked_sentence_full_update_term/p95": 0.004302978515625, "masked_sentence_full_update_term/p99": 0.005398561246693134, "masked_sentence_full_update_term/var": 2.316126256118878e-06, "masked_sentence_hessian_coeff": -6827.2294921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -5344.0, "masked_sentence_hessian_coeff/min": -47360.0, "masked_sentence_hessian_coeff/p25": -11392.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 63456628.0, "masked_sentence_hessian_coeff_abs": 6827.2294921875, "masked_sentence_hessian_coeff_abs/max": 47360.0, "masked_sentence_hessian_coeff_abs/median": 5184.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 11392.0, "masked_sentence_hessian_coeff_abs/p99": 26688.06640625, "masked_sentence_hessian_coeff_abs/var": 63456628.0, "masked_token_fisher_curvature": 284.892822265625, "masked_token_fisher_curvature/max": 116736.0, "masked_token_fisher_curvature/median": 5.117434254131581e-17, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 5.045801736573469e-23, "masked_token_fisher_curvature/p75": 9.038103598868474e-12, "masked_token_fisher_curvature/p85": 6.257323548197746e-09, "masked_token_fisher_curvature/p90": 8.791685104370117e-07, "masked_token_fisher_curvature/p95": 0.00482177734375, "masked_token_fisher_curvature/p99": 2656.0, "masked_token_fisher_curvature/var": 15208046.0, "masked_token_fisher_kl_divergence": 2.4347066585050925e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.367133971303521e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.30926825056644e-33, "masked_token_fisher_kl_divergence/p75": 7.709323308994218e-22, "masked_token_fisher_kl_divergence/p85": 5.353248226647178e-19, "masked_token_fisher_kl_divergence/p90": 7.502679033599691e-17, "masked_token_fisher_kl_divergence/p95": 4.121147867408581e-13, "masked_token_fisher_kl_divergence/p99": 2.2724270820617676e-07, "masked_token_fisher_kl_divergence/var": 1.1103775234347357e-13, "masked_token_full_update_term": 1.302315740758786e-05, "masked_token_full_update_term/max": 0.004241943359375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.8312206268310547e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.1379786002407855e-15, "masked_token_full_update_term/p85": 3.836930773104541e-12, "masked_token_full_update_term/p90": 1.0550138540565968e-10, "masked_token_full_update_term/p95": 1.4319084584712982e-08, "masked_token_full_update_term/p99": 0.0002498626708984375, "masked_token_full_update_term/var": 2.6260849494974536e-08, "masked_token_hessian_coeff": -9076.4541015625, "masked_token_hessian_coeff/max": 114.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -3096576.0, "masked_token_hessian_coeff/p25": -4.470348358154297e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.001079171895980835, "masked_token_hessian_coeff/var": 13131634688.0, "masked_token_hessian_coeff_abs": 9076.4609375, "masked_token_hessian_coeff_abs/max": 3096576.0, "masked_token_hessian_coeff_abs/median": 5.262457136723242e-14, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.32133674621582e-06, "masked_token_hessian_coeff_abs/p99": 104960.0, "masked_token_hessian_coeff_abs/var": 13131634688.0, "mean_logprobs": -0.0108642578125, "mean_logprobs/var": 7.104873657226562e-05, "num_completions/total": 3072, "per_sentence_gradient_norm": 34.427085876464844, "per_sentence_gradient_norm/max": 154.0, "per_sentence_gradient_norm/median": 12.375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 62.75, "per_sentence_gradient_norm/var": 1851.975341796875, "per_token_feature_norm": 185.8457489013672, "per_token_feature_norm/max": 282.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 97.0, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 193.0, "per_token_feature_norm/var": 156.32798767089844, "per_token_gradient_norm": 0.7561461329460144, "per_token_gradient_norm/max": 280.0, "per_token_gradient_norm/median": 1.3183898417423734e-15, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.3632310330867767e-08, "per_token_gradient_norm/var": 97.12824249267578, "per_token_policy_error_norm": 0.006192113738507032, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006342582870274782, "policy_entropy": 0.010868306271731853, "policy_entropy/max": 1.9609375, "policy_entropy/median": 2.35741026699543e-09, "policy_entropy/min": 6.988021814847978e-21, "policy_entropy/p25": 3.566924533515703e-12, "policy_entropy/p75": 4.544854164123535e-07, "policy_entropy/var": 0.005316801834851503, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166667461395264, "policy_sharpness": 9.668941497802734, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.075284004211426, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166667461395264, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166667461395264, "sentence_fisher_curvature": 133618.671875, "sentence_fisher_curvature/max": 737280.0, "sentence_fisher_curvature/median": 33792.0, "sentence_fisher_curvature/min": 58.5, "sentence_fisher_curvature/p25": 781.0, "sentence_fisher_curvature/p75": 206848.0, "sentence_fisher_curvature/p85": 289792.0, "sentence_fisher_curvature/p90": 382976.0, "sentence_fisher_curvature/p95": 503296.0, "sentence_fisher_curvature/p99": 686694.5625, "sentence_fisher_curvature/var": 30634553344.0, "sentence_fisher_kl_divergence": 1.1420273040130269e-05, "sentence_fisher_kl_divergence/max": 6.29425048828125e-05, "sentence_fisher_kl_divergence/median": 2.8908252716064453e-06, "sentence_fisher_kl_divergence/min": 5.005858838558197e-09, "sentence_fisher_kl_divergence/p25": 6.682239472866058e-08, "sentence_fisher_kl_divergence/p75": 1.7642974853515625e-05, "sentence_fisher_kl_divergence/p85": 2.4765729904174805e-05, "sentence_fisher_kl_divergence/p90": 3.2782554626464844e-05, "sentence_fisher_kl_divergence/p95": 4.303455352783203e-05, "sentence_fisher_kl_divergence/p99": 5.8639063354348764e-05, "sentence_fisher_kl_divergence/var": 2.237805496463352e-10, "sentence_full_gradient_variance/max_squared_error": 2980.657958984375, "sentence_full_gradient_variance/metric": 2980.657958984375, "sentence_full_gradient_variance/p75": 2980.657958984375, "sentence_full_gradient_variance/p90": 2980.657958984375, "sentence_full_gradient_variance/p95": 2980.657958984375, "sentence_full_gradient_variance/p99": 2980.657958984375, "sentence_full_update_term": 0.0295562744140625, "sentence_full_update_term/max": 0.1669921875, "sentence_full_update_term/median": 0.01129150390625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.05206298828125, "sentence_full_update_term/p85": 0.0770263671875, "sentence_full_update_term/p90": 0.0830078125, "sentence_full_update_term/p95": 0.1002197265625, "sentence_full_update_term/p99": 0.16513672471046448, "sentence_full_update_term/var": 0.0014769434928894043, "sentence_hessian_coeff": 11390.333984375, "sentence_hessian_coeff/max": 276480.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -288768.0, "sentence_hessian_coeff/p25": -22336.0, "sentence_hessian_coeff/p75": 16240.0, "sentence_hessian_coeff/p99": 266752.03125, "sentence_hessian_coeff/var": 6877906432.0, "sentence_hessian_coeff_abs": 48234.3359375, "sentence_hessian_coeff_abs/max": 288768.0, "sentence_hessian_coeff_abs/median": 18688.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 61632.0, "sentence_hessian_coeff_abs/p99": 277094.4375, "sentence_hessian_coeff_abs/var": 4657970688.0, "step": 32, "token_fisher_curvature": 125913.28125, "token_fisher_curvature/max": 89653248.0, "token_fisher_curvature/median": 7.025630077706069e-17, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 5.955700410381799e-23, "token_fisher_curvature/p75": 1.4779288903810084e-11, "token_fisher_curvature/p85": 1.3445969671010971e-08, "token_fisher_curvature/p90": 3.471970558166504e-06, "token_fisher_curvature/p95": 0.10917854309082031, "token_fisher_curvature/p99": 143360.0, "token_fisher_curvature/var": 5577767911424.0, "token_fisher_kl_divergence": 1.0762739293568302e-05, "token_fisher_kl_divergence/max": 0.007659912109375, "token_fisher_kl_divergence/median": 6.007964654163226e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.0796402283213346e-33, "token_fisher_kl_divergence/p75": 1.2639319759810263e-21, "token_fisher_kl_divergence/p85": 1.1519648082658485e-18, "token_fisher_kl_divergence/p90": 2.96637714392034e-16, "token_fisher_kl_divergence/p95": 9.344969242874868e-12, "token_fisher_kl_divergence/p99": 1.2278556823730469e-05, "token_fisher_kl_divergence/var": 4.075319637308894e-08, "token_full_update_term": 0.00035330746322870255, "token_full_update_term/max": 0.1318359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.8312206268310547e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.2620794126737565e-15, "token_full_update_term/p85": 6.821210263296962e-12, "token_full_update_term/p90": 2.2100721253082156e-10, "token_full_update_term/p95": 5.564652383327484e-08, "token_full_update_term/p99": 0.0045166015625, "token_full_update_term/var": 2.2232421542867087e-05, "token_hessian_coeff": 1592.47021484375, "token_hessian_coeff/max": 87556096.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -12255232.0, "token_hessian_coeff/p25": -6.742775440216064e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.002197265625, "token_hessian_coeff/var": 3817650257920.0, "token_hessian_coeff_abs": 128184.203125, "token_hessian_coeff_abs/max": 87556096.0, "token_hessian_coeff_abs/median": 1.1457501614131615e-13, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 6.3478946685791016e-06, "token_hessian_coeff_abs/p99": 2719744.0, "token_hessian_coeff_abs/var": 3801220644864.0 }, { "accuracy_reward": 0.5729166865348816, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24725878238677979, "adam_stats/lm_head/lr_effective_max": 5.2681294619105756e-05, "adam_stats/lm_head/lr_effective_mean": -1.368808955071188e-10, "adam_stats/lm_head/lr_effective_min": -5.731655255658552e-05, "adam_stats/lm_head/lr_effective_std": 1.379857849315158e-06, "adam_stats/lr_effective_max": 7.03913246979937e-05, "adam_stats/lr_effective_mean": -2.31630770120006e-10, "adam_stats/lr_effective_min": -6.991636473685503e-05, "adam_stats/m_t_max": 0.0015440616989508271, "adam_stats/m_t_mean": 1.5363077129904035e-11, "adam_stats/m_t_min": -0.0013338531134650111, "adam_stats/v_t_max": 2.5982946681324393e-05, "adam_stats/v_t_mean": 1.7544283083606449e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5729166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24725878238677979, "all_logprobs": -0.010263679549098015, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -3.814697265625e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0002040863037109375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016022279858589172, "clip_ratio": 0.0, "completion_length": 598.5208740234375, "completion_length/correct": 481.16363525390625, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 421.0, "completion_length/correct/min": 177.0, "completion_length/correct/p25": 366.5, "completion_length/correct/p75": 592.5, "completion_length/correct/var": 27067.177734375, "completion_length/incorrect": 755.951171875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 740.0, "completion_length/incorrect/min": 326.0, "completion_length/incorrect/p25": 487.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 65234.05078125, "completion_length/max": 1024.0, "completion_length/median": 495.0, "completion_length/min": 177.0, "completion_length/p25": 400.0, "completion_length/p75": 727.25, "completion_length/var": 61522.58984375, "curvature_clip_ratio_token_fisher": 0.009694037027657032, "curvature_clip_ratio_token_hessian": 0.006822374649345875, "curvature_clip_ratio_total_fisher": 0.009694037027657032, "curvature_clip_ratio_total_full": 0.009694037027657032, "curvature_clip_ratio_total_hessian": 0.006822374649345875, "epoch": 0.0528, "feature_vector_variance/max_squared_error": 67716.578125, "feature_vector_variance/metric": 29522.3046875, "generated_tokens/total": 2000126.0, "global_fisher_curvature": 61952.0, "global_fisher_curvature/max": 61952.0, "global_fisher_curvature/median": 61952.0, "global_fisher_curvature/min": 61952.0, "global_fisher_curvature/p25": 61952.0, "global_fisher_curvature/p75": 61952.0, "global_fisher_curvature/p85": 61952.0, "global_fisher_curvature/p90": 61952.0, "global_fisher_curvature/p95": 61952.0, "global_fisher_curvature/p99": 61952.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.155801773071289e-06, "global_fisher_kl_divergence/max": 5.155801773071289e-06, "global_fisher_kl_divergence/median": 5.155801773071289e-06, "global_fisher_kl_divergence/min": 5.155801773071289e-06, "global_fisher_kl_divergence/p25": 5.155801773071289e-06, "global_fisher_kl_divergence/p75": 5.155801773071289e-06, "global_fisher_kl_divergence/p85": 5.155801773071289e-06, "global_fisher_kl_divergence/p90": 5.155801773071289e-06, "global_fisher_kl_divergence/p95": 5.155801773071289e-06, "global_fisher_kl_divergence/p99": 5.155801773071289e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.06201171875, "global_full_update_term/max": 0.06201171875, "global_full_update_term/median": 0.06201171875, "global_full_update_term/min": 0.06201171875, "global_full_update_term/p25": 0.06201171875, "global_full_update_term/p75": 0.06201171875, "global_full_update_term/p85": 0.06201171875, "global_full_update_term/p90": 0.06201171875, "global_full_update_term/p95": 0.06201171875, "global_full_update_term/p99": 0.06201171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 8192.0, "global_hessian_coeff/max": 8192.0, "global_hessian_coeff/median": 8192.0, "global_hessian_coeff/min": 8192.0, "global_hessian_coeff/p25": 8192.0, "global_hessian_coeff/p75": 8192.0, "global_hessian_coeff/p99": 8192.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 8192.0, "global_hessian_coeff_abs/max": 8192.0, "global_hessian_coeff_abs/median": 8192.0, "global_hessian_coeff_abs/min": 8192.0, "global_hessian_coeff_abs/p25": 8192.0, "global_hessian_coeff_abs/p75": 8192.0, "global_hessian_coeff_abs/p99": 8192.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.025582600384950638, "learning_rate": 1.270993777844248e-05, "loss": -0.5729, "masked_global_fisher_curvature": 286.0, "masked_global_fisher_curvature/max": 286.0, "masked_global_fisher_curvature/median": 286.0, "masked_global_fisher_curvature/min": 286.0, "masked_global_fisher_curvature/p25": 286.0, "masked_global_fisher_curvature/p75": 286.0, "masked_global_fisher_curvature/p85": 286.0, "masked_global_fisher_curvature/p90": 286.0, "masked_global_fisher_curvature/p95": 286.0, "masked_global_fisher_curvature/p99": 286.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/max": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/median": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/min": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/p25": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/p75": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/p85": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/p90": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/p95": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/p99": 2.3748725652694702e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0023651123046875, "masked_global_full_update_term/max": 0.0023651123046875, "masked_global_full_update_term/median": 0.0023651123046875, "masked_global_full_update_term/min": 0.0023651123046875, "masked_global_full_update_term/p25": 0.0023651123046875, "masked_global_full_update_term/p75": 0.0023651123046875, "masked_global_full_update_term/p85": 0.0023651123046875, "masked_global_full_update_term/p90": 0.0023651123046875, "masked_global_full_update_term/p95": 0.0023651123046875, "masked_global_full_update_term/p99": 0.0023651123046875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1896.0, "masked_global_hessian_coeff/max": -1896.0, "masked_global_hessian_coeff/median": -1896.0, "masked_global_hessian_coeff/min": -1896.0, "masked_global_hessian_coeff/p25": -1896.0, "masked_global_hessian_coeff/p75": -1896.0, "masked_global_hessian_coeff/p99": -1896.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1896.0, "masked_global_hessian_coeff_abs/max": 1896.0, "masked_global_hessian_coeff_abs/median": 1896.0, "masked_global_hessian_coeff_abs/min": 1896.0, "masked_global_hessian_coeff_abs/p25": 1896.0, "masked_global_hessian_coeff_abs/p75": 1896.0, "masked_global_hessian_coeff_abs/p99": 1896.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.328857421875, "masked_per_sentence_gradient_norm/max": 7.34375, "masked_per_sentence_gradient_norm/median": 0.83203125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 1.625, "masked_per_sentence_gradient_norm/var": 3.165790319442749, "masked_per_token_gradient_norm": 0.024126067757606506, "masked_per_token_gradient_norm/max": 9.375, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.0069925338029861e-08, "masked_per_token_gradient_norm/var": 0.10471644252538681, "masked_sentence_fisher_curvature": 211.759765625, "masked_sentence_fisher_curvature/max": 700.0, "masked_sentence_fisher_curvature/median": 174.0, "masked_sentence_fisher_curvature/min": 10.9375, "masked_sentence_fisher_curvature/p25": 97.125, "masked_sentence_fisher_curvature/p75": 264.5, "masked_sentence_fisher_curvature/p85": 328.0, "masked_sentence_fisher_curvature/p90": 434.0, "masked_sentence_fisher_curvature/p95": 559.0, "masked_sentence_fisher_curvature/p99": 696.2000122070312, "masked_sentence_fisher_curvature/var": 24404.349609375, "masked_sentence_fisher_kl_divergence": 1.7611380798143728e-08, "masked_sentence_fisher_kl_divergence/max": 5.820766091346741e-08, "masked_sentence_fisher_kl_divergence/median": 1.4493707567453384e-08, "masked_sentence_fisher_kl_divergence/min": 9.094947017729282e-10, "masked_sentence_fisher_kl_divergence/p25": 8.061761036515236e-09, "masked_sentence_fisher_kl_divergence/p75": 2.1973391994833946e-08, "masked_sentence_fisher_kl_divergence/p85": 2.7241185307502747e-08, "masked_sentence_fisher_kl_divergence/p90": 3.608874976634979e-08, "masked_sentence_fisher_kl_divergence/p95": 4.6566128730773926e-08, "masked_sentence_fisher_kl_divergence/p99": 5.798647251253897e-08, "masked_sentence_fisher_kl_divergence/var": 1.6889455803424484e-16, "masked_sentence_full_gradient_variance/max_squared_error": 4.755120754241943, "masked_sentence_full_gradient_variance/metric": 4.755120754241943, "masked_sentence_full_gradient_variance/p75": 4.755120754241943, "masked_sentence_full_gradient_variance/p90": 4.755120754241943, "masked_sentence_full_gradient_variance/p95": 4.755120754241943, "masked_sentence_full_gradient_variance/p99": 4.755120754241943, "masked_sentence_full_update_term": 0.0009369850158691406, "masked_sentence_full_update_term/max": 0.005706787109375, "masked_sentence_full_update_term/median": 0.00055694580078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0011959075927734375, "masked_sentence_full_update_term/p85": 0.0019989013671875, "masked_sentence_full_update_term/p90": 0.0024871826171875, "masked_sentence_full_update_term/p95": 0.00392913818359375, "masked_sentence_full_update_term/p99": 0.005706787109375, "masked_sentence_full_update_term/var": 1.6845085610839305e-06, "masked_sentence_hessian_coeff": -5687.83349609375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -5312.0, "masked_sentence_hessian_coeff/min": -26496.0, "masked_sentence_hessian_coeff/p25": -9488.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 39833492.0, "masked_sentence_hessian_coeff_abs": 5687.83349609375, "masked_sentence_hessian_coeff_abs/max": 26496.0, "masked_sentence_hessian_coeff_abs/median": 5248.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9488.0, "masked_sentence_hessian_coeff_abs/p99": 23699.208984375, "masked_sentence_hessian_coeff_abs/var": 39833492.0, "masked_token_fisher_curvature": 242.48731994628906, "masked_token_fisher_curvature/max": 119808.0, "masked_token_fisher_curvature/median": 1.0625181290357943e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.830734777309682e-22, "masked_token_fisher_curvature/p75": 1.375610736431554e-11, "masked_token_fisher_curvature/p85": 4.511093720793724e-09, "masked_token_fisher_curvature/p90": 3.632158041000366e-07, "masked_token_fisher_curvature/p95": 0.000766754150390625, "masked_token_fisher_curvature/p99": 1384.0, "masked_token_fisher_curvature/var": 12868749.0, "masked_token_fisher_kl_divergence": 2.016254718739674e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 8.835242138475332e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.025193583769323e-32, "masked_token_fisher_kl_divergence/p75": 1.1448179677733903e-21, "masked_token_fisher_kl_divergence/p85": 3.7438856268640075e-19, "masked_token_fisher_kl_divergence/p90": 3.0140820395097023e-17, "masked_token_fisher_kl_divergence/p95": 6.394884621840902e-14, "masked_token_fisher_kl_divergence/p99": 1.150183379650116e-07, "masked_token_fisher_kl_divergence/var": 8.895916772319601e-14, "masked_token_full_update_term": 1.0086968359246384e-05, "masked_token_full_update_term/max": 0.0040283203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.0132789611816406e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.2327252813834093e-16, "masked_token_full_update_term/p85": 1.5987211554602254e-12, "masked_token_full_update_term/p90": 4.4565240386873484e-11, "masked_token_full_update_term/p95": 4.94765117764473e-09, "masked_token_full_update_term/p99": 5.888938903808594e-05, "masked_token_full_update_term/var": 1.9417750962702485e-08, "masked_token_hessian_coeff": -7055.20361328125, "masked_token_hessian_coeff/max": 211.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -2998272.0, "masked_token_hessian_coeff/p25": -1.6205012798309326e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.00244140625, "masked_token_hessian_coeff/var": 9821022208.0, "masked_token_hessian_coeff_abs": 7055.22509765625, "masked_token_hessian_coeff_abs/max": 2998272.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.428889274597168e-06, "masked_token_hessian_coeff_abs/p99": 18432.0, "masked_token_hessian_coeff_abs/var": 9821022208.0, "mean_logprobs": -0.01007080078125, "mean_logprobs/var": 3.647804260253906e-05, "num_completions/total": 3168, "per_sentence_gradient_norm": 26.625652313232422, "per_sentence_gradient_norm/max": 160.0, "per_sentence_gradient_norm/median": 6.5625, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 47.75, "per_sentence_gradient_norm/var": 1029.732177734375, "per_token_feature_norm": 186.23577880859375, "per_token_feature_norm/max": 272.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 98.5, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 193.0, "per_token_feature_norm/var": 202.73167419433594, "per_token_gradient_norm": 0.699423611164093, "per_token_gradient_norm/max": 280.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.4319084584712982e-08, "per_token_gradient_norm/var": 88.98538970947266, "per_token_policy_error_norm": 0.005911960732191801, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005715940613299608, "policy_entropy": 0.010819298215210438, "policy_entropy/max": 1.984375, "policy_entropy/median": 3.0995579436421394e-09, "policy_entropy/min": 1.6940658945086007e-20, "policy_entropy/p25": 1.0402345651527867e-11, "policy_entropy/p75": 5.699694156646729e-07, "policy_entropy/var": 0.006872063037008047, "policy_loss": -0.5729166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24725878238677979, "policy_sharpness": 9.708040237426758, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.861629843711853, "reward": 0.5729166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24725878238677979, "rewards/accuracy_reward": 0.5729166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24725878238677979, "sentence_fisher_curvature": 131102.8125, "sentence_fisher_curvature/max": 577536.0, "sentence_fisher_curvature/median": 2608.0, "sentence_fisher_curvature/min": 16.5, "sentence_fisher_curvature/p25": 624.0, "sentence_fisher_curvature/p75": 244736.0, "sentence_fisher_curvature/p85": 327168.0, "sentence_fisher_curvature/p90": 372736.0, "sentence_fisher_curvature/p95": 449536.0, "sentence_fisher_curvature/p99": 513331.40625, "sentence_fisher_curvature/var": 26384865280.0, "sentence_fisher_kl_divergence": 1.0903393558692187e-05, "sentence_fisher_kl_divergence/max": 4.792213439941406e-05, "sentence_fisher_kl_divergence/median": 2.169981598854065e-07, "sentence_fisher_kl_divergence/min": 1.3751559890806675e-09, "sentence_fisher_kl_divergence/p25": 5.192123353481293e-08, "sentence_fisher_kl_divergence/p75": 2.0384788513183594e-05, "sentence_fisher_kl_divergence/p85": 2.7239322662353516e-05, "sentence_fisher_kl_divergence/p90": 3.0994415283203125e-05, "sentence_fisher_kl_divergence/p95": 3.737211227416992e-05, "sentence_fisher_kl_divergence/p99": 4.2712705180747434e-05, "sentence_fisher_kl_divergence/var": 1.8246516797493229e-10, "sentence_full_gradient_variance/max_squared_error": 1697.30615234375, "sentence_full_gradient_variance/metric": 1697.30615234375, "sentence_full_gradient_variance/p75": 1697.30615234375, "sentence_full_gradient_variance/p90": 1697.30615234375, "sentence_full_gradient_variance/p95": 1697.30615234375, "sentence_full_gradient_variance/p99": 1697.30615234375, "sentence_full_update_term": 0.02293682098388672, "sentence_full_update_term/max": 0.15625, "sentence_full_update_term/median": 0.003021240234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.03826904296875, "sentence_full_update_term/p85": 0.0582275390625, "sentence_full_update_term/p90": 0.06591796875, "sentence_full_update_term/p95": 0.0728759765625, "sentence_full_update_term/p99": 0.08342308551073074, "sentence_full_update_term/var": 0.0008266696822829545, "sentence_hessian_coeff": 3881.33349609375, "sentence_hessian_coeff/max": 242688.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -270336.0, "sentence_hessian_coeff/p25": -16064.0, "sentence_hessian_coeff/p75": 21952.0, "sentence_hessian_coeff/p99": 241715.203125, "sentence_hessian_coeff/var": 5940775424.0, "sentence_hessian_coeff_abs": 44606.66796875, "sentence_hessian_coeff_abs/max": 270336.0, "sentence_hessian_coeff_abs/median": 16064.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 70400.0, "sentence_hessian_coeff_abs/p99": 244070.484375, "sentence_hessian_coeff_abs/var": 3945299712.0, "step": 33, "token_fisher_curvature": 117322.0703125, "token_fisher_curvature/max": 98566144.0, "token_fisher_curvature/median": 1.3704315460216776e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.525566491854225e-22, "token_fisher_curvature/p75": 2.0179413695586845e-11, "token_fisher_curvature/p85": 8.149072527885437e-09, "token_fisher_curvature/p90": 1.0624062269926071e-06, "token_fisher_curvature/p95": 0.008624076843261719, "token_fisher_curvature/p99": 94136.0, "token_fisher_curvature/var": 5117663248384.0, "token_fisher_kl_divergence": 9.755617611517664e-06, "token_fisher_kl_divergence/max": 0.0081787109375, "token_fisher_kl_divergence/median": 1.1410084133116715e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.6029725670854937e-32, "token_fisher_kl_divergence/p75": 1.675867921032434e-21, "token_fisher_kl_divergence/p85": 6.776263578034403e-19, "token_fisher_kl_divergence/p90": 8.842346342977092e-17, "token_fisher_kl_divergence/p95": 7.156775172489915e-13, "token_fisher_kl_divergence/p99": 7.833819836378098e-06, "token_fisher_kl_divergence/var": 3.5377869522790206e-08, "token_full_update_term": 0.00032582576386630535, "token_full_update_term/max": 0.1357421875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.0132789611816406e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.187894806610529e-16, "token_full_update_term/p85": 2.8421709430404007e-12, "token_full_update_term/p90": 8.321876521222293e-11, "token_full_update_term/p95": 1.876014721347019e-08, "token_full_update_term/p99": 0.0026397705078125, "token_full_update_term/var": 2.0106583178858273e-05, "token_hessian_coeff": -2740.394287109375, "token_hessian_coeff/max": 97517568.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -12451840.0, "token_hessian_coeff/p25": -2.3748725652694702e-07, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.0048828125, "token_hessian_coeff/var": 3435861901312.0, "token_hessian_coeff_abs": 117150.671875, "token_hessian_coeff_abs/max": 97517568.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.647059202194214e-06, "token_hessian_coeff_abs/p99": 1712128.0, "token_hessian_coeff_abs/var": 3422145216512.0 }, { "accuracy_reward": 0.6458333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2311403602361679, "adam_stats/lm_head/lr_effective_max": 5.4456337238661945e-05, "adam_stats/lm_head/lr_effective_mean": -6.902752675008728e-11, "adam_stats/lm_head/lr_effective_min": -5.5782929848646745e-05, "adam_stats/lm_head/lr_effective_std": 1.4124292420092388e-06, "adam_stats/lr_effective_max": 6.83025355101563e-05, "adam_stats/lr_effective_mean": -2.072217536452925e-10, "adam_stats/lr_effective_min": -6.584107177332044e-05, "adam_stats/m_t_max": 0.0014300913317129016, "adam_stats/m_t_mean": 1.315605003410214e-11, "adam_stats/m_t_min": -0.0012534920824691653, "adam_stats/v_t_max": 2.5957195248338394e-05, "adam_stats/v_t_mean": 1.7528565404711927e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6458333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2311403602361679, "all_logprobs": -0.010923655703663826, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.75, "all_logprobs/p1": -0.251953125, "all_logprobs/p10": -4.172325134277344e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.000431060791015625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.0153580904006958, "clip_ratio": 0.0, "completion_length": 515.6666870117188, "completion_length/correct": 433.4031982421875, "completion_length/correct/max": 966.0, "completion_length/correct/median": 387.0, "completion_length/correct/min": 211.0, "completion_length/correct/p25": 350.75, "completion_length/correct/p75": 431.0, "completion_length/correct/var": 34095.62109375, "completion_length/incorrect": 665.6764526367188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 472.0, "completion_length/incorrect/min": 376.0, "completion_length/incorrect/p25": 447.0, "completion_length/incorrect/p75": 987.25, "completion_length/incorrect/var": 73239.984375, "completion_length/max": 1024.0, "completion_length/median": 431.0, "completion_length/min": 211.0, "completion_length/p25": 360.0, "completion_length/p75": 520.25, "completion_length/var": 59804.453125, "curvature_clip_ratio_token_fisher": 0.011999030597507954, "curvature_clip_ratio_token_hessian": 0.007777149323374033, "curvature_clip_ratio_total_fisher": 0.011999030597507954, "curvature_clip_ratio_total_full": 0.011999030597507954, "curvature_clip_ratio_total_hessian": 0.007777149323374033, "epoch": 0.0544, "feature_vector_variance/max_squared_error": 70410.3046875, "feature_vector_variance/metric": 29681.966796875, "generated_tokens/total": 2049630.0, "global_fisher_curvature": 80896.0, "global_fisher_curvature/max": 80896.0, "global_fisher_curvature/median": 80896.0, "global_fisher_curvature/min": 80896.0, "global_fisher_curvature/p25": 80896.0, "global_fisher_curvature/p75": 80896.0, "global_fisher_curvature/p85": 80896.0, "global_fisher_curvature/p90": 80896.0, "global_fisher_curvature/p95": 80896.0, "global_fisher_curvature/p99": 80896.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.5267086029052734e-06, "global_fisher_kl_divergence/max": 6.5267086029052734e-06, "global_fisher_kl_divergence/median": 6.5267086029052734e-06, "global_fisher_kl_divergence/min": 6.5267086029052734e-06, "global_fisher_kl_divergence/p25": 6.5267086029052734e-06, "global_fisher_kl_divergence/p75": 6.5267086029052734e-06, "global_fisher_kl_divergence/p85": 6.5267086029052734e-06, "global_fisher_kl_divergence/p90": 6.5267086029052734e-06, "global_fisher_kl_divergence/p95": 6.5267086029052734e-06, "global_fisher_kl_divergence/p99": 6.5267086029052734e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.06640625, "global_full_update_term/max": 0.06640625, "global_full_update_term/median": 0.06640625, "global_full_update_term/min": 0.06640625, "global_full_update_term/p25": 0.06640625, "global_full_update_term/p75": 0.06640625, "global_full_update_term/p85": 0.06640625, "global_full_update_term/p90": 0.06640625, "global_full_update_term/p95": 0.06640625, "global_full_update_term/p99": 0.06640625, "global_full_update_term/var": NaN, "global_hessian_coeff": 12736.0, "global_hessian_coeff/max": 12736.0, "global_hessian_coeff/median": 12736.0, "global_hessian_coeff/min": 12736.0, "global_hessian_coeff/p25": 12736.0, "global_hessian_coeff/p75": 12736.0, "global_hessian_coeff/p99": 12736.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 12736.0, "global_hessian_coeff_abs/max": 12736.0, "global_hessian_coeff_abs/median": 12736.0, "global_hessian_coeff_abs/min": 12736.0, "global_hessian_coeff_abs/p25": 12736.0, "global_hessian_coeff_abs/p75": 12736.0, "global_hessian_coeff_abs/p99": 12736.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03881899267435074, "learning_rate": 1.2518479547691437e-05, "loss": -0.6458, "masked_global_fisher_curvature": 332.0, "masked_global_fisher_curvature/max": 332.0, "masked_global_fisher_curvature/median": 332.0, "masked_global_fisher_curvature/min": 332.0, "masked_global_fisher_curvature/p25": 332.0, "masked_global_fisher_curvature/p75": 332.0, "masked_global_fisher_curvature/p85": 332.0, "masked_global_fisher_curvature/p90": 332.0, "masked_global_fisher_curvature/p95": 332.0, "masked_global_fisher_curvature/p99": 332.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/max": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/median": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/min": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p25": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p75": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p85": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p90": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p95": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p99": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0022430419921875, "masked_global_full_update_term/max": 0.0022430419921875, "masked_global_full_update_term/median": 0.0022430419921875, "masked_global_full_update_term/min": 0.0022430419921875, "masked_global_full_update_term/p25": 0.0022430419921875, "masked_global_full_update_term/p75": 0.0022430419921875, "masked_global_full_update_term/p85": 0.0022430419921875, "masked_global_full_update_term/p90": 0.0022430419921875, "masked_global_full_update_term/p95": 0.0022430419921875, "masked_global_full_update_term/p99": 0.0022430419921875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -3232.0, "masked_global_hessian_coeff/max": -3232.0, "masked_global_hessian_coeff/median": -3232.0, "masked_global_hessian_coeff/min": -3232.0, "masked_global_hessian_coeff/p25": -3232.0, "masked_global_hessian_coeff/p75": -3232.0, "masked_global_hessian_coeff/p99": -3232.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3232.0, "masked_global_hessian_coeff_abs/max": 3232.0, "masked_global_hessian_coeff_abs/median": 3232.0, "masked_global_hessian_coeff_abs/min": 3232.0, "masked_global_hessian_coeff_abs/p25": 3232.0, "masked_global_hessian_coeff_abs/p75": 3232.0, "masked_global_hessian_coeff_abs/p99": 3232.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.0471599102020264, "masked_per_sentence_gradient_norm/max": 4.5625, "masked_per_sentence_gradient_norm/median": 0.404296875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 1.810546875, "masked_per_sentence_gradient_norm/var": 1.5353893041610718, "masked_per_token_gradient_norm": 0.029631849378347397, "masked_per_token_gradient_norm/max": 10.625, "masked_per_token_gradient_norm/median": 1.6209256159527285e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.284083843231201e-08, "masked_per_token_gradient_norm/var": 0.14029087126255035, "masked_sentence_fisher_curvature": 274.48309326171875, "masked_sentence_fisher_curvature/max": 1456.0, "masked_sentence_fisher_curvature/median": 194.0, "masked_sentence_fisher_curvature/min": 11.625, "masked_sentence_fisher_curvature/p25": 142.0, "masked_sentence_fisher_curvature/p75": 346.5, "masked_sentence_fisher_curvature/p85": 446.5, "masked_sentence_fisher_curvature/p90": 650.0, "masked_sentence_fisher_curvature/p95": 750.0, "masked_sentence_fisher_curvature/p99": 1038.0013427734375, "masked_sentence_fisher_curvature/var": 59426.875, "masked_sentence_fisher_kl_divergence": 2.216817662770154e-08, "masked_sentence_fisher_kl_divergence/max": 1.1781230568885803e-07, "masked_sentence_fisher_kl_divergence/median": 1.57160684466362e-08, "masked_sentence_fisher_kl_divergence/min": 9.38598532229662e-10, "masked_sentence_fisher_kl_divergence/p25": 1.146690919995308e-08, "masked_sentence_fisher_kl_divergence/p75": 2.796878106892109e-08, "masked_sentence_fisher_kl_divergence/p85": 3.6030542105436325e-08, "masked_sentence_fisher_kl_divergence/p90": 5.25033101439476e-08, "masked_sentence_fisher_kl_divergence/p95": 6.05359673500061e-08, "masked_sentence_fisher_kl_divergence/p99": 8.374929194587821e-08, "masked_sentence_fisher_kl_divergence/var": 3.877439958635553e-16, "masked_sentence_full_gradient_variance/max_squared_error": 2.5114474296569824, "masked_sentence_full_gradient_variance/metric": 2.5114474296569824, "masked_sentence_full_gradient_variance/p75": 2.5114474296569824, "masked_sentence_full_gradient_variance/p90": 2.5114474296569824, "masked_sentence_full_gradient_variance/p95": 2.5114474296569824, "masked_sentence_full_gradient_variance/p99": 2.5114474296569824, "masked_sentence_full_update_term": 0.0007231434574350715, "masked_sentence_full_update_term/max": 0.004486083984375, "masked_sentence_full_update_term/median": 0.000270843505859375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0013446807861328125, "masked_sentence_full_update_term/p85": 0.001712799072265625, "masked_sentence_full_update_term/p90": 0.001850128173828125, "masked_sentence_full_update_term/p95": 0.00254058837890625, "masked_sentence_full_update_term/p99": 0.0027900750283151865, "masked_sentence_full_update_term/var": 8.07892376997188e-07, "masked_sentence_hessian_coeff": -8054.25, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6528.0, "masked_sentence_hessian_coeff/min": -55552.0, "masked_sentence_hessian_coeff/p25": -10064.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 111679768.0, "masked_sentence_hessian_coeff_abs": 8054.25, "masked_sentence_hessian_coeff_abs/max": 55552.0, "masked_sentence_hessian_coeff_abs/median": 5408.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 10064.0, "masked_sentence_hessian_coeff_abs/p99": 38771.25390625, "masked_sentence_hessian_coeff_abs/var": 111679768.0, "masked_token_fisher_curvature": 303.3728332519531, "masked_token_fisher_curvature/max": 122368.0, "masked_token_fisher_curvature/median": 5.074066167232161e-17, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 2.83309359799412e-23, "masked_token_fisher_curvature/p75": 6.252776074688882e-12, "masked_token_fisher_curvature/p85": 4.336470738053322e-09, "masked_token_fisher_curvature/p90": 3.1478703022003174e-07, "masked_token_fisher_curvature/p95": 0.0024871826171875, "masked_token_fisher_curvature/p99": 4096.0, "masked_token_fisher_curvature/var": 16560147.0, "masked_token_fisher_kl_divergence": 2.450399883002774e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 4.089454932665725e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.2870418089598426e-33, "masked_token_fisher_kl_divergence/p75": 5.062345348824529e-22, "masked_token_fisher_kl_divergence/p85": 3.5067164016328034e-19, "masked_token_fisher_kl_divergence/p90": 2.5478751053409354e-17, "masked_token_fisher_kl_divergence/p95": 2.007283228522283e-13, "masked_token_fisher_kl_divergence/p99": 3.3155083656311035e-07, "masked_token_fisher_kl_divergence/var": 1.0802956565327676e-13, "masked_token_full_update_term": 1.2365379916445818e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.9637088775634766e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 7.993605777301127e-15, "masked_token_full_update_term/p85": 4.746425474877469e-12, "masked_token_full_update_term/p90": 1.0686562745831907e-10, "masked_token_full_update_term/p95": 1.0166104402742349e-08, "masked_token_full_update_term/p99": 6.866455078125e-05, "masked_token_full_update_term/var": 2.5964411065615423e-08, "masked_token_hessian_coeff": -9055.2587890625, "masked_token_hessian_coeff/max": 19.875, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -3080192.0, "masked_token_hessian_coeff/p25": -1.6763806343078613e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.003719031810760498, "masked_token_hessian_coeff/var": 14372783104.0, "masked_token_hessian_coeff_abs": 9055.263671875, "masked_token_hessian_coeff_abs/max": 3080192.0, "masked_token_hessian_coeff_abs/median": 1.3855583347321954e-12, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.150369644165039e-05, "masked_token_hessian_coeff_abs/p99": 23796.5, "masked_token_hessian_coeff_abs/var": 14372783104.0, "mean_logprobs": -0.0111083984375, "mean_logprobs/var": 7.390975952148438e-05, "num_completions/total": 3264, "per_sentence_gradient_norm": 25.320964813232422, "per_sentence_gradient_norm/max": 136.0, "per_sentence_gradient_norm/median": 8.9375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 36.5625, "per_sentence_gradient_norm/var": 997.1574096679688, "per_token_feature_norm": 187.3409423828125, "per_token_feature_norm/max": 282.0, "per_token_feature_norm/median": 187.0, "per_token_feature_norm/min": 106.0, "per_token_feature_norm/p25": 181.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 169.71107482910156, "per_token_gradient_norm": 0.8076465129852295, "per_token_gradient_norm/max": 294.0, "per_token_gradient_norm/median": 2.7644553313166398e-14, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 6.239861249923706e-08, "per_token_gradient_norm/var": 101.90151977539062, "per_token_policy_error_norm": 0.0064849164336919785, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006173938047140837, "policy_entropy": 0.011751079931855202, "policy_entropy/max": 1.5703125, "policy_entropy/median": 2.0954757928848267e-09, "policy_entropy/min": 5.2304284492953046e-20, "policy_entropy/p25": 3.282707439211663e-12, "policy_entropy/p75": 4.3958425521850586e-07, "policy_entropy/var": 0.0061355154030025005, "policy_loss": -0.6458333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2311403602361679, "policy_sharpness": 9.670828819274902, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.0992608070373535, "reward": 0.6458333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2311403602361679, "rewards/accuracy_reward": 0.6458333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2311403602361679, "sentence_fisher_curvature": 174658.53125, "sentence_fisher_curvature/max": 1146880.0, "sentence_fisher_curvature/median": 4384.0, "sentence_fisher_curvature/min": 102.5, "sentence_fisher_curvature/p25": 1228.0, "sentence_fisher_curvature/p75": 249856.0, "sentence_fisher_curvature/p85": 454656.0, "sentence_fisher_curvature/p90": 514048.0, "sentence_fisher_curvature/p95": 683008.0, "sentence_fisher_curvature/p99": 792781.9375, "sentence_fisher_curvature/var": 58624442368.0, "sentence_fisher_kl_divergence": 1.4098221981839743e-05, "sentence_fisher_kl_divergence/max": 9.250640869140625e-05, "sentence_fisher_kl_divergence/median": 3.5390257835388184e-07, "sentence_fisher_kl_divergence/min": 8.265487849712372e-09, "sentence_fisher_kl_divergence/p25": 9.918585419654846e-08, "sentence_fisher_kl_divergence/p75": 2.0176172256469727e-05, "sentence_fisher_kl_divergence/p85": 3.6776065826416016e-05, "sentence_fisher_kl_divergence/p90": 4.1484832763671875e-05, "sentence_fisher_kl_divergence/p95": 5.519390106201172e-05, "sentence_fisher_kl_divergence/p99": 6.396779645001516e-05, "sentence_fisher_kl_divergence/var": 3.8175759775604945e-10, "sentence_full_gradient_variance/max_squared_error": 1598.421630859375, "sentence_full_gradient_variance/metric": 1598.421630859375, "sentence_full_gradient_variance/p75": 1598.421630859375, "sentence_full_gradient_variance/p90": 1598.421630859375, "sentence_full_gradient_variance/p95": 1598.421630859375, "sentence_full_gradient_variance/p99": 1598.421630859375, "sentence_full_update_term": 0.022692998871207237, "sentence_full_update_term/max": 0.11572265625, "sentence_full_update_term/median": 0.00494384765625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0345458984375, "sentence_full_update_term/p85": 0.049072265625, "sentence_full_update_term/p90": 0.057373046875, "sentence_full_update_term/p95": 0.079833984375, "sentence_full_update_term/p99": 0.10458987951278687, "sentence_full_update_term/var": 0.000792482343968004, "sentence_hessian_coeff": 16174.0, "sentence_hessian_coeff/max": 770048.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -288768.0, "sentence_hessian_coeff/p25": -41344.0, "sentence_hessian_coeff/p75": 2544.0, "sentence_hessian_coeff/p99": 441242.65625, "sentence_hessian_coeff/var": 18865190912.0, "sentence_hessian_coeff_abs": 74107.3359375, "sentence_hessian_coeff_abs/max": 770048.0, "sentence_hessian_coeff_abs/median": 33280.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 105472.0, "sentence_hessian_coeff_abs/p99": 441242.65625, "sentence_hessian_coeff_abs/var": 13579837440.0, "step": 34, "token_fisher_curvature": 140809.890625, "token_fisher_curvature/max": 95944704.0, "token_fisher_curvature/median": 6.722053469410127e-17, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 3.5982356646056704e-23, "token_fisher_curvature/p75": 1.0629719326971099e-11, "token_fisher_curvature/p85": 8.847564458847046e-09, "token_fisher_curvature/p90": 1.4901161193847656e-06, "token_fisher_curvature/p95": 0.05078125, "token_fisher_curvature/p99": 290688.0, "token_fisher_curvature/var": 6528978911232.0, "token_fisher_kl_divergence": 1.1372748303983826e-05, "token_fisher_kl_divergence/max": 0.00775146484375, "token_fisher_kl_divergence/median": 5.427363027920561e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.900931978733274e-33, "token_fisher_kl_divergence/p75": 8.602678370551488e-22, "token_fisher_kl_divergence/p85": 7.148958074826295e-19, "token_fisher_kl_divergence/p90": 1.205632815803881e-16, "token_fisher_kl_divergence/p95": 4.092726157978177e-12, "token_fisher_kl_divergence/p99": 2.347305417060852e-05, "token_fisher_kl_divergence/var": 4.258795271994131e-08, "token_full_update_term": 0.00037719932151958346, "token_full_update_term/max": 0.1318359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.9637088775634766e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.4988010832439613e-14, "token_full_update_term/p85": 7.673861546209082e-12, "token_full_update_term/p90": 2.3010215954855084e-10, "token_full_update_term/p95": 5.611218512058258e-08, "token_full_update_term/p99": 0.005980491638183594, "token_full_update_term/var": 2.3524284188169986e-05, "token_hessian_coeff": -988.5924682617188, "token_hessian_coeff/max": 94896128.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -12845056.0, "token_hessian_coeff/p25": -2.4437904357910156e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00787353515625, "token_hessian_coeff/var": 4289202749440.0, "token_hessian_coeff_abs": 145039.65625, "token_hessian_coeff_abs/max": 94896128.0, "token_hessian_coeff_abs/median": 2.5295321393059567e-12, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.6450881958007812e-05, "token_hessian_coeff_abs/p99": 3571200.0, "token_hessian_coeff_abs/var": 4268167266304.0 }, { "accuracy_reward": 0.5729166865348816, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24725879728794098, "adam_stats/lm_head/lr_effective_max": 5.184967449167743e-05, "adam_stats/lm_head/lr_effective_mean": -2.876447127361792e-12, "adam_stats/lm_head/lr_effective_min": -5.177349521545693e-05, "adam_stats/lm_head/lr_effective_std": 1.3750770904152887e-06, "adam_stats/lr_effective_max": 6.444150494644418e-05, "adam_stats/lr_effective_mean": -1.2446405017740858e-10, "adam_stats/lr_effective_min": -6.570303958142176e-05, "adam_stats/m_t_max": 0.0011657747672870755, "adam_stats/m_t_mean": 1.449850569462674e-11, "adam_stats/m_t_min": -0.001093429047614336, "adam_stats/v_t_max": 2.5931291020242497e-05, "adam_stats/v_t_mean": 1.7525359418887887e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5729166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24725879728794098, "all_logprobs": -0.009534968994557858, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -5.125999450683594e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.000270843505859375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.014142553322017193, "clip_ratio": 0.0, "completion_length": 603.1354370117188, "completion_length/correct": 513.2000122070312, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 511.0, "completion_length/correct/min": 292.0, "completion_length/correct/p25": 350.0, "completion_length/correct/p75": 628.5, "completion_length/correct/var": 30495.94140625, "completion_length/incorrect": 723.7804565429688, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 657.0, "completion_length/incorrect/min": 397.0, "completion_length/incorrect/p25": 496.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 67758.5234375, "completion_length/max": 1024.0, "completion_length/median": 521.0, "completion_length/min": 292.0, "completion_length/p25": 442.0, "completion_length/p75": 718.0, "completion_length/var": 56828.921875, "curvature_clip_ratio_token_fisher": 0.008704513311386108, "curvature_clip_ratio_token_hessian": 0.005820279475301504, "curvature_clip_ratio_total_fisher": 0.008704513311386108, "curvature_clip_ratio_total_full": 0.008704513311386108, "curvature_clip_ratio_total_hessian": 0.005820279475301504, "epoch": 0.056, "feature_vector_variance/max_squared_error": 57849.6015625, "feature_vector_variance/metric": 29268.201171875, "generated_tokens/total": 2107531.0, "global_fisher_curvature": 58624.0, "global_fisher_curvature/max": 58624.0, "global_fisher_curvature/median": 58624.0, "global_fisher_curvature/min": 58624.0, "global_fisher_curvature/p25": 58624.0, "global_fisher_curvature/p75": 58624.0, "global_fisher_curvature/p85": 58624.0, "global_fisher_curvature/p90": 58624.0, "global_fisher_curvature/p95": 58624.0, "global_fisher_curvature/p99": 58624.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.589557647705078e-06, "global_fisher_kl_divergence/max": 4.589557647705078e-06, "global_fisher_kl_divergence/median": 4.589557647705078e-06, "global_fisher_kl_divergence/min": 4.589557647705078e-06, "global_fisher_kl_divergence/p25": 4.589557647705078e-06, "global_fisher_kl_divergence/p75": 4.589557647705078e-06, "global_fisher_kl_divergence/p85": 4.589557647705078e-06, "global_fisher_kl_divergence/p90": 4.589557647705078e-06, "global_fisher_kl_divergence/p95": 4.589557647705078e-06, "global_fisher_kl_divergence/p99": 4.589557647705078e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.09130859375, "global_full_update_term/max": 0.09130859375, "global_full_update_term/median": 0.09130859375, "global_full_update_term/min": 0.09130859375, "global_full_update_term/p25": 0.09130859375, "global_full_update_term/p75": 0.09130859375, "global_full_update_term/p85": 0.09130859375, "global_full_update_term/p90": 0.09130859375, "global_full_update_term/p95": 0.09130859375, "global_full_update_term/p99": 0.09130859375, "global_full_update_term/var": NaN, "global_hessian_coeff": 12800.0, "global_hessian_coeff/max": 12800.0, "global_hessian_coeff/median": 12800.0, "global_hessian_coeff/min": 12800.0, "global_hessian_coeff/p25": 12800.0, "global_hessian_coeff/p75": 12800.0, "global_hessian_coeff/p99": 12800.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 12800.0, "global_hessian_coeff_abs/max": 12800.0, "global_hessian_coeff_abs/median": 12800.0, "global_hessian_coeff_abs/min": 12800.0, "global_hessian_coeff_abs/p25": 12800.0, "global_hessian_coeff_abs/p75": 12800.0, "global_hessian_coeff_abs/p99": 12800.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.059453777968883514, "learning_rate": 1.2320907072649045e-05, "loss": -0.5729, "masked_global_fisher_curvature": 266.0, "masked_global_fisher_curvature/max": 266.0, "masked_global_fisher_curvature/median": 266.0, "masked_global_fisher_curvature/min": 266.0, "masked_global_fisher_curvature/p25": 266.0, "masked_global_fisher_curvature/p75": 266.0, "masked_global_fisher_curvature/p85": 266.0, "masked_global_fisher_curvature/p90": 266.0, "masked_global_fisher_curvature/p95": 266.0, "masked_global_fisher_curvature/p99": 266.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/max": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/median": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/min": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/p25": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/p75": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/p85": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/p90": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/p95": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/p99": 2.0838342607021332e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0031585693359375, "masked_global_full_update_term/max": 0.0031585693359375, "masked_global_full_update_term/median": 0.0031585693359375, "masked_global_full_update_term/min": 0.0031585693359375, "masked_global_full_update_term/p25": 0.0031585693359375, "masked_global_full_update_term/p75": 0.0031585693359375, "masked_global_full_update_term/p85": 0.0031585693359375, "masked_global_full_update_term/p90": 0.0031585693359375, "masked_global_full_update_term/p95": 0.0031585693359375, "masked_global_full_update_term/p99": 0.0031585693359375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2080.0, "masked_global_hessian_coeff/max": -2080.0, "masked_global_hessian_coeff/median": -2080.0, "masked_global_hessian_coeff/min": -2080.0, "masked_global_hessian_coeff/p25": -2080.0, "masked_global_hessian_coeff/p75": -2080.0, "masked_global_hessian_coeff/p99": -2080.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2080.0, "masked_global_hessian_coeff_abs/max": 2080.0, "masked_global_hessian_coeff_abs/median": 2080.0, "masked_global_hessian_coeff_abs/min": 2080.0, "masked_global_hessian_coeff_abs/p25": 2080.0, "masked_global_hessian_coeff_abs/p75": 2080.0, "masked_global_hessian_coeff_abs/p99": 2080.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.5219320058822632, "masked_per_sentence_gradient_norm/max": 8.3125, "masked_per_sentence_gradient_norm/median": 0.9921875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.19140625, "masked_per_sentence_gradient_norm/var": 3.540835380554199, "masked_per_token_gradient_norm": 0.02396547608077526, "masked_per_token_gradient_norm/max": 9.6875, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 7.566995918750763e-09, "masked_per_token_gradient_norm/var": 0.10858067125082016, "masked_sentence_fisher_curvature": 221.986328125, "masked_sentence_fisher_curvature/max": 660.0, "masked_sentence_fisher_curvature/median": 197.0, "masked_sentence_fisher_curvature/min": 8.375, "masked_sentence_fisher_curvature/p25": 84.375, "masked_sentence_fisher_curvature/p75": 330.0, "masked_sentence_fisher_curvature/p85": 404.0, "masked_sentence_fisher_curvature/p90": 447.0, "masked_sentence_fisher_curvature/p95": 502.0, "masked_sentence_fisher_curvature/p99": 656.2000122070312, "masked_sentence_fisher_curvature/var": 26335.56640625, "masked_sentence_fisher_kl_divergence": 1.7396626361687595e-08, "masked_sentence_fisher_kl_divergence/max": 5.168840289115906e-08, "masked_sentence_fisher_kl_divergence/median": 1.548323780298233e-08, "masked_sentence_fisher_kl_divergence/min": 6.548361852765083e-10, "masked_sentence_fisher_kl_divergence/p25": 6.613845471292734e-09, "masked_sentence_fisher_kl_divergence/p75": 2.584420144557953e-08, "masked_sentence_fisher_kl_divergence/p85": 3.166496753692627e-08, "masked_sentence_fisher_kl_divergence/p90": 3.504101186990738e-08, "masked_sentence_fisher_kl_divergence/p95": 3.934837877750397e-08, "masked_sentence_fisher_kl_divergence/p99": 5.146721449023062e-08, "masked_sentence_fisher_kl_divergence/var": 1.6166312012617966e-16, "masked_sentence_full_gradient_variance/max_squared_error": 5.560462951660156, "masked_sentence_full_gradient_variance/metric": 5.560462951660156, "masked_sentence_full_gradient_variance/p75": 5.560462951660156, "masked_sentence_full_gradient_variance/p90": 5.560462951660156, "masked_sentence_full_gradient_variance/p95": 5.560462951660156, "masked_sentence_full_gradient_variance/p99": 5.560462951660156, "masked_sentence_full_update_term": 0.0010184546699747443, "masked_sentence_full_update_term/max": 0.0062255859375, "masked_sentence_full_update_term/median": 0.000713348388671875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001239776611328125, "masked_sentence_full_update_term/p85": 0.0026397705078125, "masked_sentence_full_update_term/p90": 0.00324249267578125, "masked_sentence_full_update_term/p95": 0.0034027099609375, "masked_sentence_full_update_term/p99": 0.004776005633175373, "masked_sentence_full_update_term/var": 1.692508135420212e-06, "masked_sentence_hessian_coeff": -5620.5, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -3904.0, "masked_sentence_hessian_coeff/min": -27776.0, "masked_sentence_hessian_coeff/p25": -9616.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 41783392.0, "masked_sentence_hessian_coeff_abs": 5620.5, "masked_sentence_hessian_coeff_abs/max": 27776.0, "masked_sentence_hessian_coeff_abs/median": 3728.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9616.0, "masked_sentence_hessian_coeff_abs/p99": 20358.423828125, "masked_sentence_hessian_coeff_abs/var": 41783392.0, "masked_token_fisher_curvature": 248.71435546875, "masked_token_fisher_curvature/max": 126976.0, "masked_token_fisher_curvature/median": 1.0148132334464322e-16, "masked_token_fisher_curvature/min": 1.8367099231598242e-40, "masked_token_fisher_curvature/p25": 1.0670629901934057e-22, "masked_token_fisher_curvature/p75": 8.469669410260394e-12, "masked_token_fisher_curvature/p85": 4.190951585769653e-09, "masked_token_fisher_curvature/p90": 6.02740328758955e-07, "masked_token_fisher_curvature/p95": 0.003204345703125, "masked_token_fisher_curvature/p99": 1600.9375, "masked_token_fisher_curvature/var": 14302406.0, "masked_token_fisher_kl_divergence": 1.948457928335756e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 7.926474375660727e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 8.377795258084476e-33, "masked_token_fisher_kl_divergence/p75": 6.6505321249263425e-22, "masked_token_fisher_kl_divergence/p85": 3.2864878353466853e-19, "masked_token_fisher_kl_divergence/p90": 4.722716900711077e-17, "masked_token_fisher_kl_divergence/p95": 2.504663143554353e-13, "masked_token_fisher_kl_divergence/p99": 1.2580130714923143e-07, "masked_token_fisher_kl_divergence/var": 8.774801225257603e-14, "masked_token_full_update_term": 1.0103103704750538e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.1101365089416504e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.914335439641036e-16, "masked_token_full_update_term/p85": 9.308109838457312e-13, "masked_token_full_update_term/p90": 2.773958840407431e-11, "masked_token_full_update_term/p95": 7.159542292356491e-09, "masked_token_full_update_term/p99": 3.528594970703125e-05, "masked_token_full_update_term/var": 2.065817561458516e-08, "masked_token_hessian_coeff": -7454.81640625, "masked_token_hessian_coeff/max": 50.25, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -3358720.0, "masked_token_hessian_coeff/p25": -1.51805579662323e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0005685389041900635, "masked_token_hessian_coeff/var": 11576801280.0, "masked_token_hessian_coeff_abs": 7454.82421875, "masked_token_hessian_coeff_abs/max": 3358720.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.6987323760986328e-06, "masked_token_hessian_coeff_abs/p99": 12672.0, "masked_token_hessian_coeff_abs/var": 11576800256.0, "mean_logprobs": -0.00958251953125, "mean_logprobs/var": 6.29425048828125e-05, "num_completions/total": 3360, "per_sentence_gradient_norm": 34.37760543823242, "per_sentence_gradient_norm/max": 195.0, "per_sentence_gradient_norm/median": 31.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 50.1875, "per_sentence_gradient_norm/var": 1726.722412109375, "per_token_feature_norm": 185.6094970703125, "per_token_feature_norm/max": 255.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 103.5, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 193.0, "per_token_feature_norm/var": 167.13946533203125, "per_token_gradient_norm": 0.6148250102996826, "per_token_gradient_norm/max": 276.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.89530235528946e-09, "per_token_gradient_norm/var": 77.8602066040039, "per_token_policy_error_norm": 0.0055312286131083965, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005449897609651089, "policy_entropy": 0.010326952673494816, "policy_entropy/max": 3.171875, "policy_entropy/median": 2.7212081477046013e-09, "policy_entropy/min": 6.395098751769968e-20, "policy_entropy/p25": 5.7980287238024175e-12, "policy_entropy/p75": 4.991888999938965e-07, "policy_entropy/var": 0.00565442955121398, "policy_loss": -0.5729166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24725879728794098, "policy_sharpness": 9.69483470916748, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.944585919380188, "reward": 0.5729166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24725879728794098, "rewards/accuracy_reward": 0.5729166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24725879728794098, "sentence_fisher_curvature": 123425.3125, "sentence_fisher_curvature/max": 925696.0, "sentence_fisher_curvature/median": 46080.0, "sentence_fisher_curvature/min": 8.375, "sentence_fisher_curvature/p25": 481.5, "sentence_fisher_curvature/p75": 186368.0, "sentence_fisher_curvature/p85": 274944.0, "sentence_fisher_curvature/p90": 348160.0, "sentence_fisher_curvature/p95": 444928.0, "sentence_fisher_curvature/p99": 805069.1875, "sentence_fisher_curvature/var": 31352893440.0, "sentence_fisher_kl_divergence": 9.669831342762336e-06, "sentence_fisher_kl_divergence/max": 7.2479248046875e-05, "sentence_fisher_kl_divergence/median": 3.606081008911133e-06, "sentence_fisher_kl_divergence/min": 6.548361852765083e-10, "sentence_fisher_kl_divergence/p25": 3.777677193284035e-08, "sentence_fisher_kl_divergence/p75": 1.461803913116455e-05, "sentence_fisher_kl_divergence/p85": 2.1576881408691406e-05, "sentence_fisher_kl_divergence/p90": 2.7239322662353516e-05, "sentence_fisher_kl_divergence/p95": 3.4928321838378906e-05, "sentence_fisher_kl_divergence/p99": 6.296637729974464e-05, "sentence_fisher_kl_divergence/var": 1.9230554648697051e-10, "sentence_full_gradient_variance/max_squared_error": 2832.305419921875, "sentence_full_gradient_variance/metric": 2832.305419921875, "sentence_full_gradient_variance/p75": 2832.305419921875, "sentence_full_gradient_variance/p90": 2832.305419921875, "sentence_full_gradient_variance/p95": 2832.305419921875, "sentence_full_gradient_variance/p99": 2832.305419921875, "sentence_full_update_term": 0.0273564662784338, "sentence_full_update_term/max": 0.2431640625, "sentence_full_update_term/median": 0.0203857421875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.037841796875, "sentence_full_update_term/p85": 0.0472412109375, "sentence_full_update_term/p90": 0.061767578125, "sentence_full_update_term/p95": 0.090576171875, "sentence_full_update_term/p99": 0.1522463858127594, "sentence_full_update_term/var": 0.0014195808907970786, "sentence_hessian_coeff": 4352.08349609375, "sentence_hessian_coeff/max": 393216.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -147456.0, "sentence_hessian_coeff/p25": -25440.0, "sentence_hessian_coeff/p75": 10272.0, "sentence_hessian_coeff/p99": 305664.28125, "sentence_hessian_coeff/var": 8096502784.0, "sentence_hessian_coeff_abs": 53365.41796875, "sentence_hessian_coeff_abs/max": 393216.0, "sentence_hessian_coeff_abs/median": 14720.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 96256.0, "sentence_hessian_coeff_abs/p99": 305664.28125, "sentence_hessian_coeff_abs/var": 5237797888.0, "step": 35, "token_fisher_curvature": 108438.59375, "token_fisher_curvature/max": 101187584.0, "token_fisher_curvature/median": 1.2750217548429532e-16, "token_fisher_curvature/min": 1.8367099231598242e-40, "token_fisher_curvature/p25": 1.2407709188295415e-22, "token_fisher_curvature/p75": 1.1539214028744027e-11, "token_fisher_curvature/p85": 7.799826562404633e-09, "token_fisher_curvature/p90": 1.5273690223693848e-06, "token_fisher_curvature/p95": 0.0203857421875, "token_fisher_curvature/p99": 58624.0, "token_fisher_curvature/var": 5115119927296.0, "token_fisher_kl_divergence": 8.496101145283319e-06, "token_fisher_kl_divergence/max": 0.0079345703125, "token_fisher_kl_divergence/median": 9.996445390960662e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 9.725946219155541e-33, "token_fisher_kl_divergence/p75": 9.065899513581183e-22, "token_fisher_kl_divergence/p85": 6.098637220230962e-19, "token_fisher_kl_divergence/p90": 1.196959198423997e-16, "token_fisher_kl_divergence/p95": 1.5987211554602254e-12, "token_fisher_kl_divergence/p99": 4.589557647705078e-06, "token_fisher_kl_divergence/var": 3.139647830607828e-08, "token_full_update_term": 0.0002843160182237625, "token_full_update_term/max": 0.1337890625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.1101365089416504e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.787836793695988e-16, "token_full_update_term/p85": 1.3713474800169934e-12, "token_full_update_term/p90": 4.4565240386873484e-11, "token_full_update_term/p95": 2.421438694000244e-08, "token_full_update_term/p99": 0.00176239013671875, "token_full_update_term/var": 1.7542623027111404e-05, "token_hessian_coeff": -2335.87353515625, "token_hessian_coeff/max": 99614720.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -13107200.0, "token_hessian_coeff/p25": -2.0116567611694336e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00104522705078125, "token_hessian_coeff/var": 3438680473600.0, "token_hessian_coeff_abs": 109951.15625, "token_hessian_coeff_abs/max": 99614720.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.339482307434082e-06, "token_hessian_coeff_abs/p99": 1335296.0, "token_hessian_coeff_abs/var": 3426596421632.0 }, { "accuracy_reward": 0.6770833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.22094298899173737, "adam_stats/lm_head/lr_effective_max": 5.32185185875278e-05, "adam_stats/lm_head/lr_effective_mean": 1.2185845214840452e-11, "adam_stats/lm_head/lr_effective_min": -4.794418055098504e-05, "adam_stats/lm_head/lr_effective_std": 1.3612702787213493e-06, "adam_stats/lr_effective_max": 6.4402527641505e-05, "adam_stats/lr_effective_mean": -8.217377472918841e-11, "adam_stats/lr_effective_min": -6.571734411409125e-05, "adam_stats/m_t_max": 0.0010623580310493708, "adam_stats/m_t_mean": 1.3392652438437569e-11, "adam_stats/m_t_min": -0.0010371103417128325, "adam_stats/v_t_max": 2.5905841539497487e-05, "adam_stats/v_t_mean": 1.7509160354228781e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6770833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.22094298899173737, "all_logprobs": -0.012678544968366623, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.65625, "all_logprobs/p1": -0.30394530296325684, "all_logprobs/p10": -1.0132789611816406e-05, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.000919342041015625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.02094990760087967, "clip_ratio": 0.0, "completion_length": 622.0833740234375, "completion_length/correct": 526.7692260742188, "completion_length/correct/max": 909.0, "completion_length/correct/median": 551.0, "completion_length/correct/min": 206.0, "completion_length/correct/p25": 390.0, "completion_length/correct/p75": 678.0, "completion_length/correct/var": 43977.2109375, "completion_length/incorrect": 821.9354858398438, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 999.0, "completion_length/incorrect/min": 429.0, "completion_length/incorrect/p25": 573.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 50318.26171875, "completion_length/max": 1024.0, "completion_length/median": 570.0, "completion_length/min": 206.0, "completion_length/p25": 411.75, "completion_length/p75": 864.0, "completion_length/var": 64765.98046875, "curvature_clip_ratio_token_fisher": 0.013094441033899784, "curvature_clip_ratio_token_hessian": 0.008824514225125313, "curvature_clip_ratio_total_fisher": 0.013094441033899784, "curvature_clip_ratio_total_full": 0.013094441033899784, "curvature_clip_ratio_total_hessian": 0.008824514225125313, "epoch": 0.0576, "feature_vector_variance/max_squared_error": 66349.2265625, "feature_vector_variance/metric": 29428.71484375, "generated_tokens/total": 2167251.0, "global_fisher_curvature": 92672.0, "global_fisher_curvature/max": 92672.0, "global_fisher_curvature/median": 92672.0, "global_fisher_curvature/min": 92672.0, "global_fisher_curvature/p25": 92672.0, "global_fisher_curvature/p75": 92672.0, "global_fisher_curvature/p85": 92672.0, "global_fisher_curvature/p90": 92672.0, "global_fisher_curvature/p95": 92672.0, "global_fisher_curvature/p99": 92672.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.033348083496094e-06, "global_fisher_kl_divergence/max": 7.033348083496094e-06, "global_fisher_kl_divergence/median": 7.033348083496094e-06, "global_fisher_kl_divergence/min": 7.033348083496094e-06, "global_fisher_kl_divergence/p25": 7.033348083496094e-06, "global_fisher_kl_divergence/p75": 7.033348083496094e-06, "global_fisher_kl_divergence/p85": 7.033348083496094e-06, "global_fisher_kl_divergence/p90": 7.033348083496094e-06, "global_fisher_kl_divergence/p95": 7.033348083496094e-06, "global_fisher_kl_divergence/p99": 7.033348083496094e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.09326171875, "global_full_update_term/max": 0.09326171875, "global_full_update_term/median": 0.09326171875, "global_full_update_term/min": 0.09326171875, "global_full_update_term/p25": 0.09326171875, "global_full_update_term/p75": 0.09326171875, "global_full_update_term/p85": 0.09326171875, "global_full_update_term/p90": 0.09326171875, "global_full_update_term/p95": 0.09326171875, "global_full_update_term/p99": 0.09326171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 17792.0, "global_hessian_coeff/max": 17792.0, "global_hessian_coeff/median": 17792.0, "global_hessian_coeff/min": 17792.0, "global_hessian_coeff/p25": 17792.0, "global_hessian_coeff/p75": 17792.0, "global_hessian_coeff/p99": 17792.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17792.0, "global_hessian_coeff_abs/max": 17792.0, "global_hessian_coeff_abs/median": 17792.0, "global_hessian_coeff_abs/min": 17792.0, "global_hessian_coeff_abs/p25": 17792.0, "global_hessian_coeff_abs/p75": 17792.0, "global_hessian_coeff_abs/p99": 17792.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.031228654086589813, "learning_rate": 1.2117461064942437e-05, "loss": -0.6771, "masked_global_fisher_curvature": 502.0, "masked_global_fisher_curvature/max": 502.0, "masked_global_fisher_curvature/median": 502.0, "masked_global_fisher_curvature/min": 502.0, "masked_global_fisher_curvature/p25": 502.0, "masked_global_fisher_curvature/p75": 502.0, "masked_global_fisher_curvature/p85": 502.0, "masked_global_fisher_curvature/p90": 502.0, "masked_global_fisher_curvature/p95": 502.0, "masked_global_fisher_curvature/p99": 502.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/max": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/median": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/min": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/p25": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/p75": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/p85": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/p90": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/p95": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/p99": 3.818422555923462e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00579833984375, "masked_global_full_update_term/max": 0.00579833984375, "masked_global_full_update_term/median": 0.00579833984375, "masked_global_full_update_term/min": 0.00579833984375, "masked_global_full_update_term/p25": 0.00579833984375, "masked_global_full_update_term/p75": 0.00579833984375, "masked_global_full_update_term/p85": 0.00579833984375, "masked_global_full_update_term/p90": 0.00579833984375, "masked_global_full_update_term/p95": 0.00579833984375, "masked_global_full_update_term/p99": 0.00579833984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2720.0, "masked_global_hessian_coeff/max": -2720.0, "masked_global_hessian_coeff/median": -2720.0, "masked_global_hessian_coeff/min": -2720.0, "masked_global_hessian_coeff/p25": -2720.0, "masked_global_hessian_coeff/p75": -2720.0, "masked_global_hessian_coeff/p99": -2720.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2720.0, "masked_global_hessian_coeff_abs/max": 2720.0, "masked_global_hessian_coeff_abs/median": 2720.0, "masked_global_hessian_coeff_abs/min": 2720.0, "masked_global_hessian_coeff_abs/p25": 2720.0, "masked_global_hessian_coeff_abs/p75": 2720.0, "masked_global_hessian_coeff_abs/p99": 2720.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.035459041595459, "masked_per_sentence_gradient_norm/max": 7.46875, "masked_per_sentence_gradient_norm/median": 1.7265625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.55859375, "masked_per_sentence_gradient_norm/var": 4.417130470275879, "masked_per_token_gradient_norm": 0.03636758401989937, "masked_per_token_gradient_norm/max": 11.4375, "masked_per_token_gradient_norm/median": 3.5349501104064984e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.426125764846802e-08, "masked_per_token_gradient_norm/var": 0.16405616700649261, "masked_sentence_fisher_curvature": 268.5159912109375, "masked_sentence_fisher_curvature/max": 756.0, "masked_sentence_fisher_curvature/median": 237.0, "masked_sentence_fisher_curvature/min": 0.330078125, "masked_sentence_fisher_curvature/p25": 135.25, "masked_sentence_fisher_curvature/p75": 378.5, "masked_sentence_fisher_curvature/p85": 444.0, "masked_sentence_fisher_curvature/p90": 479.0, "masked_sentence_fisher_curvature/p95": 612.0, "masked_sentence_fisher_curvature/p99": 702.8001708984375, "masked_sentence_fisher_curvature/var": 28383.720703125, "masked_sentence_fisher_kl_divergence": 2.037547197630829e-08, "masked_sentence_fisher_kl_divergence/max": 5.727633833885193e-08, "masked_sentence_fisher_kl_divergence/median": 1.8044374883174896e-08, "masked_sentence_fisher_kl_divergence/min": 2.5011104298755527e-11, "masked_sentence_fisher_kl_divergence/p25": 1.0244548320770264e-08, "masked_sentence_fisher_kl_divergence/p75": 2.8696376830339432e-08, "masked_sentence_fisher_kl_divergence/p85": 3.370223566889763e-08, "masked_sentence_fisher_kl_divergence/p90": 3.632158041000366e-08, "masked_sentence_fisher_kl_divergence/p95": 4.6566128730773926e-08, "masked_sentence_fisher_kl_divergence/p99": 5.329494712214e-08, "masked_sentence_fisher_kl_divergence/var": 1.633593036030564e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.164602279663086, "masked_sentence_full_gradient_variance/metric": 8.164602279663086, "masked_sentence_full_gradient_variance/p75": 8.164602279663086, "masked_sentence_full_gradient_variance/p90": 8.164602279663086, "masked_sentence_full_gradient_variance/p95": 8.164602279663086, "masked_sentence_full_gradient_variance/p99": 8.164602279663086, "masked_sentence_full_update_term": 0.0014158328995108604, "masked_sentence_full_update_term/max": 0.006805419921875, "masked_sentence_full_update_term/median": 0.000949859619140625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002429962158203125, "masked_sentence_full_update_term/p85": 0.003025054931640625, "masked_sentence_full_update_term/p90": 0.00347900390625, "masked_sentence_full_update_term/p95": 0.0043487548828125, "masked_sentence_full_update_term/p99": 0.005500797647982836, "masked_sentence_full_update_term/var": 2.347493136767298e-06, "masked_sentence_hessian_coeff": -9087.427734375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -8704.0, "masked_sentence_hessian_coeff/min": -39168.0, "masked_sentence_hessian_coeff/p25": -14288.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 76666240.0, "masked_sentence_hessian_coeff_abs": 9087.427734375, "masked_sentence_hessian_coeff_abs/max": 39168.0, "masked_sentence_hessian_coeff_abs/median": 8640.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 14288.0, "masked_sentence_hessian_coeff_abs/p99": 35276.8125, "masked_sentence_hessian_coeff_abs/var": 76666240.0, "masked_token_fisher_curvature": 325.7025451660156, "masked_token_fisher_curvature/max": 128000.0, "masked_token_fisher_curvature/median": 1.6393136847980827e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.532949756790592e-22, "masked_token_fisher_curvature/p75": 2.717115421546623e-11, "masked_token_fisher_curvature/p85": 1.3445969671010971e-08, "masked_token_fisher_curvature/p90": 1.780688762664795e-06, "masked_token_fisher_curvature/p95": 0.014404296875, "masked_token_fisher_curvature/p99": 4768.0, "masked_token_fisher_curvature/var": 18110420.0, "masked_token_fisher_kl_divergence": 2.4730031356057225e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 1.241982609179961e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.447414600453152e-32, "masked_token_fisher_kl_divergence/p75": 2.064642808932357e-21, "masked_token_fisher_kl_divergence/p85": 1.0232158002831948e-18, "masked_token_fisher_kl_divergence/p90": 1.3530843112619095e-16, "masked_token_fisher_kl_divergence/p95": 1.0942358130705543e-12, "masked_token_fisher_kl_divergence/p99": 3.6135315895080566e-07, "masked_token_fisher_kl_divergence/var": 1.0441477462383789e-13, "masked_token_full_update_term": 1.5416064343298785e-05, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.1010637283325195e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.864375403532904e-14, "masked_token_full_update_term/p85": 9.890754881780595e-12, "masked_token_full_update_term/p90": 2.2737367544323206e-10, "masked_token_full_update_term/p95": 2.3981556296348572e-08, "masked_token_full_update_term/p99": 0.0003662109375, "masked_token_full_update_term/var": 3.077786203675714e-08, "masked_token_hessian_coeff": -11940.763671875, "masked_token_hessian_coeff/max": 484.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -3096576.0, "masked_token_hessian_coeff/p25": -3.4868717193603516e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.007076621055603027, "masked_token_hessian_coeff/var": 18783913984.0, "masked_token_hessian_coeff_abs": 11940.8486328125, "masked_token_hessian_coeff_abs/max": 3096576.0, "masked_token_hessian_coeff_abs/median": 4.615685611497611e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.8090009689331055e-05, "masked_token_hessian_coeff_abs/p99": 270336.0, "masked_token_hessian_coeff_abs/var": 18783909888.0, "mean_logprobs": -0.01239013671875, "mean_logprobs/var": 5.936622619628906e-05, "num_completions/total": 3456, "per_sentence_gradient_norm": 42.25390625, "per_sentence_gradient_norm/max": 208.0, "per_sentence_gradient_norm/median": 37.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 54.75, "per_sentence_gradient_norm/var": 2003.3460693359375, "per_token_feature_norm": 185.8822784423828, "per_token_feature_norm/max": 272.0, "per_token_feature_norm/median": 186.0, "per_token_feature_norm/min": 97.5, "per_token_feature_norm/p25": 179.0, "per_token_feature_norm/p75": 193.0, "per_token_feature_norm/var": 163.04171752929688, "per_token_gradient_norm": 0.9641063809394836, "per_token_gradient_norm/max": 290.0, "per_token_gradient_norm/median": 5.861977570020827e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 8.800998330116272e-08, "per_token_gradient_norm/var": 124.95514678955078, "per_token_policy_error_norm": 0.007115333806723356, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00705432565882802, "policy_entropy": 0.013318927027285099, "policy_entropy/max": 2.375, "policy_entropy/median": 3.841705620288849e-09, "policy_entropy/min": 1.3552527156068805e-20, "policy_entropy/p25": 1.1482370609883219e-11, "policy_entropy/p75": 7.897615432739258e-07, "policy_entropy/var": 0.007713083643466234, "policy_loss": -0.6770833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.22094298899173737, "policy_sharpness": 9.62126636505127, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.4170379638671875, "reward": 0.6770833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.22094298899173737, "rewards/accuracy_reward": 0.6770833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.22094298899173737, "sentence_fisher_curvature": 203760.8125, "sentence_fisher_curvature/max": 794624.0, "sentence_fisher_curvature/median": 140288.0, "sentence_fisher_curvature/min": 22.875, "sentence_fisher_curvature/p25": 1096.0, "sentence_fisher_curvature/p75": 345088.0, "sentence_fisher_curvature/p85": 450048.0, "sentence_fisher_curvature/p90": 510976.0, "sentence_fisher_curvature/p95": 574464.0, "sentence_fisher_curvature/p99": 670106.0, "sentence_fisher_curvature/var": 42528346112.0, "sentence_fisher_kl_divergence": 1.5463443560292944e-05, "sentence_fisher_kl_divergence/max": 6.031990051269531e-05, "sentence_fisher_kl_divergence/median": 1.0669231414794922e-05, "sentence_fisher_kl_divergence/min": 1.7389538697898388e-09, "sentence_fisher_kl_divergence/p25": 8.323695510625839e-08, "sentence_fisher_kl_divergence/p75": 2.6226043701171875e-05, "sentence_fisher_kl_divergence/p85": 3.409385681152344e-05, "sentence_fisher_kl_divergence/p90": 3.8743019104003906e-05, "sentence_fisher_kl_divergence/p95": 4.357099533081055e-05, "sentence_fisher_kl_divergence/p99": 5.080702976556495e-05, "sentence_fisher_kl_divergence/var": 2.4491630945533416e-10, "sentence_full_gradient_variance/max_squared_error": 3717.37060546875, "sentence_full_gradient_variance/metric": 3717.37060546875, "sentence_full_gradient_variance/p75": 3717.37060546875, "sentence_full_gradient_variance/p90": 3717.37060546875, "sentence_full_gradient_variance/p95": 3717.37060546875, "sentence_full_gradient_variance/p99": 3717.37060546875, "sentence_full_update_term": 0.03450202941894531, "sentence_full_update_term/max": 0.1552734375, "sentence_full_update_term/median": 0.02587890625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.050048828125, "sentence_full_update_term/p85": 0.0670166015625, "sentence_full_update_term/p90": 0.08935546875, "sentence_full_update_term/p95": 0.112060546875, "sentence_full_update_term/p99": 0.1432129293680191, "sentence_full_update_term/var": 0.0013805023627355695, "sentence_hessian_coeff": 18179.85546875, "sentence_hessian_coeff/max": 561152.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -327680.0, "sentence_hessian_coeff/p25": -37632.0, "sentence_hessian_coeff/p75": 35488.0, "sentence_hessian_coeff/p99": 370483.8125, "sentence_hessian_coeff/var": 15079598080.0, "sentence_hessian_coeff_abs": 74263.859375, "sentence_hessian_coeff_abs/max": 561152.0, "sentence_hessian_coeff_abs/median": 37632.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 125568.0, "sentence_hessian_coeff_abs/p99": 370483.8125, "sentence_hessian_coeff_abs/var": 9840409600.0, "step": 36, "token_fisher_curvature": 181708.109375, "token_fisher_curvature/max": 101187584.0, "token_fisher_curvature/median": 2.3245294578089215e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.327043144841498e-22, "token_fisher_curvature/p75": 4.7066350816749036e-11, "token_fisher_curvature/p85": 3.795139491558075e-08, "token_fisher_curvature/p90": 9.47713851928711e-06, "token_fisher_curvature/p95": 0.7271575927734375, "token_fisher_curvature/p99": 516992.0, "token_fisher_curvature/var": 9205932097536.0, "token_fisher_kl_divergence": 1.379898822051473e-05, "token_fisher_kl_divergence/max": 0.0076904296875, "token_fisher_kl_divergence/median": 1.7670484276950664e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.0444528832131953e-32, "token_fisher_kl_divergence/p75": 3.5734202462290796e-21, "token_fisher_kl_divergence/p85": 2.8866882842426556e-18, "token_fisher_kl_divergence/p90": 7.181755190543981e-16, "token_fisher_kl_divergence/p95": 5.5286442091073695e-11, "token_fisher_kl_divergence/p99": 3.9249658584594727e-05, "token_fisher_kl_divergence/var": 5.309611594839225e-08, "token_full_update_term": 0.0004494800523389131, "token_full_update_term/max": 0.1318359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.1010637283325195e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.957145804951324e-14, "token_full_update_term/p85": 1.8189894035458565e-11, "token_full_update_term/p90": 5.093170329928398e-10, "token_full_update_term/p95": 1.648440957069397e-07, "token_full_update_term/p99": 0.007171630859375, "token_full_update_term/var": 2.8570602808031254e-05, "token_hessian_coeff": 5536.21435546875, "token_hessian_coeff/max": 99614720.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -13369344.0, "token_hessian_coeff/p25": -4.559755325317383e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.01596832275390625, "token_hessian_coeff/var": 6525367091200.0, "token_hessian_coeff_abs": 176814.609375, "token_hessian_coeff_abs/max": 99614720.0, "token_hessian_coeff_abs/median": 7.275957614183426e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.5987625122070312e-05, "token_hessian_coeff_abs/p99": 4325376.0, "token_hessian_coeff_abs/var": 6494133682176.0 }, { "accuracy_reward": 0.6666666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2245613932609558, "adam_stats/lm_head/lr_effective_max": 5.0758015277097e-05, "adam_stats/lm_head/lr_effective_mean": 5.2053094473647477e-11, "adam_stats/lm_head/lr_effective_min": -5.24843817402143e-05, "adam_stats/lm_head/lr_effective_std": 1.320635647061863e-06, "adam_stats/lr_effective_max": 6.769522588001564e-05, "adam_stats/lr_effective_mean": -1.152838727258576e-10, "adam_stats/lr_effective_min": -6.874946120660752e-05, "adam_stats/m_t_max": 0.0009988468373194337, "adam_stats/m_t_mean": 1.3433054148193069e-11, "adam_stats/m_t_min": -0.0009622002835385501, "adam_stats/v_t_max": 2.588017741800286e-05, "adam_stats/v_t_mean": 1.74948478013498e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6666666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2245613932609558, "all_logprobs": -0.009727949276566505, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -3.96875, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -4.172325134277344e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0005306238308548927, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.011944180354475975, "clip_ratio": 0.0, "completion_length": 517.78125, "completion_length/correct": 464.25, "completion_length/correct/max": 970.0, "completion_length/correct/median": 458.0, "completion_length/correct/min": 172.0, "completion_length/correct/p25": 272.0, "completion_length/correct/p75": 572.0, "completion_length/correct/var": 48305.23828125, "completion_length/incorrect": 624.84375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 522.0, "completion_length/incorrect/min": 368.0, "completion_length/incorrect/p25": 480.0, "completion_length/incorrect/p75": 825.5, "completion_length/incorrect/var": 57831.81640625, "completion_length/max": 1024.0, "completion_length/median": 499.0, "completion_length/min": 172.0, "completion_length/p25": 366.0, "completion_length/p75": 589.75, "completion_length/var": 56696.94921875, "curvature_clip_ratio_token_fisher": 0.012432856485247612, "curvature_clip_ratio_token_hessian": 0.008248335681855679, "curvature_clip_ratio_total_fisher": 0.012432856485247612, "curvature_clip_ratio_total_full": 0.012432856485247612, "curvature_clip_ratio_total_hessian": 0.008248335681855679, "epoch": 0.0592, "feature_vector_variance/max_squared_error": 68443.6953125, "feature_vector_variance/metric": 29878.384765625, "generated_tokens/total": 2216958.0, "global_fisher_curvature": 68096.0, "global_fisher_curvature/max": 68096.0, "global_fisher_curvature/median": 68096.0, "global_fisher_curvature/min": 68096.0, "global_fisher_curvature/p25": 68096.0, "global_fisher_curvature/p75": 68096.0, "global_fisher_curvature/p85": 68096.0, "global_fisher_curvature/p90": 68096.0, "global_fisher_curvature/p95": 68096.0, "global_fisher_curvature/p99": 68096.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.0067901611328125e-06, "global_fisher_kl_divergence/max": 5.0067901611328125e-06, "global_fisher_kl_divergence/median": 5.0067901611328125e-06, "global_fisher_kl_divergence/min": 5.0067901611328125e-06, "global_fisher_kl_divergence/p25": 5.0067901611328125e-06, "global_fisher_kl_divergence/p75": 5.0067901611328125e-06, "global_fisher_kl_divergence/p85": 5.0067901611328125e-06, "global_fisher_kl_divergence/p90": 5.0067901611328125e-06, "global_fisher_kl_divergence/p95": 5.0067901611328125e-06, "global_fisher_kl_divergence/p99": 5.0067901611328125e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.06201171875, "global_full_update_term/max": 0.06201171875, "global_full_update_term/median": 0.06201171875, "global_full_update_term/min": 0.06201171875, "global_full_update_term/p25": 0.06201171875, "global_full_update_term/p75": 0.06201171875, "global_full_update_term/p85": 0.06201171875, "global_full_update_term/p90": 0.06201171875, "global_full_update_term/p95": 0.06201171875, "global_full_update_term/p99": 0.06201171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 19072.0, "global_hessian_coeff/max": 19072.0, "global_hessian_coeff/median": 19072.0, "global_hessian_coeff/min": 19072.0, "global_hessian_coeff/p25": 19072.0, "global_hessian_coeff/p75": 19072.0, "global_hessian_coeff/p99": 19072.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 19072.0, "global_hessian_coeff_abs/max": 19072.0, "global_hessian_coeff_abs/median": 19072.0, "global_hessian_coeff_abs/min": 19072.0, "global_hessian_coeff_abs/p25": 19072.0, "global_hessian_coeff_abs/p75": 19072.0, "global_hessian_coeff_abs/p99": 19072.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.041585132479667664, "learning_rate": 1.1908389392193549e-05, "loss": -0.6667, "masked_global_fisher_curvature": 168.0, "masked_global_fisher_curvature/max": 168.0, "masked_global_fisher_curvature/median": 168.0, "masked_global_fisher_curvature/min": 168.0, "masked_global_fisher_curvature/p25": 168.0, "masked_global_fisher_curvature/p75": 168.0, "masked_global_fisher_curvature/p85": 168.0, "masked_global_fisher_curvature/p90": 168.0, "masked_global_fisher_curvature/p95": 168.0, "masked_global_fisher_curvature/p99": 168.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/max": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/median": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/min": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/p25": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/p75": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/p85": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/p90": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/p95": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/p99": 1.234002411365509e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0014801025390625, "masked_global_full_update_term/max": 0.0014801025390625, "masked_global_full_update_term/median": 0.0014801025390625, "masked_global_full_update_term/min": 0.0014801025390625, "masked_global_full_update_term/p25": 0.0014801025390625, "masked_global_full_update_term/p75": 0.0014801025390625, "masked_global_full_update_term/p85": 0.0014801025390625, "masked_global_full_update_term/p90": 0.0014801025390625, "masked_global_full_update_term/p95": 0.0014801025390625, "masked_global_full_update_term/p99": 0.0014801025390625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1408.0, "masked_global_hessian_coeff/max": -1408.0, "masked_global_hessian_coeff/median": -1408.0, "masked_global_hessian_coeff/min": -1408.0, "masked_global_hessian_coeff/p25": -1408.0, "masked_global_hessian_coeff/p75": -1408.0, "masked_global_hessian_coeff/p99": -1408.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1408.0, "masked_global_hessian_coeff_abs/max": 1408.0, "masked_global_hessian_coeff_abs/median": 1408.0, "masked_global_hessian_coeff_abs/min": 1408.0, "masked_global_hessian_coeff_abs/p25": 1408.0, "masked_global_hessian_coeff_abs/p75": 1408.0, "masked_global_hessian_coeff_abs/p99": 1408.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.0783488750457764, "masked_per_sentence_gradient_norm/max": 7.5625, "masked_per_sentence_gradient_norm/median": 0.66015625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.03125, "masked_per_sentence_gradient_norm/var": 1.5016261339187622, "masked_per_token_gradient_norm": 0.03525564447045326, "masked_per_token_gradient_norm/max": 10.375, "masked_per_token_gradient_norm/median": 6.501466032204917e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.055940270423889e-08, "masked_per_token_gradient_norm/var": 0.15794852375984192, "masked_sentence_fisher_curvature": 189.75247192382812, "masked_sentence_fisher_curvature/max": 596.0, "masked_sentence_fisher_curvature/median": 156.0, "masked_sentence_fisher_curvature/min": 0.037109375, "masked_sentence_fisher_curvature/p25": 56.25, "masked_sentence_fisher_curvature/p75": 282.5, "masked_sentence_fisher_curvature/p85": 367.5, "masked_sentence_fisher_curvature/p90": 418.0, "masked_sentence_fisher_curvature/p95": 458.5, "masked_sentence_fisher_curvature/p99": 580.800048828125, "masked_sentence_fisher_curvature/var": 21946.705078125, "masked_sentence_fisher_kl_divergence": 1.393574233787831e-08, "masked_sentence_fisher_kl_divergence/max": 4.377216100692749e-08, "masked_sentence_fisher_kl_divergence/median": 1.146690919995308e-08, "masked_sentence_fisher_kl_divergence/min": 2.7284841053187847e-12, "masked_sentence_fisher_kl_divergence/p25": 4.132743924856186e-09, "masked_sentence_fisher_kl_divergence/p75": 2.075103111565113e-08, "masked_sentence_fisher_kl_divergence/p85": 2.7008354663848877e-08, "masked_sentence_fisher_kl_divergence/p90": 3.0675437301397324e-08, "masked_sentence_fisher_kl_divergence/p95": 3.370223566889763e-08, "masked_sentence_fisher_kl_divergence/p99": 4.266621900228529e-08, "masked_sentence_fisher_kl_divergence/var": 1.18440471430781e-16, "masked_sentence_full_gradient_variance/max_squared_error": 2.560441493988037, "masked_sentence_full_gradient_variance/metric": 2.560441493988037, "masked_sentence_full_gradient_variance/p75": 2.560441493988037, "masked_sentence_full_gradient_variance/p90": 2.560441493988037, "masked_sentence_full_gradient_variance/p95": 2.560441493988037, "masked_sentence_full_gradient_variance/p99": 2.560441493988037, "masked_sentence_full_update_term": 0.0008275384898297489, "masked_sentence_full_update_term/max": 0.00732421875, "masked_sentence_full_update_term/median": 0.0003566741943359375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0016307830810546875, "masked_sentence_full_update_term/p85": 0.0019683837890625, "masked_sentence_full_update_term/p90": 0.00215911865234375, "masked_sentence_full_update_term/p95": 0.002277374267578125, "masked_sentence_full_update_term/p99": 0.002801528200507164, "masked_sentence_full_update_term/var": 1.1614018831096473e-06, "masked_sentence_hessian_coeff": -6581.73974609375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6336.0, "masked_sentence_hessian_coeff/min": -23296.0, "masked_sentence_hessian_coeff/p25": -9328.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 43845528.0, "masked_sentence_hessian_coeff_abs": 6581.73974609375, "masked_sentence_hessian_coeff_abs/max": 23296.0, "masked_sentence_hessian_coeff_abs/median": 5664.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9328.0, "masked_sentence_hessian_coeff_abs/p99": 20012.810546875, "masked_sentence_hessian_coeff_abs/var": 43845528.0, "masked_token_fisher_curvature": 297.4579162597656, "masked_token_fisher_curvature/max": 135168.0, "masked_token_fisher_curvature/median": 8.370040771588094e-17, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 1.0132962503774589e-22, "masked_token_fisher_curvature/p75": 8.526512829121202e-12, "masked_token_fisher_curvature/p85": 3.725290298461914e-09, "masked_token_fisher_curvature/p90": 3.594905138015747e-07, "masked_token_fisher_curvature/p95": 0.00299072265625, "masked_token_fisher_curvature/p99": 2457.6875, "masked_token_fisher_curvature/var": 17822014.0, "masked_token_fisher_kl_divergence": 2.184010128303271e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 6.134182398998588e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 7.462978534500539e-33, "masked_token_fisher_kl_divergence/p75": 6.253485430900889e-22, "masked_token_fisher_kl_divergence/p85": 2.727446090158847e-19, "masked_token_fisher_kl_divergence/p90": 2.6346112791397758e-17, "masked_token_fisher_kl_divergence/p95": 2.1938006966593093e-13, "masked_token_fisher_kl_divergence/p99": 1.804219209589064e-07, "masked_token_fisher_kl_divergence/var": 9.61342889516971e-14, "masked_token_full_update_term": 1.4507123523799237e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.557652235031128e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.352873534367973e-14, "masked_token_full_update_term/p85": 6.139089236967266e-12, "masked_token_full_update_term/p90": 1.2478196254051e-10, "masked_token_full_update_term/p95": 1.57160684466362e-08, "masked_token_full_update_term/p99": 0.00036453455686569214, "masked_token_full_update_term/var": 2.8603935220417043e-08, "masked_token_hessian_coeff": -11518.794921875, "masked_token_hessian_coeff/max": 94.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -3522560.0, "masked_token_hessian_coeff/p25": -5.811452865600586e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0034637451171875, "masked_token_hessian_coeff/var": 18668974080.0, "masked_token_hessian_coeff_abs": 11518.80859375, "masked_token_hessian_coeff_abs/max": 3522560.0, "masked_token_hessian_coeff_abs/median": 8.321876521222293e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.3126602172851562e-05, "masked_token_hessian_coeff_abs/p99": 253952.0, "masked_token_hessian_coeff_abs/var": 18668974080.0, "mean_logprobs": -0.00860595703125, "mean_logprobs/var": 3.409385681152344e-05, "num_completions/total": 3552, "per_sentence_gradient_norm": 30.16975975036621, "per_sentence_gradient_norm/max": 164.0, "per_sentence_gradient_norm/median": 11.3125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 55.8125, "per_sentence_gradient_norm/var": 1431.799072265625, "per_token_feature_norm": 186.697998046875, "per_token_feature_norm/max": 280.0, "per_token_feature_norm/median": 187.0, "per_token_feature_norm/min": 96.5, "per_token_feature_norm/p25": 180.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 203.2331085205078, "per_token_gradient_norm": 0.8702328205108643, "per_token_gradient_norm/max": 324.0, "per_token_gradient_norm/median": 9.592326932761353e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.0849907994270325e-07, "per_token_gradient_norm/var": 112.23095703125, "per_token_policy_error_norm": 0.005862086080014706, "per_token_policy_error_norm/max": 1.90625, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005412365309894085, "policy_entropy": 0.011059760116040707, "policy_entropy/max": 1.78125, "policy_entropy/median": 3.026798367500305e-09, "policy_entropy/min": 2.8587361969832636e-20, "policy_entropy/p25": 5.7838178690872155e-12, "policy_entropy/p75": 4.6938657760620117e-07, "policy_entropy/var": 0.005597401410341263, "policy_loss": -0.6666666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2245613932609558, "policy_sharpness": 9.66708755493164, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.0921287536621094, "reward": 0.6666666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2245613932609558, "rewards/accuracy_reward": 0.6666666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2245613932609558, "sentence_fisher_curvature": 151899.765625, "sentence_fisher_curvature/max": 638976.0, "sentence_fisher_curvature/median": 97792.0, "sentence_fisher_curvature/min": 11.4375, "sentence_fisher_curvature/p25": 982.0, "sentence_fisher_curvature/p75": 247808.0, "sentence_fisher_curvature/p85": 378880.0, "sentence_fisher_curvature/p90": 394240.0, "sentence_fisher_curvature/p95": 489984.0, "sentence_fisher_curvature/p99": 580608.1875, "sentence_fisher_curvature/var": 28979476480.0, "sentence_fisher_kl_divergence": 1.114902534027351e-05, "sentence_fisher_kl_divergence/max": 4.696846008300781e-05, "sentence_fisher_kl_divergence/median": 7.18235969543457e-06, "sentence_fisher_kl_divergence/min": 8.403731044381857e-10, "sentence_fisher_kl_divergence/p25": 7.194466888904572e-08, "sentence_fisher_kl_divergence/p75": 1.823902130126953e-05, "sentence_fisher_kl_divergence/p85": 2.777576446533203e-05, "sentence_fisher_kl_divergence/p90": 2.8967857360839844e-05, "sentence_fisher_kl_divergence/p95": 3.600120544433594e-05, "sentence_fisher_kl_divergence/p99": 4.2665018554544076e-05, "sentence_fisher_kl_divergence/var": 1.5622669824466584e-10, "sentence_full_gradient_variance/max_squared_error": 2281.5986328125, "sentence_full_gradient_variance/metric": 2281.5986328125, "sentence_full_gradient_variance/p75": 2281.5986328125, "sentence_full_gradient_variance/p90": 2281.5986328125, "sentence_full_gradient_variance/p95": 2281.5986328125, "sentence_full_gradient_variance/p99": 2281.5986328125, "sentence_full_update_term": 0.023834228515625, "sentence_full_update_term/max": 0.142578125, "sentence_full_update_term/median": 0.00543212890625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.03466796875, "sentence_full_update_term/p85": 0.058837890625, "sentence_full_update_term/p90": 0.069580078125, "sentence_full_update_term/p95": 0.103515625, "sentence_full_update_term/p99": 0.1138184517621994, "sentence_full_update_term/var": 0.0010827204678207636, "sentence_hessian_coeff": 12411.333984375, "sentence_hessian_coeff/max": 372736.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -165888.0, "sentence_hessian_coeff/p25": -37184.0, "sentence_hessian_coeff/p75": 32768.0, "sentence_hessian_coeff/p99": 291021.0625, "sentence_hessian_coeff/var": 8676528128.0, "sentence_hessian_coeff_abs": 56964.66796875, "sentence_hessian_coeff_abs/max": 372736.0, "sentence_hessian_coeff_abs/median": 35328.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 82944.0, "sentence_hessian_coeff_abs/p99": 291021.0625, "sentence_hessian_coeff_abs/var": 5553059840.0, "step": 37, "token_fisher_curvature": 157656.53125, "token_fisher_curvature/max": 95944704.0, "token_fisher_curvature/median": 1.1188966420050406e-16, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 1.2324991127040112e-22, "token_fisher_curvature/p75": 1.4438228390645236e-11, "token_fisher_curvature/p85": 8.381903171539307e-09, "token_fisher_curvature/p90": 1.2889504432678223e-06, "token_fisher_curvature/p95": 0.171875, "token_fisher_curvature/p99": 301056.0, "token_fisher_curvature/var": 7484119449600.0, "token_fisher_kl_divergence": 1.1570694368856493e-05, "token_fisher_kl_divergence/max": 0.007049560546875, "token_fisher_kl_divergence/median": 8.229396963265595e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 9.051870738620009e-33, "token_fisher_kl_divergence/p75": 1.0587911840678754e-21, "token_fisher_kl_divergence/p85": 6.166399856011306e-19, "token_fisher_kl_divergence/p90": 9.454242944073599e-17, "token_fisher_kl_divergence/p95": 1.261923898709938e-11, "token_fisher_kl_divergence/p99": 2.205371856689453e-05, "token_fisher_kl_divergence/var": 4.030933453691432e-08, "token_full_update_term": 0.00039401568938046694, "token_full_update_term/max": 0.125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.557652235031128e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.906386491005833e-14, "token_full_update_term/p85": 1.1198153515579179e-11, "token_full_update_term/p90": 2.8884983294119593e-10, "token_full_update_term/p95": 8.614733815193176e-08, "token_full_update_term/p99": 0.005735516548156738, "token_full_update_term/var": 2.3786531528458e-05, "token_hessian_coeff": -8566.3017578125, "token_hessian_coeff/max": 92798976.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -13565952.0, "token_hessian_coeff/p25": -7.808208465576172e-06, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.006256103515625, "token_hessian_coeff/var": 4564121026560.0, "token_hessian_coeff_abs": 161068.328125, "token_hessian_coeff_abs/max": 92798976.0, "token_hessian_coeff_abs/median": 1.3733369996771216e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.9921531677246094e-05, "token_hessian_coeff_abs/p99": 4419840.0, "token_hessian_coeff_abs/var": 4538251608064.0 }, { "accuracy_reward": 0.7395833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.19462719559669495, "adam_stats/lm_head/lr_effective_max": 4.7529400035273284e-05, "adam_stats/lm_head/lr_effective_mean": 5.45190767531345e-11, "adam_stats/lm_head/lr_effective_min": -5.535571472137235e-05, "adam_stats/lm_head/lr_effective_std": 1.240880010300316e-06, "adam_stats/lr_effective_max": 5.824497202411294e-05, "adam_stats/lr_effective_mean": -8.886033575628005e-11, "adam_stats/lr_effective_min": -5.900842370465398e-05, "adam_stats/m_t_max": 0.0008934308425523341, "adam_stats/m_t_mean": 1.2642913625737773e-11, "adam_stats/m_t_min": -0.0008819065988063812, "adam_stats/v_t_max": 2.585432775958907e-05, "adam_stats/v_t_mean": 1.7478183613958698e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7395833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.19462719559669495, "all_logprobs": -0.008168931119143963, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -9.5367431640625e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -4.315376281738281e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012018648907542229, "clip_ratio": 0.0, "completion_length": 658.3854370117188, "completion_length/correct": 639.6901245117188, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 672.0, "completion_length/correct/min": 262.0, "completion_length/correct/p25": 440.5, "completion_length/correct/p75": 782.5, "completion_length/correct/var": 51929.4453125, "completion_length/incorrect": 711.47998046875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 573.0, "completion_length/incorrect/min": 333.0, "completion_length/incorrect/p25": 517.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 73123.9296875, "completion_length/max": 1024.0, "completion_length/median": 652.0, "completion_length/min": 262.0, "completion_length/p25": 484.0, "completion_length/p75": 819.0, "completion_length/var": 57740.27734375, "curvature_clip_ratio_token_fisher": 0.010315639898180962, "curvature_clip_ratio_token_hessian": 0.0073570129461586475, "curvature_clip_ratio_total_fisher": 0.010315639898180962, "curvature_clip_ratio_total_full": 0.010315639898180962, "curvature_clip_ratio_total_hessian": 0.0073570129461586475, "epoch": 0.0608, "feature_vector_variance/max_squared_error": 61161.6171875, "feature_vector_variance/metric": 29180.021484375, "generated_tokens/total": 2280163.0, "global_fisher_curvature": 66048.0, "global_fisher_curvature/max": 66048.0, "global_fisher_curvature/median": 66048.0, "global_fisher_curvature/min": 66048.0, "global_fisher_curvature/p25": 66048.0, "global_fisher_curvature/p75": 66048.0, "global_fisher_curvature/p85": 66048.0, "global_fisher_curvature/p90": 66048.0, "global_fisher_curvature/p95": 66048.0, "global_fisher_curvature/p99": 66048.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.678964614868164e-06, "global_fisher_kl_divergence/max": 4.678964614868164e-06, "global_fisher_kl_divergence/median": 4.678964614868164e-06, "global_fisher_kl_divergence/min": 4.678964614868164e-06, "global_fisher_kl_divergence/p25": 4.678964614868164e-06, "global_fisher_kl_divergence/p75": 4.678964614868164e-06, "global_fisher_kl_divergence/p85": 4.678964614868164e-06, "global_fisher_kl_divergence/p90": 4.678964614868164e-06, "global_fisher_kl_divergence/p95": 4.678964614868164e-06, "global_fisher_kl_divergence/p99": 4.678964614868164e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.06494140625, "global_full_update_term/max": 0.06494140625, "global_full_update_term/median": 0.06494140625, "global_full_update_term/min": 0.06494140625, "global_full_update_term/p25": 0.06494140625, "global_full_update_term/p75": 0.06494140625, "global_full_update_term/p85": 0.06494140625, "global_full_update_term/p90": 0.06494140625, "global_full_update_term/p95": 0.06494140625, "global_full_update_term/p99": 0.06494140625, "global_full_update_term/var": NaN, "global_hessian_coeff": 16768.0, "global_hessian_coeff/max": 16768.0, "global_hessian_coeff/median": 16768.0, "global_hessian_coeff/min": 16768.0, "global_hessian_coeff/p25": 16768.0, "global_hessian_coeff/p75": 16768.0, "global_hessian_coeff/p99": 16768.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 16768.0, "global_hessian_coeff_abs/max": 16768.0, "global_hessian_coeff_abs/median": 16768.0, "global_hessian_coeff_abs/min": 16768.0, "global_hessian_coeff_abs/p25": 16768.0, "global_hessian_coeff_abs/p75": 16768.0, "global_hessian_coeff_abs/p99": 16768.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.025254517793655396, "learning_rate": 1.1693946776030601e-05, "loss": -0.7396, "masked_global_fisher_curvature": 326.0, "masked_global_fisher_curvature/max": 326.0, "masked_global_fisher_curvature/median": 326.0, "masked_global_fisher_curvature/min": 326.0, "masked_global_fisher_curvature/p25": 326.0, "masked_global_fisher_curvature/p75": 326.0, "masked_global_fisher_curvature/p85": 326.0, "masked_global_fisher_curvature/p90": 326.0, "masked_global_fisher_curvature/p95": 326.0, "masked_global_fisher_curvature/p99": 326.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/max": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/median": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/min": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p25": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p75": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p85": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p90": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p95": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/p99": 2.3166649043560028e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00153350830078125, "masked_global_full_update_term/max": 0.00153350830078125, "masked_global_full_update_term/median": 0.00153350830078125, "masked_global_full_update_term/min": 0.00153350830078125, "masked_global_full_update_term/p25": 0.00153350830078125, "masked_global_full_update_term/p75": 0.00153350830078125, "masked_global_full_update_term/p85": 0.00153350830078125, "masked_global_full_update_term/p90": 0.00153350830078125, "masked_global_full_update_term/p95": 0.00153350830078125, "masked_global_full_update_term/p99": 0.00153350830078125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1648.0, "masked_global_hessian_coeff/max": -1648.0, "masked_global_hessian_coeff/median": -1648.0, "masked_global_hessian_coeff/min": -1648.0, "masked_global_hessian_coeff/p25": -1648.0, "masked_global_hessian_coeff/p75": -1648.0, "masked_global_hessian_coeff/p99": -1648.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1648.0, "masked_global_hessian_coeff_abs/max": 1648.0, "masked_global_hessian_coeff_abs/median": 1648.0, "masked_global_hessian_coeff_abs/min": 1648.0, "masked_global_hessian_coeff_abs/p25": 1648.0, "masked_global_hessian_coeff_abs/p75": 1648.0, "masked_global_hessian_coeff_abs/p99": 1648.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.687970519065857, "masked_per_sentence_gradient_norm/max": 7.9375, "masked_per_sentence_gradient_norm/median": 0.90234375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.8671875, "masked_per_sentence_gradient_norm/var": 3.5169403553009033, "masked_per_token_gradient_norm": 0.024224024266004562, "masked_per_token_gradient_norm/max": 10.9375, "masked_per_token_gradient_norm/median": 4.001776687800884e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.89413058757782e-08, "masked_per_token_gradient_norm/var": 0.10841105878353119, "masked_sentence_fisher_curvature": 184.1123504638672, "masked_sentence_fisher_curvature/max": 800.0, "masked_sentence_fisher_curvature/median": 148.0, "masked_sentence_fisher_curvature/min": 0.0654296875, "masked_sentence_fisher_curvature/p25": 57.1875, "masked_sentence_fisher_curvature/p75": 248.25, "masked_sentence_fisher_curvature/p85": 299.5, "masked_sentence_fisher_curvature/p90": 375.0, "masked_sentence_fisher_curvature/p95": 486.0, "masked_sentence_fisher_curvature/p99": 758.2001342773438, "masked_sentence_fisher_curvature/var": 25055.373046875, "masked_sentence_fisher_kl_divergence": 1.3056157932567203e-08, "masked_sentence_fisher_kl_divergence/max": 5.681067705154419e-08, "masked_sentence_fisher_kl_divergence/median": 1.0477378964424133e-08, "masked_sentence_fisher_kl_divergence/min": 4.632738637155853e-12, "masked_sentence_fisher_kl_divergence/p25": 4.045432433485985e-09, "masked_sentence_fisher_kl_divergence/p75": 1.760781742632389e-08, "masked_sentence_fisher_kl_divergence/p85": 2.121669240295887e-08, "masked_sentence_fisher_kl_divergence/p90": 2.6600901037454605e-08, "masked_sentence_fisher_kl_divergence/p95": 3.451714292168617e-08, "masked_sentence_fisher_kl_divergence/p99": 5.3714039438546024e-08, "masked_sentence_fisher_kl_divergence/var": 1.261274939504608e-16, "masked_sentence_full_gradient_variance/max_squared_error": 6.095174789428711, "masked_sentence_full_gradient_variance/metric": 6.095174789428711, "masked_sentence_full_gradient_variance/p75": 6.095174789428711, "masked_sentence_full_gradient_variance/p90": 6.095174789428711, "masked_sentence_full_gradient_variance/p95": 6.095174789428711, "masked_sentence_full_gradient_variance/p99": 6.095174789428711, "masked_sentence_full_update_term": 0.0009969578823074698, "masked_sentence_full_update_term/max": 0.005340576171875, "masked_sentence_full_update_term/median": 0.00055694580078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00164794921875, "masked_sentence_full_update_term/p85": 0.00201416015625, "masked_sentence_full_update_term/p90": 0.00238800048828125, "masked_sentence_full_update_term/p95": 0.003185272216796875, "masked_sentence_full_update_term/p99": 0.004470827989280224, "masked_sentence_full_update_term/var": 1.3109122392052086e-06, "masked_sentence_hessian_coeff": -5739.5146484375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -3584.0, "masked_sentence_hessian_coeff/min": -32000.0, "masked_sentence_hessian_coeff/p25": -7824.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 40892988.0, "masked_sentence_hessian_coeff_abs": 5739.5146484375, "masked_sentence_hessian_coeff_abs/max": 32000.0, "masked_sentence_hessian_coeff_abs/median": 3584.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 7824.0, "masked_sentence_hessian_coeff_abs/p99": 24339.224609375, "masked_sentence_hessian_coeff_abs/var": 40892988.0, "masked_token_fisher_curvature": 219.23257446289062, "masked_token_fisher_curvature/max": 137216.0, "masked_token_fisher_curvature/median": 4.1470733097570545e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.9128551665288765e-23, "masked_token_fisher_curvature/p75": 5.400124791776761e-13, "masked_token_fisher_curvature/p85": 2.582964953035116e-10, "masked_token_fisher_curvature/p90": 2.3632310330867767e-08, "masked_token_fisher_curvature/p95": 8.96453857421875e-05, "masked_token_fisher_curvature/p99": 1072.0, "masked_token_fisher_curvature/var": 13863069.0, "masked_token_fisher_kl_divergence": 1.5542614306696123e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 2.934562567422164e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.3541694921472752e-33, "masked_token_fisher_kl_divergence/p75": 3.825710333057753e-23, "masked_token_fisher_kl_divergence/p85": 1.8317087484374245e-20, "masked_token_fisher_kl_divergence/p90": 1.6737371037744975e-18, "masked_token_fisher_kl_divergence/p95": 6.356026815979021e-15, "masked_token_fisher_kl_divergence/p99": 7.59027898311615e-08, "masked_token_fisher_kl_divergence/var": 6.968235802826475e-14, "masked_token_full_update_term": 9.910628250509035e-06, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 1.5500702934753696e-19, "masked_token_full_update_term/min": -1.8812716007232666e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 7.904787935331115e-14, "masked_token_full_update_term/p85": 8.697043085703626e-12, "masked_token_full_update_term/p90": 1.418811734765768e-10, "masked_token_full_update_term/p95": 1.100124791264534e-08, "masked_token_full_update_term/p99": 5.005858838558197e-05, "masked_token_full_update_term/var": 1.91488336298562e-08, "masked_token_hessian_coeff": -8020.82568359375, "masked_token_hessian_coeff/max": 40.5, "masked_token_hessian_coeff/median": -1.0413714335300028e-10, "masked_token_hessian_coeff/min": -3325952.0, "masked_token_hessian_coeff/p25": -6.258487701416016e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.003082275390625, "masked_token_hessian_coeff/var": 12892681216.0, "masked_token_hessian_coeff_abs": 8020.8369140625, "masked_token_hessian_coeff_abs/max": 3325952.0, "masked_token_hessian_coeff_abs/median": 7.741618901491165e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.2411346435546875e-05, "masked_token_hessian_coeff_abs/p99": 17664.0, "masked_token_hessian_coeff_abs/var": 12892682240.0, "mean_logprobs": -0.008544921875, "mean_logprobs/var": 4.029273986816406e-05, "num_completions/total": 3648, "per_sentence_gradient_norm": 44.466796875, "per_sentence_gradient_norm/max": 211.0, "per_sentence_gradient_norm/median": 24.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 77.0, "per_sentence_gradient_norm/var": 2548.240966796875, "per_token_feature_norm": 186.74728393554688, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 187.0, "per_token_feature_norm/min": 102.0, "per_token_feature_norm/p25": 180.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 169.243408203125, "per_token_gradient_norm": 0.8014640808105469, "per_token_gradient_norm/max": 296.0, "per_token_gradient_norm/median": 4.7975845518521965e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.0943040251731873e-07, "per_token_gradient_norm/var": 106.47900390625, "per_token_policy_error_norm": 0.004901337902992964, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004831935744732618, "policy_entropy": 0.008313121274113655, "policy_entropy/max": 1.5390625, "policy_entropy/median": 7.639755494892597e-10, "policy_entropy/min": 1.164670302474663e-20, "policy_entropy/p25": 2.1742607714259066e-12, "policy_entropy/p75": 1.424923539161682e-07, "policy_entropy/var": 0.004287299700081348, "policy_loss": -0.7395833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.19462719559669495, "policy_sharpness": 9.766578674316406, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.530378818511963, "reward": 0.7395833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.19462719559669495, "rewards/accuracy_reward": 0.7395833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.19462719559669495, "sentence_fisher_curvature": 158493.0625, "sentence_fisher_curvature/max": 1056768.0, "sentence_fisher_curvature/median": 136192.0, "sentence_fisher_curvature/min": 75.0, "sentence_fisher_curvature/p25": 1128.0, "sentence_fisher_curvature/p75": 194304.0, "sentence_fisher_curvature/p85": 348672.0, "sentence_fisher_curvature/p90": 406528.0, "sentence_fisher_curvature/p95": 474112.0, "sentence_fisher_curvature/p99": 683214.0, "sentence_fisher_curvature/var": 33707517952.0, "sentence_fisher_kl_divergence": 1.123351466958411e-05, "sentence_fisher_kl_divergence/max": 7.486343383789062e-05, "sentence_fisher_kl_divergence/median": 9.655952453613281e-06, "sentence_fisher_kl_divergence/min": 5.326000973582268e-09, "sentence_fisher_kl_divergence/p25": 8.009374141693115e-08, "sentence_fisher_kl_divergence/p75": 1.3753771781921387e-05, "sentence_fisher_kl_divergence/p85": 2.4706125259399414e-05, "sentence_fisher_kl_divergence/p90": 2.8848648071289062e-05, "sentence_fisher_kl_divergence/p95": 3.361701965332031e-05, "sentence_fisher_kl_divergence/p99": 4.836329389945604e-05, "sentence_fisher_kl_divergence/var": 1.6925041923521178e-10, "sentence_full_gradient_variance/max_squared_error": 4435.7431640625, "sentence_full_gradient_variance/metric": 4435.7431640625, "sentence_full_gradient_variance/p75": 4435.7431640625, "sentence_full_gradient_variance/p90": 4435.7431640625, "sentence_full_gradient_variance/p95": 4435.7431640625, "sentence_full_gradient_variance/p99": 4435.7431640625, "sentence_full_update_term": 0.03236071392893791, "sentence_full_update_term/max": 0.1728515625, "sentence_full_update_term/median": 0.0267333984375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0533447265625, "sentence_full_update_term/p85": 0.0672607421875, "sentence_full_update_term/p90": 0.07470703125, "sentence_full_update_term/p95": 0.092529296875, "sentence_full_update_term/p99": 0.14687508344650269, "sentence_full_update_term/var": 0.0012106142239645123, "sentence_hessian_coeff": 25045.66796875, "sentence_hessian_coeff/max": 724992.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -96768.0, "sentence_hessian_coeff/p25": -33920.0, "sentence_hessian_coeff/p75": 31488.0, "sentence_hessian_coeff/p99": 405914.625, "sentence_hessian_coeff/var": 11898664960.0, "sentence_hessian_coeff_abs": 54883.0, "sentence_hessian_coeff_abs/max": 724992.0, "sentence_hessian_coeff_abs/median": 33792.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 68096.0, "sentence_hessian_coeff_abs/p99": 405914.625, "sentence_hessian_coeff_abs/var": 9488702464.0, "step": 38, "token_fisher_curvature": 157149.734375, "token_fisher_curvature/max": 112721920.0, "token_fisher_curvature/median": 5.231275482242559e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.1713491079516976e-23, "token_fisher_curvature/p75": 7.993605777301127e-13, "token_fisher_curvature/p85": 5.311449058353901e-10, "token_fisher_curvature/p90": 6.574919098056853e-08, "token_fisher_curvature/p95": 0.0009613037109375, "token_fisher_curvature/p99": 162696.0, "token_fisher_curvature/var": 7850177331200.0, "token_fisher_kl_divergence": 1.1140866263303906e-05, "token_fisher_kl_divergence/max": 0.00799560546875, "token_fisher_kl_divergence/median": 3.7076462545387555e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.5407439555097887e-33, "token_fisher_kl_divergence/p75": 5.66618719598824e-23, "token_fisher_kl_divergence/p85": 3.7692966152816365e-20, "token_fisher_kl_divergence/p90": 4.6673632976080084e-18, "token_fisher_kl_divergence/p95": 6.794564910705958e-14, "token_fisher_kl_divergence/p99": 1.1553987860679626e-05, "token_fisher_kl_divergence/var": 3.9443879273903804e-08, "token_full_update_term": 0.0003648856654763222, "token_full_update_term/max": 0.1337890625, "token_full_update_term/median": 2.371692252312041e-19, "token_full_update_term/min": -1.8812716007232666e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.0835776720341528e-13, "token_full_update_term/p85": 1.2972067864325254e-11, "token_full_update_term/p90": 2.673914423212409e-10, "token_full_update_term/p95": 5.029141902923584e-08, "token_full_update_term/p99": 0.004149198532104492, "token_full_update_term/var": 2.302040593349375e-05, "token_hessian_coeff": 9846.998046875, "token_hessian_coeff/max": 111149056.0, "token_hessian_coeff/median": -1.2005330063402653e-10, "token_hessian_coeff/min": -13893632.0, "token_hessian_coeff/p25": -7.748603820800781e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0060389041900634766, "token_hessian_coeff/var": 5020351725568.0, "token_hessian_coeff_abs": 144639.84375, "token_hessian_coeff_abs/max": 111149056.0, "token_hessian_coeff_abs/median": 8.96397978067398e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.849102020263672e-05, "token_hessian_coeff_abs/p99": 2637824.0, "token_hessian_coeff_abs/var": 4999527530496.0 }, { "accuracy_reward": 0.6770833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.22094298899173737, "adam_stats/lm_head/lr_effective_max": 4.6202829253161326e-05, "adam_stats/lm_head/lr_effective_mean": 2.145167030120909e-11, "adam_stats/lm_head/lr_effective_min": -4.7832239943090826e-05, "adam_stats/lm_head/lr_effective_std": 1.2002691391899134e-06, "adam_stats/lr_effective_max": 5.9600402892101556e-05, "adam_stats/lr_effective_mean": -9.567482917027803e-11, "adam_stats/lr_effective_min": -6.027860217727721e-05, "adam_stats/m_t_max": 0.0007554503390565515, "adam_stats/m_t_mean": 1.049658276963683e-11, "adam_stats/m_t_min": -0.0007384028285741806, "adam_stats/v_t_max": 2.582851993793156e-05, "adam_stats/v_t_mean": 1.7462591702516184e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6770833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.22094298899173737, "all_logprobs": -0.009473776444792747, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.5, "all_logprobs/p1": -0.1650390625, "all_logprobs/p10": -3.933906555175781e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0002613067626953125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016249382868409157, "clip_ratio": 0.0, "completion_length": 577.4166870117188, "completion_length/correct": 484.23077392578125, "completion_length/correct/max": 883.0, "completion_length/correct/median": 444.0, "completion_length/correct/min": 315.0, "completion_length/correct/p25": 401.0, "completion_length/correct/p75": 513.0, "completion_length/correct/var": 16239.0546875, "completion_length/incorrect": 772.8064575195312, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 834.0, "completion_length/incorrect/min": 327.0, "completion_length/incorrect/p25": 506.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 61862.76171875, "completion_length/max": 1024.0, "completion_length/median": 491.0, "completion_length/min": 315.0, "completion_length/p25": 434.75, "completion_length/p75": 704.0, "completion_length/var": 48874.8359375, "curvature_clip_ratio_token_fisher": 0.01058955118060112, "curvature_clip_ratio_token_hessian": 0.007558810990303755, "curvature_clip_ratio_total_fisher": 0.01058955118060112, "curvature_clip_ratio_total_full": 0.01058955118060112, "curvature_clip_ratio_total_hessian": 0.007558810990303755, "epoch": 0.0624, "feature_vector_variance/max_squared_error": 65986.9140625, "feature_vector_variance/metric": 29679.6015625, "generated_tokens/total": 2335595.0, "global_fisher_curvature": 91136.0, "global_fisher_curvature/max": 91136.0, "global_fisher_curvature/median": 91136.0, "global_fisher_curvature/min": 91136.0, "global_fisher_curvature/p25": 91136.0, "global_fisher_curvature/p75": 91136.0, "global_fisher_curvature/p85": 91136.0, "global_fisher_curvature/p90": 91136.0, "global_fisher_curvature/p95": 91136.0, "global_fisher_curvature/p99": 91136.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.22868537902832e-06, "global_fisher_kl_divergence/max": 6.22868537902832e-06, "global_fisher_kl_divergence/median": 6.22868537902832e-06, "global_fisher_kl_divergence/min": 6.22868537902832e-06, "global_fisher_kl_divergence/p25": 6.22868537902832e-06, "global_fisher_kl_divergence/p75": 6.22868537902832e-06, "global_fisher_kl_divergence/p85": 6.22868537902832e-06, "global_fisher_kl_divergence/p90": 6.22868537902832e-06, "global_fisher_kl_divergence/p95": 6.22868537902832e-06, "global_fisher_kl_divergence/p99": 6.22868537902832e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.080078125, "global_full_update_term/max": 0.080078125, "global_full_update_term/median": 0.080078125, "global_full_update_term/min": 0.080078125, "global_full_update_term/p25": 0.080078125, "global_full_update_term/p75": 0.080078125, "global_full_update_term/p85": 0.080078125, "global_full_update_term/p90": 0.080078125, "global_full_update_term/p95": 0.080078125, "global_full_update_term/p99": 0.080078125, "global_full_update_term/var": NaN, "global_hessian_coeff": 23424.0, "global_hessian_coeff/max": 23424.0, "global_hessian_coeff/median": 23424.0, "global_hessian_coeff/min": 23424.0, "global_hessian_coeff/p25": 23424.0, "global_hessian_coeff/p75": 23424.0, "global_hessian_coeff/p99": 23424.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 23424.0, "global_hessian_coeff_abs/max": 23424.0, "global_hessian_coeff_abs/median": 23424.0, "global_hessian_coeff_abs/min": 23424.0, "global_hessian_coeff_abs/p25": 23424.0, "global_hessian_coeff_abs/p75": 23424.0, "global_hessian_coeff_abs/p99": 23424.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03210429474711418, "learning_rate": 1.1474394481749037e-05, "loss": -0.6771, "masked_global_fisher_curvature": 428.0, "masked_global_fisher_curvature/max": 428.0, "masked_global_fisher_curvature/median": 428.0, "masked_global_fisher_curvature/min": 428.0, "masked_global_fisher_curvature/p25": 428.0, "masked_global_fisher_curvature/p75": 428.0, "masked_global_fisher_curvature/p85": 428.0, "masked_global_fisher_curvature/p90": 428.0, "masked_global_fisher_curvature/p95": 428.0, "masked_global_fisher_curvature/p99": 428.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/max": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/median": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/min": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/p25": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/p75": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/p85": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/p90": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/p95": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/p99": 2.922024577856064e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0028228759765625, "masked_global_full_update_term/max": 0.0028228759765625, "masked_global_full_update_term/median": 0.0028228759765625, "masked_global_full_update_term/min": 0.0028228759765625, "masked_global_full_update_term/p25": 0.0028228759765625, "masked_global_full_update_term/p75": 0.0028228759765625, "masked_global_full_update_term/p85": 0.0028228759765625, "masked_global_full_update_term/p90": 0.0028228759765625, "masked_global_full_update_term/p95": 0.0028228759765625, "masked_global_full_update_term/p99": 0.0028228759765625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1376.0, "masked_global_hessian_coeff/max": -1376.0, "masked_global_hessian_coeff/median": -1376.0, "masked_global_hessian_coeff/min": -1376.0, "masked_global_hessian_coeff/p25": -1376.0, "masked_global_hessian_coeff/p75": -1376.0, "masked_global_hessian_coeff/p99": -1376.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1376.0, "masked_global_hessian_coeff_abs/max": 1376.0, "masked_global_hessian_coeff_abs/median": 1376.0, "masked_global_hessian_coeff_abs/min": 1376.0, "masked_global_hessian_coeff_abs/p25": 1376.0, "masked_global_hessian_coeff_abs/p75": 1376.0, "masked_global_hessian_coeff_abs/p99": 1376.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.5135091543197632, "masked_per_sentence_gradient_norm/max": 7.0, "masked_per_sentence_gradient_norm/median": 1.21875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.390625, "masked_per_sentence_gradient_norm/var": 2.4055871963500977, "masked_per_token_gradient_norm": 0.028548788279294968, "masked_per_token_gradient_norm/max": 10.5625, "masked_per_token_gradient_norm/median": 1.1901590823981678e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.42846930027008e-08, "masked_per_token_gradient_norm/var": 0.1246575191617012, "masked_sentence_fisher_curvature": 209.82421875, "masked_sentence_fisher_curvature/max": 1056.0, "masked_sentence_fisher_curvature/median": 175.0, "masked_sentence_fisher_curvature/min": 0.7890625, "masked_sentence_fisher_curvature/p25": 56.9375, "masked_sentence_fisher_curvature/p75": 272.0, "masked_sentence_fisher_curvature/p85": 359.5, "masked_sentence_fisher_curvature/p90": 424.0, "masked_sentence_fisher_curvature/p95": 549.0, "masked_sentence_fisher_curvature/p99": 961.0003051757812, "masked_sentence_fisher_curvature/var": 40570.75, "masked_sentence_fisher_kl_divergence": 1.4345209464750042e-08, "masked_sentence_fisher_kl_divergence/max": 7.217749953269958e-08, "masked_sentence_fisher_kl_divergence/median": 1.1990778148174286e-08, "masked_sentence_fisher_kl_divergence/min": 5.3887561080046e-11, "masked_sentence_fisher_kl_divergence/p25": 3.892637323588133e-09, "masked_sentence_fisher_kl_divergence/p75": 1.862645149230957e-08, "masked_sentence_fisher_kl_divergence/p85": 2.459273673593998e-08, "masked_sentence_fisher_kl_divergence/p90": 2.898741513490677e-08, "masked_sentence_fisher_kl_divergence/p95": 3.754394128918648e-08, "masked_sentence_fisher_kl_divergence/p99": 6.554184750484637e-08, "masked_sentence_fisher_kl_divergence/var": 1.892756795226206e-16, "masked_sentence_full_gradient_variance/max_squared_error": 4.514012336730957, "masked_sentence_full_gradient_variance/metric": 4.514012336730957, "masked_sentence_full_gradient_variance/p75": 4.514012336730957, "masked_sentence_full_gradient_variance/p90": 4.514012336730957, "masked_sentence_full_gradient_variance/p95": 4.514012336730957, "masked_sentence_full_gradient_variance/p99": 4.514012336730957, "masked_sentence_full_update_term": 0.0009754225611686707, "masked_sentence_full_update_term/max": 0.00555419921875, "masked_sentence_full_update_term/median": 0.000865936279296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001300811767578125, "masked_sentence_full_update_term/p85": 0.0019207000732421875, "masked_sentence_full_update_term/p90": 0.00243377685546875, "masked_sentence_full_update_term/p95": 0.003162384033203125, "masked_sentence_full_update_term/p99": 0.004684451036155224, "masked_sentence_full_update_term/var": 1.3075655260763597e-06, "masked_sentence_hessian_coeff": -7075.7294921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6656.0, "masked_sentence_hessian_coeff/min": -43520.0, "masked_sentence_hessian_coeff/p25": -9744.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 63700588.0, "masked_sentence_hessian_coeff_abs": 7075.7294921875, "masked_sentence_hessian_coeff_abs/max": 43520.0, "masked_sentence_hessian_coeff_abs/median": 6112.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 9744.0, "masked_sentence_hessian_coeff_abs/p99": 37440.01953125, "masked_sentence_hessian_coeff_abs/var": 63700588.0, "masked_token_fisher_curvature": 248.4395294189453, "masked_token_fisher_curvature/max": 144384.0, "masked_token_fisher_curvature/median": 3.447762908503904e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 9.55393607498747e-23, "masked_token_fisher_curvature/p75": 5.343281372915953e-12, "masked_token_fisher_curvature/p85": 2.852175384759903e-09, "masked_token_fisher_curvature/p90": 4.6416244003921747e-07, "masked_token_fisher_curvature/p95": 0.002148449420928955, "masked_token_fisher_curvature/p99": 1312.0, "masked_token_fisher_curvature/var": 15632929.0, "masked_token_fisher_kl_divergence": 1.6989318396554154e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 2.3602718284212673e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 6.548161810916602e-33, "masked_token_fisher_kl_divergence/p75": 3.6561383074843823e-22, "masked_token_fisher_kl_divergence/p85": 1.9481757786848908e-19, "masked_token_fisher_kl_divergence/p90": 3.178829947750664e-17, "masked_token_fisher_kl_divergence/p95": 1.4708373408112152e-13, "masked_token_fisher_kl_divergence/p99": 8.987262845039368e-08, "masked_token_fisher_kl_divergence/var": 7.313394274942667e-14, "masked_token_full_update_term": 1.1519731742737349e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.246830940246582e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.596323321948148e-14, "masked_token_full_update_term/p85": 1.0345502232667059e-11, "masked_token_full_update_term/p90": 2.1827872842550278e-10, "masked_token_full_update_term/p95": 3.073364496231079e-08, "masked_token_full_update_term/p99": 9.012222290039062e-05, "masked_token_full_update_term/var": 2.2009627898000872e-08, "masked_token_hessian_coeff": -9615.9560546875, "masked_token_hessian_coeff/max": 61.5, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -3653632.0, "masked_token_hessian_coeff/p25": -5.5730342864990234e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.001800537109375, "masked_token_hessian_coeff/var": 16238876672.0, "masked_token_hessian_coeff_abs": 9615.96484375, "masked_token_hessian_coeff_abs/max": 3653632.0, "masked_token_hessian_coeff_abs/median": 1.5006662579253316e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.1219253540039062e-05, "masked_token_hessian_coeff_abs/p99": 41502.0, "masked_token_hessian_coeff_abs/var": 16238876672.0, "mean_logprobs": -0.0101318359375, "mean_logprobs/var": 5.1975250244140625e-05, "num_completions/total": 3744, "per_sentence_gradient_norm": 40.974609375, "per_sentence_gradient_norm/max": 238.0, "per_sentence_gradient_norm/median": 29.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 57.875, "per_sentence_gradient_norm/var": 2356.031005859375, "per_token_feature_norm": 187.00546264648438, "per_token_feature_norm/max": 272.0, "per_token_feature_norm/median": 187.0, "per_token_feature_norm/min": 106.5, "per_token_feature_norm/p25": 181.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 149.9012451171875, "per_token_gradient_norm": 0.8616054058074951, "per_token_gradient_norm/max": 306.0, "per_token_gradient_norm/median": 1.9184653865522705e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.100124791264534e-07, "per_token_gradient_norm/var": 117.90339660644531, "per_token_policy_error_norm": 0.005465945694595575, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0055050114169716835, "policy_entropy": 0.00958130694925785, "policy_entropy/max": 1.8828125, "policy_entropy/median": 1.979060471057892e-09, "policy_entropy/min": 1.641126335305207e-20, "policy_entropy/p25": 5.343281372915953e-12, "policy_entropy/p75": 4.009343683719635e-07, "policy_entropy/var": 0.004930804017931223, "policy_loss": -0.6770833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.22094298899173737, "policy_sharpness": 9.708147048950195, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.820285677909851, "reward": 0.6770833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.22094298899173737, "rewards/accuracy_reward": 0.6770833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.22094298899173737, "sentence_fisher_curvature": 203566.0, "sentence_fisher_curvature/max": 933888.0, "sentence_fisher_curvature/median": 159744.0, "sentence_fisher_curvature/min": 11.875, "sentence_fisher_curvature/p25": 684.0, "sentence_fisher_curvature/p75": 316928.0, "sentence_fisher_curvature/p85": 433664.0, "sentence_fisher_curvature/p90": 487424.0, "sentence_fisher_curvature/p95": 594944.0, "sentence_fisher_curvature/p99": 871629.0, "sentence_fisher_curvature/var": 46861795328.0, "sentence_fisher_kl_divergence": 1.3912506801716518e-05, "sentence_fisher_kl_divergence/max": 6.389617919921875e-05, "sentence_fisher_kl_divergence/median": 1.0907649993896484e-05, "sentence_fisher_kl_divergence/min": 8.11269273981452e-10, "sentence_fisher_kl_divergence/p25": 4.674075171351433e-08, "sentence_fisher_kl_divergence/p75": 2.1666288375854492e-05, "sentence_fisher_kl_divergence/p85": 2.9653310775756836e-05, "sentence_fisher_kl_divergence/p90": 3.325939178466797e-05, "sentence_fisher_kl_divergence/p95": 4.0650367736816406e-05, "sentence_fisher_kl_divergence/p99": 5.9592737670755014e-05, "sentence_fisher_kl_divergence/var": 2.1900499469484913e-10, "sentence_full_gradient_variance/max_squared_error": 3948.40771484375, "sentence_full_gradient_variance/metric": 3948.40771484375, "sentence_full_gradient_variance/p75": 3948.40771484375, "sentence_full_gradient_variance/p90": 3948.40771484375, "sentence_full_gradient_variance/p95": 3948.40771484375, "sentence_full_gradient_variance/p99": 3948.40771484375, "sentence_full_update_term": 0.030879339203238487, "sentence_full_update_term/max": 0.2109375, "sentence_full_update_term/median": 0.0234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.03887939453125, "sentence_full_update_term/p85": 0.0528564453125, "sentence_full_update_term/p90": 0.06591796875, "sentence_full_update_term/p95": 0.1015625, "sentence_full_update_term/p99": 0.1747559756040573, "sentence_full_update_term/var": 0.0015789915341883898, "sentence_hessian_coeff": 30087.0, "sentence_hessian_coeff/max": 352256.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -167936.0, "sentence_hessian_coeff/p25": -22272.0, "sentence_hessian_coeff/p75": 75136.0, "sentence_hessian_coeff/p99": 348364.8125, "sentence_hessian_coeff/var": 12576146432.0, "sentence_hessian_coeff_abs": 75081.671875, "sentence_hessian_coeff_abs/max": 352256.0, "sentence_hessian_coeff_abs/median": 45824.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 105728.0, "sentence_hessian_coeff_abs/p99": 348364.8125, "sentence_hessian_coeff_abs/var": 7794306048.0, "step": 39, "token_fisher_curvature": 178229.375, "token_fisher_curvature/max": 114294784.0, "token_fisher_curvature/median": 4.5536491244391186e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.1394412937917956e-22, "token_fisher_curvature/p75": 7.972289495228324e-12, "token_fisher_curvature/p85": 6.288018994382583e-09, "token_fisher_curvature/p90": 1.952052116394043e-06, "token_fisher_curvature/p95": 0.02197265625, "token_fisher_curvature/p99": 169984.0, "token_fisher_curvature/var": 9607817723904.0, "token_fisher_kl_divergence": 1.2184695151518099e-05, "token_fisher_kl_divergence/max": 0.0078125, "token_fisher_kl_divergence/median": 3.1175782974334387e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 7.787979212615885e-33, "token_fisher_kl_divergence/p75": 5.442848430598922e-22, "token_fisher_kl_divergence/p85": 4.303853814337905e-19, "token_fisher_kl_divergence/p90": 1.3357370765021415e-16, "token_fisher_kl_divergence/p95": 1.4992451724538114e-12, "token_fisher_kl_divergence/p99": 1.1622905731201172e-05, "token_fisher_kl_divergence/var": 4.490435401294235e-08, "token_full_update_term": 0.0003913485852535814, "token_full_update_term/max": 0.1318359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.246830940246582e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.972200594645983e-14, "token_full_update_term/p85": 1.602984411874786e-11, "token_full_update_term/p90": 4.511093720793724e-10, "token_full_update_term/p95": 1.2852251529693604e-07, "token_full_update_term/p99": 0.0043756961822509766, "token_full_update_term/var": 2.5258954337914474e-05, "token_hessian_coeff": 16430.359375, "token_hessian_coeff/max": 114294784.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -14417920.0, "token_hessian_coeff/p25": -7.241964340209961e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.003387451171875, "token_hessian_coeff/var": 6426159742976.0, "token_hessian_coeff_abs": 168624.4375, "token_hessian_coeff_abs/max": 114294784.0, "token_hessian_coeff_abs/median": 2.808064891723916e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.753734588623047e-05, "token_hessian_coeff_abs/p99": 3271744.0, "token_hessian_coeff_abs/var": 6397994991616.0 }, { "accuracy_reward": 0.7083333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20877191424369812, "adam_stats/lm_head/lr_effective_max": 4.9028676585294306e-05, "adam_stats/lm_head/lr_effective_mean": 2.9046112737241003e-11, "adam_stats/lm_head/lr_effective_min": -4.999584416509606e-05, "adam_stats/lm_head/lr_effective_std": 1.1280010312475497e-06, "adam_stats/lr_effective_max": 5.931519262958318e-05, "adam_stats/lr_effective_mean": -1.5980458623054972e-10, "adam_stats/lr_effective_min": -5.949816477368586e-05, "adam_stats/m_t_max": 0.0008073162171058357, "adam_stats/m_t_mean": 8.128649663097232e-12, "adam_stats/m_t_min": -0.0007355159032158554, "adam_stats/v_t_max": 2.5803015887504444e-05, "adam_stats/v_t_mean": 1.7451131685553012e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7083333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20877191424369812, "all_logprobs": -0.008946890011429787, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -1.7881393432617188e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00012683868408203125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012050053104758263, "clip_ratio": 0.0, "completion_length": 547.3541870117188, "completion_length/correct": 500.5735168457031, "completion_length/correct/max": 885.0, "completion_length/correct/median": 503.0, "completion_length/correct/min": 246.0, "completion_length/correct/p25": 324.5, "completion_length/correct/p75": 647.75, "completion_length/correct/var": 37139.0546875, "completion_length/incorrect": 660.9642944335938, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 513.0, "completion_length/incorrect/min": 298.0, "completion_length/incorrect/p25": 447.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 81674.0390625, "completion_length/max": 1024.0, "completion_length/median": 513.0, "completion_length/min": 246.0, "completion_length/p25": 325.0, "completion_length/p75": 731.0, "completion_length/var": 54776.1328125, "curvature_clip_ratio_token_fisher": 0.009705781936645508, "curvature_clip_ratio_token_hessian": 0.006603737827390432, "curvature_clip_ratio_total_fisher": 0.009705781936645508, "curvature_clip_ratio_total_full": 0.009705781936645508, "curvature_clip_ratio_total_hessian": 0.006603737827390432, "epoch": 0.064, "feature_vector_variance/max_squared_error": 65493.5078125, "feature_vector_variance/metric": 29323.01953125, "generated_tokens/total": 2388141.0, "global_fisher_curvature": 74240.0, "global_fisher_curvature/max": 74240.0, "global_fisher_curvature/median": 74240.0, "global_fisher_curvature/min": 74240.0, "global_fisher_curvature/p25": 74240.0, "global_fisher_curvature/p75": 74240.0, "global_fisher_curvature/p85": 74240.0, "global_fisher_curvature/p90": 74240.0, "global_fisher_curvature/p95": 74240.0, "global_fisher_curvature/p99": 74240.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.887580871582031e-06, "global_fisher_kl_divergence/max": 4.887580871582031e-06, "global_fisher_kl_divergence/median": 4.887580871582031e-06, "global_fisher_kl_divergence/min": 4.887580871582031e-06, "global_fisher_kl_divergence/p25": 4.887580871582031e-06, "global_fisher_kl_divergence/p75": 4.887580871582031e-06, "global_fisher_kl_divergence/p85": 4.887580871582031e-06, "global_fisher_kl_divergence/p90": 4.887580871582031e-06, "global_fisher_kl_divergence/p95": 4.887580871582031e-06, "global_fisher_kl_divergence/p99": 4.887580871582031e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.06884765625, "global_full_update_term/max": 0.06884765625, "global_full_update_term/median": 0.06884765625, "global_full_update_term/min": 0.06884765625, "global_full_update_term/p25": 0.06884765625, "global_full_update_term/p75": 0.06884765625, "global_full_update_term/p85": 0.06884765625, "global_full_update_term/p90": 0.06884765625, "global_full_update_term/p95": 0.06884765625, "global_full_update_term/p99": 0.06884765625, "global_full_update_term/var": NaN, "global_hessian_coeff": 11072.0, "global_hessian_coeff/max": 11072.0, "global_hessian_coeff/median": 11072.0, "global_hessian_coeff/min": 11072.0, "global_hessian_coeff/p25": 11072.0, "global_hessian_coeff/p75": 11072.0, "global_hessian_coeff/p99": 11072.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 11072.0, "global_hessian_coeff_abs/max": 11072.0, "global_hessian_coeff_abs/median": 11072.0, "global_hessian_coeff_abs/min": 11072.0, "global_hessian_coeff_abs/p25": 11072.0, "global_hessian_coeff_abs/p75": 11072.0, "global_hessian_coeff_abs/p99": 11072.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.0498192235827446, "learning_rate": 1.125e-05, "loss": -0.7083, "masked_global_fisher_curvature": 174.0, "masked_global_fisher_curvature/max": 174.0, "masked_global_fisher_curvature/median": 174.0, "masked_global_fisher_curvature/min": 174.0, "masked_global_fisher_curvature/p25": 174.0, "masked_global_fisher_curvature/p75": 174.0, "masked_global_fisher_curvature/p85": 174.0, "masked_global_fisher_curvature/p90": 174.0, "masked_global_fisher_curvature/p95": 174.0, "masked_global_fisher_curvature/p99": 174.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/max": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/median": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/min": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/p25": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/p75": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/p85": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/p90": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/p95": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/p99": 1.146690919995308e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002593994140625, "masked_global_full_update_term/max": 0.002593994140625, "masked_global_full_update_term/median": 0.002593994140625, "masked_global_full_update_term/min": 0.002593994140625, "masked_global_full_update_term/p25": 0.002593994140625, "masked_global_full_update_term/p75": 0.002593994140625, "masked_global_full_update_term/p85": 0.002593994140625, "masked_global_full_update_term/p90": 0.002593994140625, "masked_global_full_update_term/p95": 0.002593994140625, "masked_global_full_update_term/p99": 0.002593994140625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2464.0, "masked_global_hessian_coeff/max": -2464.0, "masked_global_hessian_coeff/median": -2464.0, "masked_global_hessian_coeff/min": -2464.0, "masked_global_hessian_coeff/p25": -2464.0, "masked_global_hessian_coeff/p75": -2464.0, "masked_global_hessian_coeff/p99": -2464.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2464.0, "masked_global_hessian_coeff_abs/max": 2464.0, "masked_global_hessian_coeff_abs/median": 2464.0, "masked_global_hessian_coeff_abs/min": 2464.0, "masked_global_hessian_coeff_abs/p25": 2464.0, "masked_global_hessian_coeff_abs/p75": 2464.0, "masked_global_hessian_coeff_abs/p99": 2464.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.0760092735290527, "masked_per_sentence_gradient_norm/max": 6.96875, "masked_per_sentence_gradient_norm/median": 1.7109375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.26171875, "masked_per_sentence_gradient_norm/var": 4.242676734924316, "masked_per_token_gradient_norm": 0.029564691707491875, "masked_per_token_gradient_norm/max": 10.625, "masked_per_token_gradient_norm/median": 2.8563817977556027e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.842877388000488e-08, "masked_per_token_gradient_norm/var": 0.13472075760364532, "masked_sentence_fisher_curvature": 241.049072265625, "masked_sentence_fisher_curvature/max": 912.0, "masked_sentence_fisher_curvature/median": 191.0, "masked_sentence_fisher_curvature/min": 1.5625, "masked_sentence_fisher_curvature/p25": 117.0, "masked_sentence_fisher_curvature/p75": 341.0, "masked_sentence_fisher_curvature/p85": 425.5, "masked_sentence_fisher_curvature/p90": 433.0, "masked_sentence_fisher_curvature/p95": 526.0, "masked_sentence_fisher_curvature/p99": 672.6007690429688, "masked_sentence_fisher_curvature/var": 27926.671875, "masked_sentence_fisher_kl_divergence": 1.5860109670029487e-08, "masked_sentence_fisher_kl_divergence/max": 6.007030606269836e-08, "masked_sentence_fisher_kl_divergence/median": 1.257285475730896e-08, "masked_sentence_fisher_kl_divergence/min": 1.0277290130034089e-10, "masked_sentence_fisher_kl_divergence/p25": 7.683411240577698e-09, "masked_sentence_fisher_kl_divergence/p75": 2.2439053282141685e-08, "masked_sentence_fisher_kl_divergence/p85": 2.7997884899377823e-08, "masked_sentence_fisher_kl_divergence/p90": 2.8463546186685562e-08, "masked_sentence_fisher_kl_divergence/p95": 3.457535058259964e-08, "masked_sentence_fisher_kl_divergence/p99": 4.43659295967791e-08, "masked_sentence_fisher_kl_divergence/var": 1.20923879387902e-16, "masked_sentence_full_gradient_variance/max_squared_error": 7.984858512878418, "masked_sentence_full_gradient_variance/metric": 7.984858512878418, "masked_sentence_full_gradient_variance/p75": 7.984858512878418, "masked_sentence_full_gradient_variance/p90": 7.984858512878418, "masked_sentence_full_gradient_variance/p95": 7.984858512878418, "masked_sentence_full_gradient_variance/p99": 7.984858512878418, "masked_sentence_full_update_term": 0.0012564858188852668, "masked_sentence_full_update_term/max": 0.005126953125, "masked_sentence_full_update_term/median": 0.0011444091796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0019989013671875, "masked_sentence_full_update_term/p85": 0.00238037109375, "masked_sentence_full_update_term/p90": 0.00295257568359375, "masked_sentence_full_update_term/p95": 0.003429412841796875, "masked_sentence_full_update_term/p99": 0.004518129397183657, "masked_sentence_full_update_term/var": 1.5096859442564892e-06, "masked_sentence_hessian_coeff": -7682.625, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7424.0, "masked_sentence_hessian_coeff/min": -23552.0, "masked_sentence_hessian_coeff/p25": -13056.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 51564308.0, "masked_sentence_hessian_coeff_abs": 7682.625, "masked_sentence_hessian_coeff_abs/max": 23552.0, "masked_sentence_hessian_coeff_abs/median": 7008.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 13056.0, "masked_sentence_hessian_coeff_abs/p99": 23430.400390625, "masked_sentence_hessian_coeff_abs/var": 51564308.0, "masked_token_fisher_curvature": 293.3439025878906, "masked_token_fisher_curvature/max": 150528.0, "masked_token_fisher_curvature/median": 5.637851296924623e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.1270335846035002e-23, "masked_token_fisher_curvature/p75": 1.5845103007450234e-12, "masked_token_fisher_curvature/p85": 6.402842700481415e-10, "masked_token_fisher_curvature/p90": 6.705522537231445e-08, "masked_token_fisher_curvature/p95": 0.00041961669921875, "masked_token_fisher_curvature/p99": 1936.0, "masked_token_fisher_curvature/var": 18197050.0, "masked_token_fisher_kl_divergence": 1.9309752019580628e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 3.7076462545387555e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 7.432885879119488e-34, "masked_token_fisher_kl_divergence/p75": 1.0422475718168149e-22, "masked_token_fisher_kl_divergence/p85": 4.213988912590144e-20, "masked_token_fisher_kl_divergence/p90": 4.4181238528784306e-18, "masked_token_fisher_kl_divergence/p95": 2.7644553313166398e-14, "masked_token_fisher_kl_divergence/p99": 1.2759119272232056e-07, "masked_token_fisher_kl_divergence/var": 7.882772532592439e-14, "masked_token_full_update_term": 1.2166226952103898e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -7.078051567077637e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.9531932455029164e-14, "masked_token_full_update_term/p85": 4.945377440890297e-12, "masked_token_full_update_term/p90": 8.094502845779061e-11, "masked_token_full_update_term/p95": 6.83940015733242e-09, "masked_token_full_update_term/p99": 7.486343383789062e-05, "masked_token_full_update_term/var": 2.3891201905712478e-08, "masked_token_hessian_coeff": -10502.4716796875, "masked_token_hessian_coeff/max": 120.0, "masked_token_hessian_coeff/median": -7.887024366937112e-13, "masked_token_hessian_coeff/min": -3620864.0, "masked_token_hessian_coeff/p25": -3.129243850708008e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.00225830078125, "masked_token_hessian_coeff/var": 18492760064.0, "masked_token_hessian_coeff_abs": 10502.498046875, "masked_token_hessian_coeff_abs/max": 3620864.0, "masked_token_hessian_coeff_abs/median": 4.3655745685100555e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.1742115020751953e-05, "masked_token_hessian_coeff_abs/p99": 33781.0, "masked_token_hessian_coeff_abs/var": 18492760064.0, "mean_logprobs": -0.00946044921875, "mean_logprobs/var": 6.198883056640625e-05, "num_completions/total": 3840, "per_sentence_gradient_norm": 46.185546875, "per_sentence_gradient_norm/max": 205.0, "per_sentence_gradient_norm/median": 24.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 82.125, "per_sentence_gradient_norm/var": 2923.257568359375, "per_token_feature_norm": 186.8802947998047, "per_token_feature_norm/max": 268.0, "per_token_feature_norm/median": 187.0, "per_token_feature_norm/min": 110.0, "per_token_feature_norm/p25": 181.0, "per_token_feature_norm/p75": 193.0, "per_token_feature_norm/var": 140.14454650878906, "per_token_gradient_norm": 0.7445462942123413, "per_token_gradient_norm/max": 270.0, "per_token_gradient_norm/median": 3.481659405224491e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 6.239861249923706e-08, "per_token_gradient_norm/var": 96.36804962158203, "per_token_policy_error_norm": 0.005306622479110956, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005070856306701899, "policy_entropy": 0.009879088029265404, "policy_entropy/max": 1.921875, "policy_entropy/median": 9.458744898438454e-10, "policy_entropy/min": 3.2610768469290563e-20, "policy_entropy/p25": 1.8047785488306545e-12, "policy_entropy/p75": 2.270098775625229e-07, "policy_entropy/var": 0.0052086166106164455, "policy_loss": -0.7083333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20877191424369812, "policy_sharpness": 9.721229553222656, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8254356384277344, "reward": 0.7083333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20877191424369812, "rewards/accuracy_reward": 0.7083333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20877191424369812, "sentence_fisher_curvature": 165191.046875, "sentence_fisher_curvature/max": 1204224.0, "sentence_fisher_curvature/median": 115200.0, "sentence_fisher_curvature/min": 67.0, "sentence_fisher_curvature/p25": 1150.0, "sentence_fisher_curvature/p75": 303104.0, "sentence_fisher_curvature/p85": 399872.0, "sentence_fisher_curvature/p90": 427008.0, "sentence_fisher_curvature/p95": 463360.0, "sentence_fisher_curvature/p99": 632219.4375, "sentence_fisher_curvature/var": 41082306560.0, "sentence_fisher_kl_divergence": 1.0874645340663847e-05, "sentence_fisher_kl_divergence/max": 7.915496826171875e-05, "sentence_fisher_kl_divergence/median": 7.569789886474609e-06, "sentence_fisher_kl_divergence/min": 4.423782229423523e-09, "sentence_fisher_kl_divergence/p25": 7.566995918750763e-08, "sentence_fisher_kl_divergence/p75": 1.9937753677368164e-05, "sentence_fisher_kl_divergence/p85": 2.6345252990722656e-05, "sentence_fisher_kl_divergence/p90": 2.8073787689208984e-05, "sentence_fisher_kl_divergence/p95": 3.0487775802612305e-05, "sentence_fisher_kl_divergence/p99": 4.155647911829874e-05, "sentence_fisher_kl_divergence/var": 1.7786268291519747e-10, "sentence_full_gradient_variance/max_squared_error": 4936.412109375, "sentence_full_gradient_variance/metric": 4936.412109375, "sentence_full_gradient_variance/p75": 4936.412109375, "sentence_full_gradient_variance/p90": 4936.412109375, "sentence_full_gradient_variance/p95": 4936.412109375, "sentence_full_gradient_variance/p99": 4936.412109375, "sentence_full_update_term": 0.03422260284423828, "sentence_full_update_term/max": 0.140625, "sentence_full_update_term/median": 0.0174560546875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.06201171875, "sentence_full_update_term/p85": 0.07080078125, "sentence_full_update_term/p90": 0.0849609375, "sentence_full_update_term/p95": 0.1112060546875, "sentence_full_update_term/p99": 0.12578129768371582, "sentence_full_update_term/var": 0.0014040564419701695, "sentence_hessian_coeff": 14308.0, "sentence_hessian_coeff/max": 1015808.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -166912.0, "sentence_hessian_coeff/p25": -69120.0, "sentence_hessian_coeff/p75": 29536.0, "sentence_hessian_coeff/p99": 480769.71875, "sentence_hessian_coeff/var": 20838955008.0, "sentence_hessian_coeff_abs": 74898.671875, "sentence_hessian_coeff_abs/max": 1015808.0, "sentence_hessian_coeff_abs/median": 44800.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 89088.0, "sentence_hessian_coeff_abs/p99": 480769.71875, "sentence_hessian_coeff_abs/var": 15376966656.0, "step": 40, "token_fisher_curvature": 147773.53125, "token_fisher_curvature/max": 111673344.0, "token_fisher_curvature/median": 7.318364664277155e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.3131492224279314e-23, "token_fisher_curvature/p75": 2.2168933355715126e-12, "token_fisher_curvature/p85": 1.127773430198431e-09, "token_fisher_curvature/p90": 2.039596438407898e-07, "token_fisher_curvature/p95": 0.005950927734375, "token_fisher_curvature/p99": 106496.0, "token_fisher_curvature/var": 7776489701376.0, "token_fisher_kl_divergence": 9.727873475640081e-06, "token_fisher_kl_divergence/max": 0.007354736328125, "token_fisher_kl_divergence/median": 4.827828739952592e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 8.666684749742561e-34, "token_fisher_kl_divergence/p75": 1.4558378780933287e-22, "token_fisher_kl_divergence/p85": 7.411538288475128e-20, "token_fisher_kl_divergence/p90": 1.3444106938820255e-17, "token_fisher_kl_divergence/p95": 3.9257486150745535e-13, "token_fisher_kl_divergence/p99": 7.0035457611083984e-06, "token_fisher_kl_divergence/var": 3.370056944618227e-08, "token_full_update_term": 0.0003338824026286602, "token_full_update_term/max": 0.1279296875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -7.078051567077637e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.9745984281580604e-14, "token_full_update_term/p85": 7.844391802791506e-12, "token_full_update_term/p90": 1.3642420526593924e-10, "token_full_update_term/p95": 2.6368070393800735e-08, "token_full_update_term/p99": 0.00319749116897583, "token_full_update_term/var": 2.0151510398136452e-05, "token_hessian_coeff": 174.88706970214844, "token_hessian_coeff/max": 110100480.0, "token_hessian_coeff/median": -1.1084466677857563e-12, "token_hessian_coeff/min": -14614528.0, "token_hessian_coeff/p25": -3.814697265625e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00428926944732666, "token_hessian_coeff/var": 5417247178752.0, "token_hessian_coeff_abs": 147400.234375, "token_hessian_coeff_abs/max": 110100480.0, "token_hessian_coeff_abs/median": 5.456968210637569e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.5079975128173828e-05, "token_hessian_coeff_abs/p99": 2490368.0, "token_hessian_coeff_abs/var": 5395520159744.0 }, { "accuracy_reward": 0.8229166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.14725877344608307, "adam_stats/lm_head/lr_effective_max": 4.688035187427886e-05, "adam_stats/lm_head/lr_effective_mean": 5.63169268164021e-11, "adam_stats/lm_head/lr_effective_min": -4.716418334282935e-05, "adam_stats/lm_head/lr_effective_std": 1.1445857808212168e-06, "adam_stats/lr_effective_max": 5.823949686600827e-05, "adam_stats/lr_effective_mean": -1.044902220304067e-10, "adam_stats/lr_effective_min": -5.920492185396142e-05, "adam_stats/m_t_max": 0.000668601191136986, "adam_stats/m_t_mean": 7.77857899619816e-12, "adam_stats/m_t_min": -0.0006283949478529394, "adam_stats/v_t_max": 2.5777258997550234e-05, "adam_stats/v_t_mean": 1.7435084409198054e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8229166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.14725877344608307, "all_logprobs": -0.009541373699903488, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.75, "all_logprobs/p1": -0.251953125, "all_logprobs/p10": -4.291534423828125e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0004093172028660774, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012175691314041615, "clip_ratio": 0.0, "completion_length": 498.8020935058594, "completion_length/correct": 474.088623046875, "completion_length/correct/max": 900.0, "completion_length/correct/median": 459.0, "completion_length/correct/min": 211.0, "completion_length/correct/p25": 307.5, "completion_length/correct/p75": 601.5, "completion_length/correct/var": 36412.234375, "completion_length/incorrect": 613.6470336914062, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 413.0, "completion_length/incorrect/min": 279.0, "completion_length/incorrect/p25": 296.0, "completion_length/incorrect/p75": 930.0, "completion_length/incorrect/var": 112500.9921875, "completion_length/max": 1024.0, "completion_length/median": 453.0, "completion_length/min": 211.0, "completion_length/p25": 296.0, "completion_length/p75": 646.75, "completion_length/var": 51711.98828125, "curvature_clip_ratio_token_fisher": 0.017354076728224754, "curvature_clip_ratio_token_hessian": 0.012133236043155193, "curvature_clip_ratio_total_fisher": 0.017354076728224754, "curvature_clip_ratio_total_full": 0.017354076728224754, "curvature_clip_ratio_total_hessian": 0.012133236043155193, "epoch": 0.0656, "feature_vector_variance/max_squared_error": 67042.90625, "feature_vector_variance/metric": 29732.82421875, "generated_tokens/total": 2436026.0, "global_fisher_curvature": 103424.0, "global_fisher_curvature/max": 103424.0, "global_fisher_curvature/median": 103424.0, "global_fisher_curvature/min": 103424.0, "global_fisher_curvature/p25": 103424.0, "global_fisher_curvature/p75": 103424.0, "global_fisher_curvature/p85": 103424.0, "global_fisher_curvature/p90": 103424.0, "global_fisher_curvature/p95": 103424.0, "global_fisher_curvature/p99": 103424.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.556510925292969e-06, "global_fisher_kl_divergence/max": 6.556510925292969e-06, "global_fisher_kl_divergence/median": 6.556510925292969e-06, "global_fisher_kl_divergence/min": 6.556510925292969e-06, "global_fisher_kl_divergence/p25": 6.556510925292969e-06, "global_fisher_kl_divergence/p75": 6.556510925292969e-06, "global_fisher_kl_divergence/p85": 6.556510925292969e-06, "global_fisher_kl_divergence/p90": 6.556510925292969e-06, "global_fisher_kl_divergence/p95": 6.556510925292969e-06, "global_fisher_kl_divergence/p99": 6.556510925292969e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.09033203125, "global_full_update_term/max": 0.09033203125, "global_full_update_term/median": 0.09033203125, "global_full_update_term/min": 0.09033203125, "global_full_update_term/p25": 0.09033203125, "global_full_update_term/p75": 0.09033203125, "global_full_update_term/p85": 0.09033203125, "global_full_update_term/p90": 0.09033203125, "global_full_update_term/p95": 0.09033203125, "global_full_update_term/p99": 0.09033203125, "global_full_update_term/var": NaN, "global_hessian_coeff": 1840.0, "global_hessian_coeff/max": 1840.0, "global_hessian_coeff/median": 1840.0, "global_hessian_coeff/min": 1840.0, "global_hessian_coeff/p25": 1840.0, "global_hessian_coeff/p75": 1840.0, "global_hessian_coeff/p99": 1840.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 1840.0, "global_hessian_coeff_abs/max": 1840.0, "global_hessian_coeff_abs/median": 1840.0, "global_hessian_coeff_abs/min": 1840.0, "global_hessian_coeff_abs/p25": 1840.0, "global_hessian_coeff_abs/p75": 1840.0, "global_hessian_coeff_abs/p99": 1840.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03516928106546402, "learning_rate": 1.1021036720894182e-05, "loss": -0.8229, "masked_global_fisher_curvature": 174.0, "masked_global_fisher_curvature/max": 174.0, "masked_global_fisher_curvature/median": 174.0, "masked_global_fisher_curvature/min": 174.0, "masked_global_fisher_curvature/p25": 174.0, "masked_global_fisher_curvature/p75": 174.0, "masked_global_fisher_curvature/p85": 174.0, "masked_global_fisher_curvature/p90": 174.0, "masked_global_fisher_curvature/p95": 174.0, "masked_global_fisher_curvature/p99": 174.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/max": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/median": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/min": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/p25": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/p75": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/p85": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/p90": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/p95": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/p99": 1.100124791264534e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.006072998046875, "masked_global_full_update_term/max": 0.006072998046875, "masked_global_full_update_term/median": 0.006072998046875, "masked_global_full_update_term/min": 0.006072998046875, "masked_global_full_update_term/p25": 0.006072998046875, "masked_global_full_update_term/p75": 0.006072998046875, "masked_global_full_update_term/p85": 0.006072998046875, "masked_global_full_update_term/p90": 0.006072998046875, "masked_global_full_update_term/p95": 0.006072998046875, "masked_global_full_update_term/p99": 0.006072998046875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5408.0, "masked_global_hessian_coeff/max": -5408.0, "masked_global_hessian_coeff/median": -5408.0, "masked_global_hessian_coeff/min": -5408.0, "masked_global_hessian_coeff/p25": -5408.0, "masked_global_hessian_coeff/p75": -5408.0, "masked_global_hessian_coeff/p99": -5408.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5408.0, "masked_global_hessian_coeff_abs/max": 5408.0, "masked_global_hessian_coeff_abs/median": 5408.0, "masked_global_hessian_coeff_abs/min": 5408.0, "masked_global_hessian_coeff_abs/p25": 5408.0, "masked_global_hessian_coeff_abs/p75": 5408.0, "masked_global_hessian_coeff_abs/p99": 5408.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.085845947265625, "masked_per_sentence_gradient_norm/max": 5.78125, "masked_per_sentence_gradient_norm/median": 1.65625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.43896484375, "masked_per_sentence_gradient_norm/p75": 3.40625, "masked_per_sentence_gradient_norm/var": 3.166081190109253, "masked_per_token_gradient_norm": 0.043480243533849716, "masked_per_token_gradient_norm/max": 11.5625, "masked_per_token_gradient_norm/median": 7.130438461899757e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 3.0010716134398763e-15, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 5.662441253662109e-07, "masked_per_token_gradient_norm/var": 0.21397261321544647, "masked_sentence_fisher_curvature": 317.0690612792969, "masked_sentence_fisher_curvature/max": 1424.0, "masked_sentence_fisher_curvature/median": 207.0, "masked_sentence_fisher_curvature/min": 0.8359375, "masked_sentence_fisher_curvature/p25": 116.625, "masked_sentence_fisher_curvature/p75": 489.5, "masked_sentence_fisher_curvature/p85": 590.0, "masked_sentence_fisher_curvature/p90": 678.0, "masked_sentence_fisher_curvature/p95": 804.0, "masked_sentence_fisher_curvature/p99": 1059.201171875, "masked_sentence_fisher_curvature/var": 73938.6875, "masked_sentence_fisher_kl_divergence": 2.0071849959890642e-08, "masked_sentence_fisher_kl_divergence/max": 9.033828973770142e-08, "masked_sentence_fisher_kl_divergence/median": 1.3096723705530167e-08, "masked_sentence_fisher_kl_divergence/min": 5.297806637827307e-11, "masked_sentence_fisher_kl_divergence/p25": 7.3705450631678104e-09, "masked_sentence_fisher_kl_divergence/p75": 3.102468326687813e-08, "masked_sentence_fisher_kl_divergence/p85": 3.7369318306446075e-08, "masked_sentence_fisher_kl_divergence/p90": 4.284083843231201e-08, "masked_sentence_fisher_kl_divergence/p95": 5.098991096019745e-08, "masked_sentence_fisher_kl_divergence/p99": 6.689231923928673e-08, "masked_sentence_fisher_kl_divergence/var": 2.965208503150925e-16, "masked_sentence_full_gradient_variance/max_squared_error": 7.196744918823242, "masked_sentence_full_gradient_variance/metric": 7.196744918823242, "masked_sentence_full_gradient_variance/p75": 7.196744918823242, "masked_sentence_full_gradient_variance/p90": 7.196744918823242, "masked_sentence_full_gradient_variance/p95": 7.196744918823242, "masked_sentence_full_gradient_variance/p99": 7.196744918823242, "masked_sentence_full_update_term": 0.0013988863211125135, "masked_sentence_full_update_term/max": 0.00518798828125, "masked_sentence_full_update_term/median": 0.00128173828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00024080276489257812, "masked_sentence_full_update_term/p75": 0.0022125244140625, "masked_sentence_full_update_term/p85": 0.00276947021484375, "masked_sentence_full_update_term/p90": 0.00289154052734375, "masked_sentence_full_update_term/p95": 0.003704071044921875, "masked_sentence_full_update_term/p99": 0.005043030250817537, "masked_sentence_full_update_term/var": 1.6208558690777863e-06, "masked_sentence_hessian_coeff": -12446.458984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -10176.0, "masked_sentence_hessian_coeff/min": -50176.0, "masked_sentence_hessian_coeff/p25": -19680.0, "masked_sentence_hessian_coeff/p75": -2608.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 130484136.0, "masked_sentence_hessian_coeff_abs": 12446.458984375, "masked_sentence_hessian_coeff_abs/max": 50176.0, "masked_sentence_hessian_coeff_abs/median": 9984.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 2608.0, "masked_sentence_hessian_coeff_abs/p75": 19680.0, "masked_sentence_hessian_coeff_abs/p99": 42880.0234375, "masked_sentence_hessian_coeff_abs/var": 130484136.0, "masked_token_fisher_curvature": 361.5263366699219, "masked_token_fisher_curvature/max": 156672.0, "masked_token_fisher_curvature/median": 4.7271214720367993e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.5964585822273434e-22, "masked_token_fisher_curvature/p75": 6.778577699151356e-12, "masked_token_fisher_curvature/p85": 2.168235369026661e-09, "masked_token_fisher_curvature/p90": 2.1457526599988341e-07, "masked_token_fisher_curvature/p95": 0.000766754150390625, "masked_token_fisher_curvature/p99": 2270.0, "masked_token_fisher_curvature/var": 25109176.0, "masked_token_fisher_kl_divergence": 2.2881650352246652e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 2.9913605525980768e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.0111132208032988e-32, "masked_token_fisher_kl_divergence/p75": 4.293067379150214e-22, "masked_token_fisher_kl_divergence/p85": 1.3721933745519665e-19, "masked_token_fisher_kl_divergence/p90": 1.357412649622379e-17, "masked_token_fisher_kl_divergence/p95": 4.8627768478581856e-14, "masked_token_fisher_kl_divergence/p99": 1.4374381862580776e-07, "masked_token_fisher_kl_divergence/var": 1.0058794753077874e-13, "masked_token_full_update_term": 1.7620106518734246e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 1.0570971181733668e-17, "masked_token_full_update_term/min": -1.1548399925231934e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 7.664979762012081e-13, "masked_token_full_update_term/p85": 4.5702108764089644e-11, "masked_token_full_update_term/p90": 6.148184183984995e-10, "masked_token_full_update_term/p95": 5.4796146287117153e-08, "masked_token_full_update_term/p99": 0.000396728515625, "masked_token_full_update_term/var": 3.692337458005568e-08, "masked_token_hessian_coeff": -15728.083984375, "masked_token_hessian_coeff/max": 1336.0, "masked_token_hessian_coeff/median": -3.3905962482094765e-09, "masked_token_hessian_coeff/min": -3817472.0, "masked_token_hessian_coeff/p25": -4.76837158203125e-05, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.01025390625, "masked_token_hessian_coeff/var": 30144440320.0, "masked_token_hessian_coeff_abs": 15728.2763671875, "masked_token_hessian_coeff_abs/max": 3817472.0, "masked_token_hessian_coeff_abs/median": 1.1129304766654968e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 1.0258460747536446e-13, "masked_token_hessian_coeff_abs/p75": 0.00019073486328125, "masked_token_hessian_coeff_abs/p99": 360448.0, "masked_token_hessian_coeff_abs/var": 30144438272.0, "mean_logprobs": -0.0101318359375, "mean_logprobs/var": 2.8133392333984375e-05, "num_completions/total": 3936, "per_sentence_gradient_norm": 56.70182418823242, "per_sentence_gradient_norm/max": 241.0, "per_sentence_gradient_norm/median": 41.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 18.875, "per_sentence_gradient_norm/p75": 73.25, "per_sentence_gradient_norm/var": 2819.558837890625, "per_token_feature_norm": 187.3314971923828, "per_token_feature_norm/max": 278.0, "per_token_feature_norm/median": 187.0, "per_token_feature_norm/min": 105.5, "per_token_feature_norm/p25": 181.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 179.9518585205078, "per_token_gradient_norm": 1.2637630701065063, "per_token_gradient_norm/max": 274.0, "per_token_gradient_norm/median": 9.604264050722122e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 4.3576253716537394e-15, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 7.7858567237854e-07, "per_token_gradient_norm/var": 155.57017517089844, "per_token_policy_error_norm": 0.005645238794386387, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005045116879045963, "policy_entropy": 0.011229078285396099, "policy_entropy/max": 1.734375, "policy_entropy/median": 2.9976945370435715e-09, "policy_entropy/min": 1.2281977735187355e-20, "policy_entropy/p25": 6.849631972727366e-12, "policy_entropy/p75": 4.917383193969727e-07, "policy_entropy/var": 0.005401131231337786, "policy_loss": -0.8229166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.14725877344608307, "policy_sharpness": 9.672052383422852, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.120384454727173, "reward": 0.8229166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.14725877344608307, "rewards/accuracy_reward": 0.8229166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.14725877344608307, "sentence_fisher_curvature": 256928.359375, "sentence_fisher_curvature/max": 888832.0, "sentence_fisher_curvature/median": 217088.0, "sentence_fisher_curvature/min": 67.5, "sentence_fisher_curvature/p25": 109312.0, "sentence_fisher_curvature/p75": 358400.0, "sentence_fisher_curvature/p85": 436736.0, "sentence_fisher_curvature/p90": 522240.0, "sentence_fisher_curvature/p95": 729088.0, "sentence_fisher_curvature/p99": 795443.5, "sentence_fisher_curvature/var": 42012594176.0, "sentence_fisher_kl_divergence": 1.6259744370472617e-05, "sentence_fisher_kl_divergence/max": 5.626678466796875e-05, "sentence_fisher_kl_divergence/median": 1.3709068298339844e-05, "sentence_fisher_kl_divergence/min": 4.2782630771398544e-09, "sentence_fisher_kl_divergence/p25": 6.921589374542236e-06, "sentence_fisher_kl_divergence/p75": 2.2679567337036133e-05, "sentence_fisher_kl_divergence/p85": 2.765655517578125e-05, "sentence_fisher_kl_divergence/p90": 3.3020973205566406e-05, "sentence_fisher_kl_divergence/p95": 4.607439041137695e-05, "sentence_fisher_kl_divergence/p99": 5.037786468164995e-05, "sentence_fisher_kl_divergence/var": 1.6828675952762495e-10, "sentence_full_gradient_variance/max_squared_error": 5912.28515625, "sentence_full_gradient_variance/metric": 5912.28515625, "sentence_full_gradient_variance/p75": 5912.28515625, "sentence_full_gradient_variance/p90": 5912.28515625, "sentence_full_gradient_variance/p95": 5912.28515625, "sentence_full_gradient_variance/p99": 5912.28515625, "sentence_full_update_term": 0.043117523193359375, "sentence_full_update_term/max": 0.15625, "sentence_full_update_term/median": 0.035888671875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.016998291015625, "sentence_full_update_term/p75": 0.06048583984375, "sentence_full_update_term/p85": 0.076416015625, "sentence_full_update_term/p90": 0.096923828125, "sentence_full_update_term/p95": 0.1064453125, "sentence_full_update_term/p99": 0.1349121779203415, "sentence_full_update_term/var": 0.0012219198979437351, "sentence_hessian_coeff": -12174.6669921875, "sentence_hessian_coeff/max": 438272.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -378880.0, "sentence_hessian_coeff/p25": -107520.0, "sentence_hessian_coeff/p75": 54720.0, "sentence_hessian_coeff/p99": 434380.8125, "sentence_hessian_coeff/var": 19444762624.0, "sentence_hessian_coeff_abs": 100456.0, "sentence_hessian_coeff_abs/max": 438272.0, "sentence_hessian_coeff_abs/median": 86016.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 16752.0, "sentence_hessian_coeff_abs/p75": 157696.0, "sentence_hessian_coeff_abs/p99": 434380.8125, "sentence_hessian_coeff_abs/var": 9396910080.0, "step": 41, "token_fisher_curvature": 241870.25, "token_fisher_curvature/max": 114294784.0, "token_fisher_curvature/median": 7.41594285980085e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.001777082378327e-22, "token_fisher_curvature/p75": 1.2789769243681803e-11, "token_fisher_curvature/p85": 6.6356733441352844e-09, "token_fisher_curvature/p90": 1.6838312149047852e-06, "token_fisher_curvature/p95": 0.12080192565917969, "token_fisher_curvature/p99": 1724256.0, "token_fisher_curvature/var": 12000425934848.0, "token_fisher_kl_divergence": 1.5308791262214072e-05, "token_fisher_kl_divergence/max": 0.007232666015625, "token_fisher_kl_divergence/median": 4.695300107875462e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.2711137632955757e-32, "token_fisher_kl_divergence/p75": 8.106370003019671e-22, "token_fisher_kl_divergence/p85": 4.2012834183813297e-19, "token_fisher_kl_divergence/p90": 1.0668549377257364e-16, "token_fisher_kl_divergence/p95": 7.651212996506729e-12, "token_fisher_kl_divergence/p99": 0.00010902434587478638, "token_fisher_kl_divergence/var": 4.807010967056158e-08, "token_full_update_term": 0.0005560080171562731, "token_full_update_term/max": 0.126953125, "token_full_update_term/median": 1.8106176280507924e-17, "token_full_update_term/min": -1.1548399925231934e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.3642420526593924e-12, "token_full_update_term/p85": 9.113243493175105e-11, "token_full_update_term/p90": 1.8553691916167736e-09, "token_full_update_term/p95": 7.338821887969971e-07, "token_full_update_term/p99": 0.014108896255493164, "token_full_update_term/var": 3.128600656054914e-05, "token_hessian_coeff": -28236.607421875, "token_hessian_coeff/max": 113770496.0, "token_hessian_coeff/median": -4.220055416226387e-09, "token_hessian_coeff/min": -14942208.0, "token_hessian_coeff/p25": -7.2479248046875e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0252685546875, "token_hessian_coeff/var": 7734588604416.0, "token_hessian_coeff_abs": 246300.53125, "token_hessian_coeff_abs/max": 113770496.0, "token_hessian_coeff_abs/median": 1.3783574104309082e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.4921397450962104e-13, "token_hessian_coeff_abs/p75": 0.000286102294921875, "token_hessian_coeff_abs/p99": 7929856.0, "token_hessian_coeff_abs/var": 7674721730560.0 }, { "accuracy_reward": 0.75, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.75, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1894737035036087, "adam_stats/lm_head/lr_effective_max": 4.764103141496889e-05, "adam_stats/lm_head/lr_effective_mean": 4.916505744190225e-11, "adam_stats/lm_head/lr_effective_min": -5.180328662390821e-05, "adam_stats/lm_head/lr_effective_std": 1.146629188042425e-06, "adam_stats/lr_effective_max": 5.794427124783397e-05, "adam_stats/lr_effective_mean": -1.0067227607102325e-10, "adam_stats/lr_effective_min": -5.680823596776463e-05, "adam_stats/m_t_max": 0.0005991184734739363, "adam_stats/m_t_mean": 5.179077652850417e-12, "adam_stats/m_t_min": -0.0005979803390800953, "adam_stats/v_t_max": 2.5751836801646277e-05, "adam_stats/v_t_mean": 1.7428663763932595e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.75, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.75, "advantages/p75": 1.0, "advantages/var": 0.1894737035036087, "all_logprobs": -0.010116503573954105, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -2.1457672119140625e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0002613067626953125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015507523901760578, "clip_ratio": 0.0, "completion_length": 536.8021240234375, "completion_length/correct": 458.6388854980469, "completion_length/correct/max": 1013.0, "completion_length/correct/median": 442.0, "completion_length/correct/min": 174.0, "completion_length/correct/p25": 328.75, "completion_length/correct/p75": 532.5, "completion_length/correct/var": 34465.5546875, "completion_length/incorrect": 771.2916870117188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 276.0, "completion_length/incorrect/p25": 308.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 116166.046875, "completion_length/max": 1024.0, "completion_length/median": 447.0, "completion_length/min": 174.0, "completion_length/p25": 308.0, "completion_length/p75": 641.25, "completion_length/var": 72404.265625, "curvature_clip_ratio_token_fisher": 0.011817670427262783, "curvature_clip_ratio_token_hessian": 0.00871286354959011, "curvature_clip_ratio_total_fisher": 0.011817670427262783, "curvature_clip_ratio_total_full": 0.011817670427262783, "curvature_clip_ratio_total_hessian": 0.00871286354959011, "epoch": 0.0672, "feature_vector_variance/max_squared_error": 65163.984375, "feature_vector_variance/metric": 29769.119140625, "generated_tokens/total": 2487559.0, "global_fisher_curvature": 107008.0, "global_fisher_curvature/max": 107008.0, "global_fisher_curvature/median": 107008.0, "global_fisher_curvature/min": 107008.0, "global_fisher_curvature/p25": 107008.0, "global_fisher_curvature/p75": 107008.0, "global_fisher_curvature/p85": 107008.0, "global_fisher_curvature/p90": 107008.0, "global_fisher_curvature/p95": 107008.0, "global_fisher_curvature/p99": 107008.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.496906280517578e-06, "global_fisher_kl_divergence/max": 6.496906280517578e-06, "global_fisher_kl_divergence/median": 6.496906280517578e-06, "global_fisher_kl_divergence/min": 6.496906280517578e-06, "global_fisher_kl_divergence/p25": 6.496906280517578e-06, "global_fisher_kl_divergence/p75": 6.496906280517578e-06, "global_fisher_kl_divergence/p85": 6.496906280517578e-06, "global_fisher_kl_divergence/p90": 6.496906280517578e-06, "global_fisher_kl_divergence/p95": 6.496906280517578e-06, "global_fisher_kl_divergence/p99": 6.496906280517578e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.10107421875, "global_full_update_term/max": 0.10107421875, "global_full_update_term/median": 0.10107421875, "global_full_update_term/min": 0.10107421875, "global_full_update_term/p25": 0.10107421875, "global_full_update_term/p75": 0.10107421875, "global_full_update_term/p85": 0.10107421875, "global_full_update_term/p90": 0.10107421875, "global_full_update_term/p95": 0.10107421875, "global_full_update_term/p99": 0.10107421875, "global_full_update_term/var": NaN, "global_hessian_coeff": 17536.0, "global_hessian_coeff/max": 17536.0, "global_hessian_coeff/median": 17536.0, "global_hessian_coeff/min": 17536.0, "global_hessian_coeff/p25": 17536.0, "global_hessian_coeff/p75": 17536.0, "global_hessian_coeff/p99": 17536.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17536.0, "global_hessian_coeff_abs/max": 17536.0, "global_hessian_coeff_abs/median": 17536.0, "global_hessian_coeff_abs/min": 17536.0, "global_hessian_coeff_abs/p25": 17536.0, "global_hessian_coeff_abs/p75": 17536.0, "global_hessian_coeff_abs/p99": 17536.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.06270398944616318, "learning_rate": 1.078778360091808e-05, "loss": -0.75, "masked_global_fisher_curvature": 188.0, "masked_global_fisher_curvature/max": 188.0, "masked_global_fisher_curvature/median": 188.0, "masked_global_fisher_curvature/min": 188.0, "masked_global_fisher_curvature/p25": 188.0, "masked_global_fisher_curvature/p75": 188.0, "masked_global_fisher_curvature/p85": 188.0, "masked_global_fisher_curvature/p90": 188.0, "masked_global_fisher_curvature/p95": 188.0, "masked_global_fisher_curvature/p99": 188.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/max": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/median": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/min": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/p25": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/p75": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/p85": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/p90": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/p95": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/p99": 1.1408701539039612e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002593994140625, "masked_global_full_update_term/max": 0.002593994140625, "masked_global_full_update_term/median": 0.002593994140625, "masked_global_full_update_term/min": 0.002593994140625, "masked_global_full_update_term/p25": 0.002593994140625, "masked_global_full_update_term/p75": 0.002593994140625, "masked_global_full_update_term/p85": 0.002593994140625, "masked_global_full_update_term/p90": 0.002593994140625, "masked_global_full_update_term/p95": 0.002593994140625, "masked_global_full_update_term/p99": 0.002593994140625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -3088.0, "masked_global_hessian_coeff/max": -3088.0, "masked_global_hessian_coeff/median": -3088.0, "masked_global_hessian_coeff/min": -3088.0, "masked_global_hessian_coeff/p25": -3088.0, "masked_global_hessian_coeff/p75": -3088.0, "masked_global_hessian_coeff/p99": -3088.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3088.0, "masked_global_hessian_coeff_abs/max": 3088.0, "masked_global_hessian_coeff_abs/median": 3088.0, "masked_global_hessian_coeff_abs/min": 3088.0, "masked_global_hessian_coeff_abs/p25": 3088.0, "masked_global_hessian_coeff_abs/p75": 3088.0, "masked_global_hessian_coeff_abs/p99": 3088.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.2905833721160889, "masked_per_sentence_gradient_norm/max": 5.0625, "masked_per_sentence_gradient_norm/median": 0.9765625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0860595703125, "masked_per_sentence_gradient_norm/p75": 2.109375, "masked_per_sentence_gradient_norm/var": 1.8023760318756104, "masked_per_token_gradient_norm": 0.03197812661528587, "masked_per_token_gradient_norm/max": 12.1875, "masked_per_token_gradient_norm/median": 4.291678123991005e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.800998330116272e-08, "masked_per_token_gradient_norm/var": 0.14809344708919525, "masked_sentence_fisher_curvature": 277.3125, "masked_sentence_fisher_curvature/max": 1208.0, "masked_sentence_fisher_curvature/median": 244.0, "masked_sentence_fisher_curvature/min": 5.875, "masked_sentence_fisher_curvature/p25": 104.875, "masked_sentence_fisher_curvature/p75": 381.5, "masked_sentence_fisher_curvature/p85": 479.0, "masked_sentence_fisher_curvature/p90": 534.0, "masked_sentence_fisher_curvature/p95": 704.0, "masked_sentence_fisher_curvature/p99": 1208.0, "masked_sentence_fisher_curvature/var": 52926.609375, "masked_sentence_fisher_kl_divergence": 1.6850227879672275e-08, "masked_sentence_fisher_kl_divergence/max": 7.35744833946228e-08, "masked_sentence_fisher_kl_divergence/median": 1.4842953532934189e-08, "masked_sentence_fisher_kl_divergence/min": 3.5652192309498787e-10, "masked_sentence_fisher_kl_divergence/p25": 6.366462912410498e-09, "masked_sentence_fisher_kl_divergence/p75": 2.3137545213103294e-08, "masked_sentence_fisher_kl_divergence/p85": 2.9045622795820236e-08, "masked_sentence_fisher_kl_divergence/p90": 3.247987478971481e-08, "masked_sentence_fisher_kl_divergence/p95": 4.284083843231201e-08, "masked_sentence_fisher_kl_divergence/p99": 7.35744833946228e-08, "masked_sentence_fisher_kl_divergence/var": 1.959302747587158e-16, "masked_sentence_full_gradient_variance/max_squared_error": 3.322253704071045, "masked_sentence_full_gradient_variance/metric": 3.322253704071045, "masked_sentence_full_gradient_variance/p75": 3.322253704071045, "masked_sentence_full_gradient_variance/p90": 3.322253704071045, "masked_sentence_full_gradient_variance/p95": 3.322253704071045, "masked_sentence_full_gradient_variance/p99": 3.322253704071045, "masked_sentence_full_update_term": 0.0008439670200459659, "masked_sentence_full_update_term/max": 0.003875732421875, "masked_sentence_full_update_term/median": 0.000518798828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 3.701448440551758e-05, "masked_sentence_full_update_term/p75": 0.0015468597412109375, "masked_sentence_full_update_term/p85": 0.0019092559814453125, "masked_sentence_full_update_term/p90": 0.002166748046875, "masked_sentence_full_update_term/p95": 0.0022735595703125, "masked_sentence_full_update_term/p99": 0.0027595555875450373, "masked_sentence_full_update_term/var": 7.334976430684037e-07, "masked_sentence_hessian_coeff": -9614.75, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -8960.0, "masked_sentence_hessian_coeff/min": -34304.0, "masked_sentence_hessian_coeff/p25": -15360.0, "masked_sentence_hessian_coeff/p75": -750.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 77886632.0, "masked_sentence_hessian_coeff_abs": 9614.75, "masked_sentence_hessian_coeff_abs/max": 34304.0, "masked_sentence_hessian_coeff_abs/median": 8832.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 750.0, "masked_sentence_hessian_coeff_abs/p75": 15360.0, "masked_sentence_hessian_coeff_abs/p99": 34304.0, "masked_sentence_hessian_coeff_abs/var": 77886632.0, "masked_token_fisher_curvature": 329.4355163574219, "masked_token_fisher_curvature/max": 161792.0, "masked_token_fisher_curvature/median": 5.421010862427522e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.4475660719677984e-23, "masked_token_fisher_curvature/p75": 1.4814816040598089e-12, "masked_token_fisher_curvature/p85": 6.693881005048752e-10, "masked_token_fisher_curvature/p90": 4.819594323635101e-08, "masked_token_fisher_curvature/p95": 0.0004454478621482849, "masked_token_fisher_curvature/p99": 2024.0, "masked_token_fisher_curvature/var": 25449264.0, "masked_token_fisher_kl_divergence": 2.000716570194072e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 3.2974385838238293e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 8.787055371266764e-34, "masked_token_fisher_kl_divergence/p75": 8.995589161514176e-23, "masked_token_fisher_kl_divergence/p85": 4.0657581468206416e-20, "masked_token_fisher_kl_divergence/p90": 2.927345865710862e-18, "masked_token_fisher_kl_divergence/p95": 2.7038701139181498e-14, "masked_token_fisher_kl_divergence/p99": 1.2293457984924316e-07, "masked_token_fisher_kl_divergence/var": 9.390876748102683e-14, "masked_token_full_update_term": 1.2501467608672101e-05, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -9.715557098388672e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.241051954068098e-14, "masked_token_full_update_term/p85": 6.281197784119286e-12, "masked_token_full_update_term/p90": 1.0945733208700403e-10, "masked_token_full_update_term/p95": 7.101334631443024e-09, "masked_token_full_update_term/p99": 0.0003032684326171875, "masked_token_full_update_term/var": 2.3593926812282007e-08, "masked_token_hessian_coeff": -11523.85546875, "masked_token_hessian_coeff/max": 2400.0, "masked_token_hessian_coeff/median": -6.039613253960852e-14, "masked_token_hessian_coeff/min": -3899392.0, "masked_token_hessian_coeff/p25": -4.112720489501953e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.01092529296875, "masked_token_hessian_coeff/var": 20679114752.0, "masked_token_hessian_coeff_abs": 11523.96484375, "masked_token_hessian_coeff_abs/max": 3899392.0, "masked_token_hessian_coeff_abs/median": 8.330971468240023e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.288818359375e-05, "masked_token_hessian_coeff_abs/p99": 220160.0, "masked_token_hessian_coeff_abs/var": 20679112704.0, "mean_logprobs": -0.00982666015625, "mean_logprobs/var": 4.7206878662109375e-05, "num_completions/total": 4032, "per_sentence_gradient_norm": 45.77213668823242, "per_sentence_gradient_norm/max": 195.0, "per_sentence_gradient_norm/median": 30.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 4.7578125, "per_sentence_gradient_norm/p75": 61.3125, "per_sentence_gradient_norm/var": 2462.783935546875, "per_token_feature_norm": 187.80380249023438, "per_token_feature_norm/max": 274.0, "per_token_feature_norm/median": 188.0, "per_token_feature_norm/min": 88.5, "per_token_feature_norm/p25": 182.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 150.41079711914062, "per_token_gradient_norm": 0.8808897137641907, "per_token_gradient_norm/max": 288.0, "per_token_gradient_norm/median": 5.258016244624741e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.1734664440155029e-07, "per_token_gradient_norm/var": 110.65538024902344, "per_token_policy_error_norm": 0.005892310291528702, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00575227988883853, "policy_entropy": 0.010637244209647179, "policy_entropy/max": 3.0625, "policy_entropy/median": 1.0477378964424133e-09, "policy_entropy/min": 1.556423040579777e-20, "policy_entropy/p25": 2.0037305148434825e-12, "policy_entropy/p75": 2.4400651454925537e-07, "policy_entropy/var": 0.006242864765226841, "policy_loss": -0.75, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -0.75, "policy_loss/var": 0.1894737035036087, "policy_sharpness": 9.7020902633667, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8811819553375244, "reward": 0.75, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.75, "reward/p75": 1.0, "reward/var": 0.1894737035036087, "rewards/accuracy_reward": 0.75, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.75, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1894737035036087, "sentence_fisher_curvature": 226255.75, "sentence_fisher_curvature/max": 1318912.0, "sentence_fisher_curvature/median": 189440.0, "sentence_fisher_curvature/min": 38.25, "sentence_fisher_curvature/p25": 24796.0, "sentence_fisher_curvature/p75": 336384.0, "sentence_fisher_curvature/p85": 445440.0, "sentence_fisher_curvature/p90": 506880.0, "sentence_fisher_curvature/p95": 583680.0, "sentence_fisher_curvature/p99": 1038746.5, "sentence_fisher_curvature/var": 58421915648.0, "sentence_fisher_kl_divergence": 1.3739710084337275e-05, "sentence_fisher_kl_divergence/max": 8.0108642578125e-05, "sentence_fisher_kl_divergence/median": 1.150369644165039e-05, "sentence_fisher_kl_divergence/min": 2.3283064365386963e-09, "sentence_fisher_kl_divergence/p25": 1.5071127563714981e-06, "sentence_fisher_kl_divergence/p75": 2.041459083557129e-05, "sentence_fisher_kl_divergence/p85": 2.7000904083251953e-05, "sentence_fisher_kl_divergence/p90": 3.081560134887695e-05, "sentence_fisher_kl_divergence/p95": 3.540515899658203e-05, "sentence_fisher_kl_divergence/p99": 6.289487646427006e-05, "sentence_fisher_kl_divergence/var": 2.1535516425696954e-10, "sentence_full_gradient_variance/max_squared_error": 4444.71826171875, "sentence_full_gradient_variance/metric": 4444.71826171875, "sentence_full_gradient_variance/p75": 4444.71826171875, "sentence_full_gradient_variance/p90": 4444.71826171875, "sentence_full_gradient_variance/p95": 4444.71826171875, "sentence_full_gradient_variance/p99": 4444.71826171875, "sentence_full_update_term": 0.03521379083395004, "sentence_full_update_term/max": 0.16796875, "sentence_full_update_term/median": 0.025390625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00295257568359375, "sentence_full_update_term/p75": 0.0518798828125, "sentence_full_update_term/p85": 0.0660400390625, "sentence_full_update_term/p90": 0.0888671875, "sentence_full_update_term/p95": 0.1004638671875, "sentence_full_update_term/p99": 0.16611328721046448, "sentence_full_update_term/var": 0.0014495636569336057, "sentence_hessian_coeff": 4975.75, "sentence_hessian_coeff/max": 544768.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -206848.0, "sentence_hessian_coeff/p25": -72064.0, "sentence_hessian_coeff/p75": 21280.0, "sentence_hessian_coeff/p99": 505856.125, "sentence_hessian_coeff/var": 18176413696.0, "sentence_hessian_coeff_abs": 86347.5859375, "sentence_hessian_coeff_abs/max": 544768.0, "sentence_hessian_coeff_abs/median": 61952.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 1050.0, "sentence_hessian_coeff_abs/p75": 124800.0, "sentence_hessian_coeff_abs/p99": 505856.125, "sentence_hessian_coeff_abs/var": 10667043840.0, "step": 42, "token_fisher_curvature": 178719.265625, "token_fisher_curvature/max": 115343360.0, "token_fisher_curvature/median": 7.318364664277155e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.747419044018271e-23, "token_fisher_curvature/p75": 2.5011104298755527e-12, "token_fisher_curvature/p85": 1.4770193956792355e-09, "token_fisher_curvature/p90": 1.7601996660232544e-07, "token_fisher_curvature/p95": 0.0238037109375, "token_fisher_curvature/p99": 399360.0, "token_fisher_curvature/var": 9325183500288.0, "token_fisher_kl_divergence": 1.0853511412278749e-05, "token_fisher_kl_divergence/max": 0.00701904296875, "token_fisher_kl_divergence/median": 4.449175505446507e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.0592614694129797e-33, "token_fisher_kl_divergence/p75": 1.522012327097571e-22, "token_fisher_kl_divergence/p85": 8.978549240895584e-20, "token_fisher_kl_divergence/p90": 1.0679391398982219e-17, "token_fisher_kl_divergence/p95": 1.4424017535930034e-12, "token_fisher_kl_divergence/p99": 2.4199485778808594e-05, "token_fisher_kl_divergence/var": 3.439026485807517e-08, "token_full_update_term": 0.00038703717291355133, "token_full_update_term/max": 0.1259765625, "token_full_update_term/median": 6.26040014383395e-26, "token_full_update_term/min": -9.715557098388672e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.661338147750939e-14, "token_full_update_term/p85": 9.947598300641403e-12, "token_full_update_term/p90": 2.0372681319713593e-10, "token_full_update_term/p95": 4.456342139746994e-08, "token_full_update_term/p99": 0.0064122676849365234, "token_full_update_term/var": 2.226711694675032e-05, "token_hessian_coeff": -12254.037109375, "token_hessian_coeff/max": 113770496.0, "token_hessian_coeff/median": -1.2789769243681803e-13, "token_hessian_coeff/min": -15269888.0, "token_hessian_coeff/p25": -5.125999450683594e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0201416015625, "token_hessian_coeff/var": 6030137229312.0, "token_hessian_coeff_abs": 180575.078125, "token_hessian_coeff_abs/max": 113770496.0, "token_hessian_coeff_abs/median": 1.0622898116707802e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.266334533691406e-05, "token_hessian_coeff_abs/p99": 5025280.0, "token_hessian_coeff_abs/var": 5997678559232.0 }, { "accuracy_reward": 0.6458333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.23114033043384552, "adam_stats/lm_head/lr_effective_max": 4.6103446948109195e-05, "adam_stats/lm_head/lr_effective_mean": 9.345615253897321e-11, "adam_stats/lm_head/lr_effective_min": -4.594785423250869e-05, "adam_stats/lm_head/lr_effective_std": 1.1348461157467682e-06, "adam_stats/lr_effective_max": 5.81638487346936e-05, "adam_stats/lr_effective_mean": -1.2651128755702956e-10, "adam_stats/lr_effective_min": -5.401340240496211e-05, "adam_stats/m_t_max": 0.0007017127354629338, "adam_stats/m_t_mean": 3.4060148364906118e-12, "adam_stats/m_t_min": -0.0007777453283779323, "adam_stats/v_t_max": 2.5726199964992702e-05, "adam_stats/v_t_mean": 1.7418429979626504e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6458333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.23114033043384552, "all_logprobs": -0.017307724803686142, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.125, "all_logprobs/p1": -0.474609375, "all_logprobs/p10": -7.867813110351562e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.001356886699795723, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.03014829196035862, "clip_ratio": 0.0, "completion_length": 571.7083740234375, "completion_length/correct": 489.2903137207031, "completion_length/correct/max": 1010.0, "completion_length/correct/median": 436.0, "completion_length/correct/min": 188.0, "completion_length/correct/p25": 348.25, "completion_length/correct/p75": 638.25, "completion_length/correct/var": 27924.142578125, "completion_length/incorrect": 722.0, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 676.0, "completion_length/incorrect/min": 370.0, "completion_length/incorrect/p25": 460.25, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 62428.12890625, "completion_length/max": 1024.0, "completion_length/median": 519.0, "completion_length/min": 188.0, "completion_length/p25": 397.0, "completion_length/p75": 663.0, "completion_length/var": 52132.9296875, "curvature_clip_ratio_token_fisher": 0.012827053666114807, "curvature_clip_ratio_token_hessian": 0.009164784103631973, "curvature_clip_ratio_total_fisher": 0.012827053666114807, "curvature_clip_ratio_total_full": 0.012827053666114807, "curvature_clip_ratio_total_hessian": 0.009164784103631973, "epoch": 0.0688, "feature_vector_variance/max_squared_error": 57796.67578125, "feature_vector_variance/metric": 30038.380859375, "generated_tokens/total": 2542443.0, "global_fisher_curvature": 106496.0, "global_fisher_curvature/max": 106496.0, "global_fisher_curvature/median": 106496.0, "global_fisher_curvature/min": 106496.0, "global_fisher_curvature/p25": 106496.0, "global_fisher_curvature/p75": 106496.0, "global_fisher_curvature/p85": 106496.0, "global_fisher_curvature/p90": 106496.0, "global_fisher_curvature/p95": 106496.0, "global_fisher_curvature/p99": 106496.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.198883056640625e-06, "global_fisher_kl_divergence/max": 6.198883056640625e-06, "global_fisher_kl_divergence/median": 6.198883056640625e-06, "global_fisher_kl_divergence/min": 6.198883056640625e-06, "global_fisher_kl_divergence/p25": 6.198883056640625e-06, "global_fisher_kl_divergence/p75": 6.198883056640625e-06, "global_fisher_kl_divergence/p85": 6.198883056640625e-06, "global_fisher_kl_divergence/p90": 6.198883056640625e-06, "global_fisher_kl_divergence/p95": 6.198883056640625e-06, "global_fisher_kl_divergence/p99": 6.198883056640625e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0576171875, "global_full_update_term/max": 0.0576171875, "global_full_update_term/median": 0.0576171875, "global_full_update_term/min": 0.0576171875, "global_full_update_term/p25": 0.0576171875, "global_full_update_term/p75": 0.0576171875, "global_full_update_term/p85": 0.0576171875, "global_full_update_term/p90": 0.0576171875, "global_full_update_term/p95": 0.0576171875, "global_full_update_term/p99": 0.0576171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 17024.0, "global_hessian_coeff/max": 17024.0, "global_hessian_coeff/median": 17024.0, "global_hessian_coeff/min": 17024.0, "global_hessian_coeff/p25": 17024.0, "global_hessian_coeff/p75": 17024.0, "global_hessian_coeff/p99": 17024.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17024.0, "global_hessian_coeff_abs/max": 17024.0, "global_hessian_coeff_abs/median": 17024.0, "global_hessian_coeff_abs/min": 17024.0, "global_hessian_coeff_abs/p25": 17024.0, "global_hessian_coeff_abs/p75": 17024.0, "global_hessian_coeff_abs/p99": 17024.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04718586429953575, "learning_rate": 1.0550524823068504e-05, "loss": -0.6458, "masked_global_fisher_curvature": 480.0, "masked_global_fisher_curvature/max": 480.0, "masked_global_fisher_curvature/median": 480.0, "masked_global_fisher_curvature/min": 480.0, "masked_global_fisher_curvature/p25": 480.0, "masked_global_fisher_curvature/p75": 480.0, "masked_global_fisher_curvature/p85": 480.0, "masked_global_fisher_curvature/p90": 480.0, "masked_global_fisher_curvature/p95": 480.0, "masked_global_fisher_curvature/p99": 480.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/max": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/median": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/min": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/p25": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/p75": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/p85": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/p90": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/p95": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/p99": 2.7939677238464355e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00174713134765625, "masked_global_full_update_term/max": 0.00174713134765625, "masked_global_full_update_term/median": 0.00174713134765625, "masked_global_full_update_term/min": 0.00174713134765625, "masked_global_full_update_term/p25": 0.00174713134765625, "masked_global_full_update_term/p75": 0.00174713134765625, "masked_global_full_update_term/p85": 0.00174713134765625, "masked_global_full_update_term/p90": 0.00174713134765625, "masked_global_full_update_term/p95": 0.00174713134765625, "masked_global_full_update_term/p99": 0.00174713134765625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2064.0, "masked_global_hessian_coeff/max": -2064.0, "masked_global_hessian_coeff/median": -2064.0, "masked_global_hessian_coeff/min": -2064.0, "masked_global_hessian_coeff/p25": -2064.0, "masked_global_hessian_coeff/p75": -2064.0, "masked_global_hessian_coeff/p99": -2064.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2064.0, "masked_global_hessian_coeff_abs/max": 2064.0, "masked_global_hessian_coeff_abs/median": 2064.0, "masked_global_hessian_coeff_abs/min": 2064.0, "masked_global_hessian_coeff_abs/p25": 2064.0, "masked_global_hessian_coeff_abs/p75": 2064.0, "masked_global_hessian_coeff_abs/p99": 2064.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.5723470449447632, "masked_per_sentence_gradient_norm/max": 10.25, "masked_per_sentence_gradient_norm/median": 0.95703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.078125, "masked_per_sentence_gradient_norm/var": 4.447239398956299, "masked_per_token_gradient_norm": 0.030792487785220146, "masked_per_token_gradient_norm/max": 12.0, "masked_per_token_gradient_norm/median": 4.5075054799781356e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.4994521886110306e-08, "masked_per_token_gradient_norm/var": 0.1507696807384491, "masked_sentence_fisher_curvature": 341.55078125, "masked_sentence_fisher_curvature/max": 1256.0, "masked_sentence_fisher_curvature/median": 284.0, "masked_sentence_fisher_curvature/min": 17.5, "masked_sentence_fisher_curvature/p25": 189.5, "masked_sentence_fisher_curvature/p75": 426.0, "masked_sentence_fisher_curvature/p85": 532.0, "masked_sentence_fisher_curvature/p90": 668.0, "masked_sentence_fisher_curvature/p95": 912.0, "masked_sentence_fisher_curvature/p99": 1195.2001953125, "masked_sentence_fisher_curvature/var": 65958.2421875, "masked_sentence_fisher_kl_divergence": 1.988087205972988e-08, "masked_sentence_fisher_kl_divergence/max": 7.310882210731506e-08, "masked_sentence_fisher_kl_divergence/median": 1.6530975699424744e-08, "masked_sentence_fisher_kl_divergence/min": 1.0186340659856796e-09, "masked_sentence_fisher_kl_divergence/p25": 1.1030351743102074e-08, "masked_sentence_fisher_kl_divergence/p75": 2.4796463549137115e-08, "masked_sentence_fisher_kl_divergence/p85": 3.096647560596466e-08, "masked_sentence_fisher_kl_divergence/p90": 3.888271749019623e-08, "masked_sentence_fisher_kl_divergence/p95": 5.3085386753082275e-08, "masked_sentence_fisher_kl_divergence/p99": 6.956980769246002e-08, "masked_sentence_fisher_kl_divergence/var": 2.234752171031642e-16, "masked_sentence_full_gradient_variance/max_squared_error": 6.704244136810303, "masked_sentence_full_gradient_variance/metric": 6.704244136810303, "masked_sentence_full_gradient_variance/p75": 6.704244136810303, "masked_sentence_full_gradient_variance/p90": 6.704244136810303, "masked_sentence_full_gradient_variance/p95": 6.704244136810303, "masked_sentence_full_gradient_variance/p99": 6.704244136810303, "masked_sentence_full_update_term": 0.0010332863312214613, "masked_sentence_full_update_term/max": 0.0081787109375, "masked_sentence_full_update_term/median": 0.000560760498046875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0013332366943359375, "masked_sentence_full_update_term/p85": 0.00240325927734375, "masked_sentence_full_update_term/p90": 0.002899169921875, "masked_sentence_full_update_term/p95": 0.004119873046875, "masked_sentence_full_update_term/p99": 0.005221567116677761, "masked_sentence_full_update_term/var": 2.0743277673318516e-06, "masked_sentence_hessian_coeff": -9540.6669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7616.0, "masked_sentence_hessian_coeff/min": -61184.0, "masked_sentence_hessian_coeff/p25": -13824.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 137398688.0, "masked_sentence_hessian_coeff_abs": 9540.6669921875, "masked_sentence_hessian_coeff_abs/max": 61184.0, "masked_sentence_hessian_coeff_abs/median": 7616.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 13824.0, "masked_sentence_hessian_coeff_abs/p99": 57536.01171875, "masked_sentence_hessian_coeff_abs/var": 137398688.0, "masked_token_fisher_curvature": 390.7585144042969, "masked_token_fisher_curvature/max": 171008.0, "masked_token_fisher_curvature/median": 1.4137996329210978e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.657915389862709e-22, "masked_token_fisher_curvature/p75": 1.1169731806148775e-11, "masked_token_fisher_curvature/p85": 9.837094694375992e-09, "masked_token_fisher_curvature/p90": 1.5869736671447754e-06, "masked_token_fisher_curvature/p95": 0.02197265625, "masked_token_fisher_curvature/p99": 3750.75, "masked_token_fisher_curvature/var": 29631802.0, "masked_token_fisher_kl_divergence": 2.2745139105495582e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 8.229396963265595e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.2933402049021733e-32, "masked_token_fisher_kl_divergence/p75": 6.5016396146667975e-22, "masked_token_fisher_kl_divergence/p85": 5.72594272343907e-19, "masked_token_fisher_kl_divergence/p90": 9.237402509576498e-17, "masked_token_fisher_kl_divergence/p95": 1.2789769243681803e-12, "masked_token_fisher_kl_divergence/p99": 2.1832238417118788e-07, "masked_token_fisher_kl_divergence/var": 1.0039645032206349e-13, "masked_token_full_update_term": 1.2140043509134557e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.3336539268493652e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.5765166949677223e-14, "masked_token_full_update_term/p85": 4.466538250369467e-12, "masked_token_full_update_term/p90": 9.322320693172514e-11, "masked_token_full_update_term/p95": 1.0128132998943329e-08, "masked_token_full_update_term/p99": 8.153915405273438e-05, "masked_token_full_update_term/var": 2.4630214312537646e-08, "masked_token_hessian_coeff": -11594.2861328125, "masked_token_hessian_coeff/max": 205.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -4161536.0, "masked_token_hessian_coeff/p25": -1.773238182067871e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.005591154098510742, "masked_token_hessian_coeff/var": 23225317376.0, "masked_token_hessian_coeff_abs": 11594.3232421875, "masked_token_hessian_coeff_abs/max": 4161536.0, "masked_token_hessian_coeff_abs/median": 3.268496584496461e-12, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.1146068572998047e-05, "masked_token_hessian_coeff_abs/p99": 40448.0, "masked_token_hessian_coeff_abs/var": 23225317376.0, "mean_logprobs": -0.01544189453125, "mean_logprobs/var": 0.00083160400390625, "num_completions/total": 4128, "per_sentence_gradient_norm": 46.712242126464844, "per_sentence_gradient_norm/max": 251.0, "per_sentence_gradient_norm/median": 33.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 72.5, "per_sentence_gradient_norm/var": 2868.37890625, "per_token_feature_norm": 187.74853515625, "per_token_feature_norm/max": 258.0, "per_token_feature_norm/median": 188.0, "per_token_feature_norm/min": 104.0, "per_token_feature_norm/p25": 182.0, "per_token_feature_norm/p75": 194.0, "per_token_feature_norm/var": 142.52603149414062, "per_token_gradient_norm": 0.9449148774147034, "per_token_gradient_norm/max": 314.0, "per_token_gradient_norm/median": 7.815970093361102e-14, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 6.146728992462158e-08, "per_token_gradient_norm/var": 115.99261474609375, "per_token_policy_error_norm": 0.009293286129832268, "per_token_policy_error_norm/max": 1.9765625, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.008483395911753178, "policy_entropy": 0.01859702728688717, "policy_entropy/max": 2.171875, "policy_entropy/median": 3.812601789832115e-09, "policy_entropy/min": 1.2758433768017899e-20, "policy_entropy/p25": 1.2562395568238571e-11, "policy_entropy/p75": 5.923211574554443e-07, "policy_entropy/var": 0.014658691361546516, "policy_loss": -0.6458333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.23114033043384552, "policy_sharpness": 9.6002779006958, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.66874098777771, "reward": 0.6458333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.23114033043384552, "rewards/accuracy_reward": 0.6458333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.23114033043384552, "sentence_fisher_curvature": 229339.546875, "sentence_fisher_curvature/max": 974848.0, "sentence_fisher_curvature/median": 187392.0, "sentence_fisher_curvature/min": 41.25, "sentence_fisher_curvature/p25": 1588.0, "sentence_fisher_curvature/p75": 373760.0, "sentence_fisher_curvature/p85": 500736.0, "sentence_fisher_curvature/p90": 575488.0, "sentence_fisher_curvature/p95": 747520.0, "sentence_fisher_curvature/p99": 939827.3125, "sentence_fisher_curvature/var": 62296535040.0, "sentence_fisher_kl_divergence": 1.3349318578548264e-05, "sentence_fisher_kl_divergence/max": 5.6743621826171875e-05, "sentence_fisher_kl_divergence/median": 1.0907649993896484e-05, "sentence_fisher_kl_divergence/min": 2.4010660126805305e-09, "sentence_fisher_kl_divergence/p25": 9.243376553058624e-08, "sentence_fisher_kl_divergence/p75": 2.1755695343017578e-05, "sentence_fisher_kl_divergence/p85": 2.9146671295166016e-05, "sentence_fisher_kl_divergence/p90": 3.349781036376953e-05, "sentence_fisher_kl_divergence/p95": 4.3511390686035156e-05, "sentence_fisher_kl_divergence/p99": 5.470514952321537e-05, "sentence_fisher_kl_divergence/var": 2.1106887071464797e-10, "sentence_full_gradient_variance/max_squared_error": 4951.533203125, "sentence_full_gradient_variance/metric": 4951.533203125, "sentence_full_gradient_variance/p75": 4951.533203125, "sentence_full_gradient_variance/p90": 4951.533203125, "sentence_full_gradient_variance/p95": 4951.533203125, "sentence_full_gradient_variance/p99": 4951.533203125, "sentence_full_update_term": 0.036032360047101974, "sentence_full_update_term/max": 0.1279296875, "sentence_full_update_term/median": 0.03173828125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0576171875, "sentence_full_update_term/p85": 0.07568359375, "sentence_full_update_term/p90": 0.09619140625, "sentence_full_update_term/p95": 0.1092529296875, "sentence_full_update_term/p99": 0.12097170203924179, "sentence_full_update_term/var": 0.0013496967731043696, "sentence_hessian_coeff": -5836.0, "sentence_hessian_coeff/max": 450560.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -299008.0, "sentence_hessian_coeff/p25": -114560.0, "sentence_hessian_coeff/p75": 400.0, "sentence_hessian_coeff/p99": 392192.1875, "sentence_hessian_coeff/var": 18655272960.0, "sentence_hessian_coeff_abs": 89792.0, "sentence_hessian_coeff_abs/max": 450560.0, "sentence_hessian_coeff_abs/median": 67584.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 127488.0, "sentence_hessian_coeff_abs/p99": 392192.1875, "sentence_hessian_coeff_abs/var": 10542217216.0, "step": 43, "token_fisher_curvature": 195011.3125, "token_fisher_curvature/max": 122683392.0, "token_fisher_curvature/median": 1.8561541192951836e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.766337410683766e-22, "token_fisher_curvature/p75": 1.921307557495311e-11, "token_fisher_curvature/p85": 2.4796463549137115e-08, "token_fisher_curvature/p90": 7.62939453125e-06, "token_fisher_curvature/p95": 1.26324462890625, "token_fisher_curvature/p99": 671744.0, "token_fisher_curvature/var": 9822723375104.0, "token_fisher_kl_divergence": 1.1351152352290228e-05, "token_fisher_kl_divergence/max": 0.00714111328125, "token_fisher_kl_divergence/median": 1.0804238957906978e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.9385267362718973e-32, "token_fisher_kl_divergence/p75": 1.1183481881716934e-21, "token_fisher_kl_divergence/p85": 1.4433441421213278e-18, "token_fisher_kl_divergence/p90": 4.440892098500626e-16, "token_fisher_kl_divergence/p95": 7.353051501013397e-11, "token_fisher_kl_divergence/p99": 3.910064697265625e-05, "token_fisher_kl_divergence/var": 3.32806813219122e-08, "token_full_update_term": 0.00041400964255444705, "token_full_update_term/max": 0.1259765625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.3336539268493652e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.6423307986078726e-14, "token_full_update_term/p85": 8.355982572538778e-12, "token_full_update_term/p90": 2.1100277081131935e-10, "token_full_update_term/p95": 5.2386894822120667e-08, "token_full_update_term/p99": 0.00830078125, "token_full_update_term/var": 2.3150520064518787e-05, "token_hessian_coeff": -17039.5390625, "token_hessian_coeff/max": 122159104.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -15859712.0, "token_hessian_coeff/p25": -2.428889274597168e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.01092529296875, "token_hessian_coeff/var": 6340879581184.0, "token_hessian_coeff_abs": 185935.328125, "token_hessian_coeff_abs/max": 122159104.0, "token_hessian_coeff_abs/median": 7.02016222930979e-12, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.537799835205078e-05, "token_hessian_coeff_abs/p99": 6094848.0, "token_hessian_coeff_abs/var": 6306597437440.0 }, { "accuracy_reward": 0.875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.11052631586790085, "adam_stats/lm_head/lr_effective_max": 4.965469997841865e-05, "adam_stats/lm_head/lr_effective_mean": 4.5422846040032994e-11, "adam_stats/lm_head/lr_effective_min": -4.997179712518118e-05, "adam_stats/lm_head/lr_effective_std": 1.1091202622992569e-06, "adam_stats/lr_effective_max": 5.2595620218198746e-05, "adam_stats/lr_effective_mean": -1.5315392010162299e-10, "adam_stats/lr_effective_min": -5.656556095345877e-05, "adam_stats/m_t_max": 0.0006622497458010912, "adam_stats/m_t_mean": 1.4284971859915285e-12, "adam_stats/m_t_min": -0.0006587720126844943, "adam_stats/v_t_max": 2.5700999685795978e-05, "adam_stats/v_t_mean": 1.7405035745987618e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.11052631586790085, "all_logprobs": -0.013433573767542839, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.625, "all_logprobs/p1": -0.3558398485183716, "all_logprobs/p10": -9.655952453613281e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00116729736328125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01942487619817257, "clip_ratio": 0.0, "completion_length": 469.60418701171875, "completion_length/correct": 436.3928527832031, "completion_length/correct/max": 896.0, "completion_length/correct/median": 412.0, "completion_length/correct/min": 233.0, "completion_length/correct/p25": 282.0, "completion_length/correct/p75": 517.75, "completion_length/correct/var": 29916.74609375, "completion_length/incorrect": 702.0833740234375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 674.0, "completion_length/incorrect/min": 441.0, "completion_length/incorrect/p25": 567.75, "completion_length/incorrect/p75": 778.0, "completion_length/incorrect/var": 35290.62890625, "completion_length/max": 1024.0, "completion_length/median": 430.0, "completion_length/min": 233.0, "completion_length/p25": 283.0, "completion_length/p75": 573.5, "completion_length/var": 38026.28125, "curvature_clip_ratio_token_fisher": 0.01561598852276802, "curvature_clip_ratio_token_hessian": 0.011756354942917824, "curvature_clip_ratio_total_fisher": 0.01561598852276802, "curvature_clip_ratio_total_full": 0.01561598852276802, "curvature_clip_ratio_total_hessian": 0.011756354942917824, "epoch": 0.0704, "feature_vector_variance/max_squared_error": 56830.38671875, "feature_vector_variance/metric": 30118.96484375, "generated_tokens/total": 2587525.0, "global_fisher_curvature": 120832.0, "global_fisher_curvature/max": 120832.0, "global_fisher_curvature/median": 120832.0, "global_fisher_curvature/min": 120832.0, "global_fisher_curvature/p25": 120832.0, "global_fisher_curvature/p75": 120832.0, "global_fisher_curvature/p85": 120832.0, "global_fisher_curvature/p90": 120832.0, "global_fisher_curvature/p95": 120832.0, "global_fisher_curvature/p99": 120832.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.735324859619141e-06, "global_fisher_kl_divergence/max": 6.735324859619141e-06, "global_fisher_kl_divergence/median": 6.735324859619141e-06, "global_fisher_kl_divergence/min": 6.735324859619141e-06, "global_fisher_kl_divergence/p25": 6.735324859619141e-06, "global_fisher_kl_divergence/p75": 6.735324859619141e-06, "global_fisher_kl_divergence/p85": 6.735324859619141e-06, "global_fisher_kl_divergence/p90": 6.735324859619141e-06, "global_fisher_kl_divergence/p95": 6.735324859619141e-06, "global_fisher_kl_divergence/p99": 6.735324859619141e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.07373046875, "global_full_update_term/max": 0.07373046875, "global_full_update_term/median": 0.07373046875, "global_full_update_term/min": 0.07373046875, "global_full_update_term/p25": 0.07373046875, "global_full_update_term/p75": 0.07373046875, "global_full_update_term/p85": 0.07373046875, "global_full_update_term/p90": 0.07373046875, "global_full_update_term/p95": 0.07373046875, "global_full_update_term/p99": 0.07373046875, "global_full_update_term/var": NaN, "global_hessian_coeff": 33536.0, "global_hessian_coeff/max": 33536.0, "global_hessian_coeff/median": 33536.0, "global_hessian_coeff/min": 33536.0, "global_hessian_coeff/p25": 33536.0, "global_hessian_coeff/p75": 33536.0, "global_hessian_coeff/p99": 33536.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 33536.0, "global_hessian_coeff_abs/max": 33536.0, "global_hessian_coeff_abs/median": 33536.0, "global_hessian_coeff_abs/min": 33536.0, "global_hessian_coeff_abs/p25": 33536.0, "global_hessian_coeff_abs/p75": 33536.0, "global_hessian_coeff_abs/p99": 33536.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.047927338629961014, "learning_rate": 1.0309549450619342e-05, "loss": -0.875, "masked_global_fisher_curvature": 572.0, "masked_global_fisher_curvature/max": 572.0, "masked_global_fisher_curvature/median": 572.0, "masked_global_fisher_curvature/min": 572.0, "masked_global_fisher_curvature/p25": 572.0, "masked_global_fisher_curvature/p75": 572.0, "masked_global_fisher_curvature/p85": 572.0, "masked_global_fisher_curvature/p90": 572.0, "masked_global_fisher_curvature/p95": 572.0, "masked_global_fisher_curvature/p99": 572.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/max": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/median": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/min": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/p25": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/p75": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/p85": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/p90": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/p95": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/p99": 3.189779818058014e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00604248046875, "masked_global_full_update_term/max": 0.00604248046875, "masked_global_full_update_term/median": 0.00604248046875, "masked_global_full_update_term/min": 0.00604248046875, "masked_global_full_update_term/p25": 0.00604248046875, "masked_global_full_update_term/p75": 0.00604248046875, "masked_global_full_update_term/p85": 0.00604248046875, "masked_global_full_update_term/p90": 0.00604248046875, "masked_global_full_update_term/p95": 0.00604248046875, "masked_global_full_update_term/p99": 0.00604248046875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -3648.0, "masked_global_hessian_coeff/max": -3648.0, "masked_global_hessian_coeff/median": -3648.0, "masked_global_hessian_coeff/min": -3648.0, "masked_global_hessian_coeff/p25": -3648.0, "masked_global_hessian_coeff/p75": -3648.0, "masked_global_hessian_coeff/p99": -3648.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3648.0, "masked_global_hessian_coeff_abs/max": 3648.0, "masked_global_hessian_coeff_abs/median": 3648.0, "masked_global_hessian_coeff_abs/min": 3648.0, "masked_global_hessian_coeff_abs/p25": 3648.0, "masked_global_hessian_coeff_abs/p75": 3648.0, "masked_global_hessian_coeff_abs/p99": 3648.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.5362651348114014, "masked_per_sentence_gradient_norm/max": 10.0625, "masked_per_sentence_gradient_norm/median": 1.7109375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.59130859375, "masked_per_sentence_gradient_norm/p75": 4.8515625, "masked_per_sentence_gradient_norm/var": 5.911717891693115, "masked_per_token_gradient_norm": 0.04290400445461273, "masked_per_token_gradient_norm/max": 11.125, "masked_per_token_gradient_norm/median": 9.677023626863956e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 1.2836953722228372e-14, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.270144462585449e-07, "masked_per_token_gradient_norm/var": 0.20859825611114502, "masked_sentence_fisher_curvature": 360.87457275390625, "masked_sentence_fisher_curvature/max": 1064.0, "masked_sentence_fisher_curvature/median": 314.0, "masked_sentence_fisher_curvature/min": 0.0693359375, "masked_sentence_fisher_curvature/p25": 181.0, "masked_sentence_fisher_curvature/p75": 532.0, "masked_sentence_fisher_curvature/p85": 578.0, "masked_sentence_fisher_curvature/p90": 722.0, "masked_sentence_fisher_curvature/p95": 839.0, "masked_sentence_fisher_curvature/p99": 1064.0, "masked_sentence_fisher_curvature/var": 61942.0625, "masked_sentence_fisher_kl_divergence": 2.0084511831441887e-08, "masked_sentence_fisher_kl_divergence/max": 5.9138983488082886e-08, "masked_sentence_fisher_kl_divergence/median": 1.7462298274040222e-08, "masked_sentence_fisher_kl_divergence/min": 3.865352482534945e-12, "masked_sentence_fisher_kl_divergence/p25": 1.0069925338029861e-08, "masked_sentence_fisher_kl_divergence/p75": 2.9569491744041443e-08, "masked_sentence_fisher_kl_divergence/p85": 3.2247044146060944e-08, "masked_sentence_fisher_kl_divergence/p90": 4.016328603029251e-08, "masked_sentence_fisher_kl_divergence/p95": 4.674075171351433e-08, "masked_sentence_fisher_kl_divergence/p99": 5.9138983488082886e-08, "masked_sentence_fisher_kl_divergence/var": 1.9183423518403083e-16, "masked_sentence_full_gradient_variance/max_squared_error": 11.7964506149292, "masked_sentence_full_gradient_variance/metric": 11.7964506149292, "masked_sentence_full_gradient_variance/p75": 11.7964506149292, "masked_sentence_full_gradient_variance/p90": 11.7964506149292, "masked_sentence_full_gradient_variance/p95": 11.7964506149292, "masked_sentence_full_gradient_variance/p99": 11.7964506149292, "masked_sentence_full_update_term": 0.0015947173815220594, "masked_sentence_full_update_term/max": 0.007720947265625, "masked_sentence_full_update_term/median": 0.00113677978515625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0002918243408203125, "masked_sentence_full_update_term/p75": 0.0024871826171875, "masked_sentence_full_update_term/p85": 0.003276824951171875, "masked_sentence_full_update_term/p90": 0.003631591796875, "masked_sentence_full_update_term/p95": 0.005035400390625, "masked_sentence_full_update_term/p99": 0.0058654844760894775, "masked_sentence_full_update_term/var": 2.5340946194774006e-06, "masked_sentence_hessian_coeff": -12411.7607421875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -11328.0, "masked_sentence_hessian_coeff/min": -33536.0, "masked_sentence_hessian_coeff/p25": -18784.0, "masked_sentence_hessian_coeff/p75": -6088.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 81142608.0, "masked_sentence_hessian_coeff_abs": 12411.7607421875, "masked_sentence_hessian_coeff_abs/max": 33536.0, "masked_sentence_hessian_coeff_abs/median": 10816.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 6088.0, "masked_sentence_hessian_coeff_abs/p75": 18784.0, "masked_sentence_hessian_coeff_abs/p99": 31833.60546875, "masked_sentence_hessian_coeff_abs/var": 81142608.0, "masked_token_fisher_curvature": 458.39453125, "masked_token_fisher_curvature/max": 176128.0, "masked_token_fisher_curvature/median": 3.209238430557093e-16, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.034864306880448e-22, "masked_token_fisher_curvature/p75": 2.2168933355715126e-11, "masked_token_fisher_curvature/p85": 1.2456439435482025e-08, "masked_token_fisher_curvature/p90": 1.3679382391273975e-06, "masked_token_fisher_curvature/p95": 0.00946044921875, "masked_token_fisher_curvature/p99": 6080.0, "masked_token_fisher_curvature/var": 37477352.0, "masked_token_fisher_kl_divergence": 2.5517877588754345e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 1.7872432668687243e-26, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.131483842327501e-32, "masked_token_fisher_kl_divergence/p75": 1.2308447514789052e-21, "masked_token_fisher_kl_divergence/p85": 6.945670167485263e-19, "masked_token_fisher_kl_divergence/p90": 7.615503822173963e-17, "masked_token_fisher_kl_divergence/p95": 5.258016244624741e-13, "masked_token_fisher_kl_divergence/p99": 3.390014171600342e-07, "masked_token_fisher_kl_divergence/var": 1.1615281490535284e-13, "masked_token_full_update_term": 1.706247712718323e-05, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 9.269928574751063e-18, "masked_token_full_update_term/min": -2.175569534301758e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.3997691894473974e-12, "masked_token_full_update_term/p85": 4.6838977141305804e-11, "masked_token_full_update_term/p90": 5.966285243630409e-10, "masked_token_full_update_term/p95": 3.5157427191734314e-08, "masked_token_full_update_term/p99": 0.000385284423828125, "masked_token_full_update_term/var": 3.495058109592719e-08, "masked_token_hessian_coeff": -16982.09765625, "masked_token_hessian_coeff/max": 1080.0, "masked_token_hessian_coeff/median": -4.016328603029251e-09, "masked_token_hessian_coeff/min": -4227072.0, "masked_token_hessian_coeff/p25": -9.72747802734375e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.01019287109375, "masked_token_hessian_coeff/var": 35586220032.0, "masked_token_hessian_coeff_abs": 16982.1953125, "masked_token_hessian_coeff_abs/max": 4227072.0, "masked_token_hessian_coeff_abs/median": 1.4156103134155273e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 8.384404281969182e-13, "masked_token_hessian_coeff_abs/p75": 0.0002536773681640625, "masked_token_hessian_coeff_abs/p99": 399360.0, "masked_token_hessian_coeff_abs/var": 35586220032.0, "mean_logprobs": -0.01177978515625, "mean_logprobs/var": 8.726119995117188e-05, "num_completions/total": 4224, "per_sentence_gradient_norm": 50.41552734375, "per_sentence_gradient_norm/max": 258.0, "per_sentence_gradient_norm/median": 38.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 9.4375, "per_sentence_gradient_norm/p75": 59.875, "per_sentence_gradient_norm/var": 2869.037841796875, "per_token_feature_norm": 188.11322021484375, "per_token_feature_norm/max": 250.0, "per_token_feature_norm/median": 188.0, "per_token_feature_norm/min": 115.5, "per_token_feature_norm/p25": 182.0, "per_token_feature_norm/p75": 195.0, "per_token_feature_norm/var": 143.92782592773438, "per_token_gradient_norm": 1.311254858970642, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 1.2078089639544487e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 1.6209256159527285e-14, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.087784767150879e-06, "per_token_gradient_norm/var": 174.08657836914062, "per_token_policy_error_norm": 0.007924188859760761, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.007416748441755772, "policy_entropy": 0.014416981488466263, "policy_entropy/max": 2.359375, "policy_entropy/median": 6.082700565457344e-09, "policy_entropy/min": 4.7645603283054394e-21, "policy_entropy/p25": 9.094947017729282e-12, "policy_entropy/p75": 8.940696716308594e-07, "policy_entropy/var": 0.008785088546574116, "policy_loss": -0.875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.11052631586790085, "policy_sharpness": 9.615716934204102, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.503441095352173, "reward": 0.875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.11052631586790085, "rewards/accuracy_reward": 0.875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.11052631586790085, "sentence_fisher_curvature": 291270.625, "sentence_fisher_curvature/max": 856064.0, "sentence_fisher_curvature/median": 288768.0, "sentence_fisher_curvature/min": 704.0, "sentence_fisher_curvature/p25": 43264.0, "sentence_fisher_curvature/p75": 462848.0, "sentence_fisher_curvature/p85": 556032.0, "sentence_fisher_curvature/p90": 647168.0, "sentence_fisher_curvature/p95": 712704.0, "sentence_fisher_curvature/p99": 828825.6875, "sentence_fisher_curvature/var": 60172931072.0, "sentence_fisher_kl_divergence": 1.6211804904742166e-05, "sentence_fisher_kl_divergence/max": 4.76837158203125e-05, "sentence_fisher_kl_divergence/median": 1.609325408935547e-05, "sentence_fisher_kl_divergence/min": 3.91155481338501e-08, "sentence_fisher_kl_divergence/p25": 2.4139881134033203e-06, "sentence_fisher_kl_divergence/p75": 2.574920654296875e-05, "sentence_fisher_kl_divergence/p85": 3.0934810638427734e-05, "sentence_fisher_kl_divergence/p90": 3.600120544433594e-05, "sentence_fisher_kl_divergence/p95": 3.975629806518555e-05, "sentence_fisher_kl_divergence/p99": 4.609823736245744e-05, "sentence_fisher_kl_divergence/var": 1.8649227995215512e-10, "sentence_full_gradient_variance/max_squared_error": 5302.37744140625, "sentence_full_gradient_variance/metric": 5302.37744140625, "sentence_full_gradient_variance/p75": 5302.37744140625, "sentence_full_gradient_variance/p90": 5302.37744140625, "sentence_full_gradient_variance/p95": 5302.37744140625, "sentence_full_gradient_variance/p99": 5302.37744140625, "sentence_full_update_term": 0.03620036691427231, "sentence_full_update_term/max": 0.134765625, "sentence_full_update_term/median": 0.02734375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00673675537109375, "sentence_full_update_term/p75": 0.05230712890625, "sentence_full_update_term/p85": 0.078369140625, "sentence_full_update_term/p90": 0.09130859375, "sentence_full_update_term/p95": 0.1123046875, "sentence_full_update_term/p99": 0.1227051168680191, "sentence_full_update_term/var": 0.001172729185782373, "sentence_hessian_coeff": 34164.5, "sentence_hessian_coeff/max": 663552.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -247808.0, "sentence_hessian_coeff/p25": -75776.0, "sentence_hessian_coeff/p75": 125440.0, "sentence_hessian_coeff/p99": 410624.8125, "sentence_hessian_coeff/var": 24993468416.0, "sentence_hessian_coeff_abs": 118742.171875, "sentence_hessian_coeff_abs/max": 663552.0, "sentence_hessian_coeff_abs/median": 91136.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 55040.0, "sentence_hessian_coeff_abs/p75": 160768.0, "sentence_hessian_coeff_abs/p99": 410624.8125, "sentence_hessian_coeff_abs/var": 11924847616.0, "step": 44, "token_fisher_curvature": 300256.53125, "token_fisher_curvature/max": 122683392.0, "token_fisher_curvature/median": 4.891920202254596e-16, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.539444480537795e-22, "token_fisher_curvature/p75": 4.297362465877086e-11, "token_fisher_curvature/p85": 3.3993273973464966e-08, "token_fisher_curvature/p90": 8.404254913330078e-06, "token_fisher_curvature/p95": 0.998809814453125, "token_fisher_curvature/p99": 2410880.0, "token_fisher_curvature/var": 16768756088832.0, "token_fisher_kl_divergence": 1.6711732314433903e-05, "token_fisher_kl_divergence/max": 0.0068359375, "token_fisher_kl_divergence/median": 2.726303288443817e-26, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.4107436842636503e-32, "token_fisher_kl_divergence/p75": 2.395515053953568e-21, "token_fisher_kl_divergence/p85": 1.8973538018496328e-18, "token_fisher_kl_divergence/p90": 4.683753385137379e-16, "token_fisher_kl_divergence/p95": 5.5637272566855245e-11, "token_fisher_kl_divergence/p99": 0.00013442710041999817, "token_fisher_kl_divergence/var": 5.193559715621632e-08, "token_full_update_term": 0.000571738462895155, "token_full_update_term/max": 0.12353515625, "token_full_update_term/median": 1.360673726469308e-17, "token_full_update_term/min": -2.175569534301758e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.0321522242738865e-12, "token_full_update_term/p85": 8.503775461576879e-11, "token_full_update_term/p90": 1.4115357771515846e-09, "token_full_update_term/p95": 3.259629011154175e-07, "token_full_update_term/p99": 0.01569509506225586, "token_full_update_term/var": 3.428231502766721e-05, "token_hessian_coeff": 15914.44921875, "token_hessian_coeff/max": 120061952.0, "token_hessian_coeff/median": -4.889443516731262e-09, "token_hessian_coeff/min": -15925248.0, "token_hessian_coeff/p25": -0.00012874603271484375, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.026658058166503906, "token_hessian_coeff/var": 10409359704064.0, "token_hessian_coeff_abs": 273879.15625, "token_hessian_coeff_abs/max": 120061952.0, "token_hessian_coeff_abs/median": 1.7881393432617188e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.1013412404281553e-12, "token_hessian_coeff_abs/p75": 0.0003490447998046875, "token_hessian_coeff_abs/p99": 8781824.0, "token_hessian_coeff_abs/var": 10334601478144.0 }, { "accuracy_reward": 0.8333333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.14035087823867798, "adam_stats/lm_head/lr_effective_max": 4.4328731746645644e-05, "adam_stats/lm_head/lr_effective_mean": 7.425971296215472e-12, "adam_stats/lm_head/lr_effective_min": -4.5952638174640015e-05, "adam_stats/lm_head/lr_effective_std": 1.089816009880451e-06, "adam_stats/lr_effective_max": 5.377836350817233e-05, "adam_stats/lr_effective_mean": -1.2985189312697543e-10, "adam_stats/lr_effective_min": -5.5508633522549644e-05, "adam_stats/m_t_max": 0.0005747399991378188, "adam_stats/m_t_mean": 2.735010026615192e-13, "adam_stats/m_t_min": -0.0005548494518734515, "adam_stats/v_t_max": 2.5675308279460296e-05, "adam_stats/v_t_mean": 1.7397946147981735e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8333333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.14035087823867798, "all_logprobs": -0.009695012122392654, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.75, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -2.6226043701171875e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0001277923583984375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016530295833945274, "clip_ratio": 0.0, "completion_length": 494.3958435058594, "completion_length/correct": 455.3374938964844, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 434.0, "completion_length/correct/min": 226.0, "completion_length/correct/p25": 253.0, "completion_length/correct/p75": 554.0, "completion_length/correct/var": 36714.53125, "completion_length/incorrect": 689.6875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 625.0, "completion_length/incorrect/min": 385.0, "completion_length/incorrect/p25": 610.0, "completion_length/incorrect/p75": 707.75, "completion_length/incorrect/var": 30415.962890625, "completion_length/max": 1024.0, "completion_length/median": 460.0, "completion_length/min": 226.0, "completion_length/p25": 285.75, "completion_length/p75": 599.5, "completion_length/var": 43041.6171875, "curvature_clip_ratio_token_fisher": 0.01409548707306385, "curvature_clip_ratio_token_hessian": 0.011061481200158596, "curvature_clip_ratio_total_fisher": 0.01409548707306385, "curvature_clip_ratio_total_full": 0.01409548707306385, "curvature_clip_ratio_total_hessian": 0.011061481200158596, "epoch": 0.072, "feature_vector_variance/max_squared_error": 53845.44140625, "feature_vector_variance/metric": 30219.564453125, "generated_tokens/total": 2634987.0, "global_fisher_curvature": 129024.0, "global_fisher_curvature/max": 129024.0, "global_fisher_curvature/median": 129024.0, "global_fisher_curvature/min": 129024.0, "global_fisher_curvature/p25": 129024.0, "global_fisher_curvature/p75": 129024.0, "global_fisher_curvature/p85": 129024.0, "global_fisher_curvature/p90": 129024.0, "global_fisher_curvature/p95": 129024.0, "global_fisher_curvature/p99": 129024.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.854534149169922e-06, "global_fisher_kl_divergence/max": 6.854534149169922e-06, "global_fisher_kl_divergence/median": 6.854534149169922e-06, "global_fisher_kl_divergence/min": 6.854534149169922e-06, "global_fisher_kl_divergence/p25": 6.854534149169922e-06, "global_fisher_kl_divergence/p75": 6.854534149169922e-06, "global_fisher_kl_divergence/p85": 6.854534149169922e-06, "global_fisher_kl_divergence/p90": 6.854534149169922e-06, "global_fisher_kl_divergence/p95": 6.854534149169922e-06, "global_fisher_kl_divergence/p99": 6.854534149169922e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.12060546875, "global_full_update_term/max": 0.12060546875, "global_full_update_term/median": 0.12060546875, "global_full_update_term/min": 0.12060546875, "global_full_update_term/p25": 0.12060546875, "global_full_update_term/p75": 0.12060546875, "global_full_update_term/p85": 0.12060546875, "global_full_update_term/p90": 0.12060546875, "global_full_update_term/p95": 0.12060546875, "global_full_update_term/p99": 0.12060546875, "global_full_update_term/var": NaN, "global_hessian_coeff": 14912.0, "global_hessian_coeff/max": 14912.0, "global_hessian_coeff/median": 14912.0, "global_hessian_coeff/min": 14912.0, "global_hessian_coeff/p25": 14912.0, "global_hessian_coeff/p75": 14912.0, "global_hessian_coeff/p99": 14912.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 14912.0, "global_hessian_coeff_abs/max": 14912.0, "global_hessian_coeff_abs/median": 14912.0, "global_hessian_coeff_abs/min": 14912.0, "global_hessian_coeff_abs/p25": 14912.0, "global_hessian_coeff_abs/p75": 14912.0, "global_hessian_coeff_abs/p99": 14912.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05772220343351364, "learning_rate": 1.0065151074942516e-05, "loss": -0.8333, "masked_global_fisher_curvature": 360.0, "masked_global_fisher_curvature/max": 360.0, "masked_global_fisher_curvature/median": 360.0, "masked_global_fisher_curvature/min": 360.0, "masked_global_fisher_curvature/p25": 360.0, "masked_global_fisher_curvature/p75": 360.0, "masked_global_fisher_curvature/p85": 360.0, "masked_global_fisher_curvature/p90": 360.0, "masked_global_fisher_curvature/p95": 360.0, "masked_global_fisher_curvature/p99": 360.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/max": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/median": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/min": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/p25": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/p75": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/p85": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/p90": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/p95": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/p99": 1.909211277961731e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00341796875, "masked_global_full_update_term/max": 0.00341796875, "masked_global_full_update_term/median": 0.00341796875, "masked_global_full_update_term/min": 0.00341796875, "masked_global_full_update_term/p25": 0.00341796875, "masked_global_full_update_term/p75": 0.00341796875, "masked_global_full_update_term/p85": 0.00341796875, "masked_global_full_update_term/p90": 0.00341796875, "masked_global_full_update_term/p95": 0.00341796875, "masked_global_full_update_term/p99": 0.00341796875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -3600.0, "masked_global_hessian_coeff/max": -3600.0, "masked_global_hessian_coeff/median": -3600.0, "masked_global_hessian_coeff/min": -3600.0, "masked_global_hessian_coeff/p25": -3600.0, "masked_global_hessian_coeff/p75": -3600.0, "masked_global_hessian_coeff/p99": -3600.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3600.0, "masked_global_hessian_coeff_abs/max": 3600.0, "masked_global_hessian_coeff_abs/median": 3600.0, "masked_global_hessian_coeff_abs/min": 3600.0, "masked_global_hessian_coeff_abs/p25": 3600.0, "masked_global_hessian_coeff_abs/p75": 3600.0, "masked_global_hessian_coeff_abs/p99": 3600.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.0817363262176514, "masked_per_sentence_gradient_norm/max": 7.625, "masked_per_sentence_gradient_norm/median": 1.609375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.28857421875, "masked_per_sentence_gradient_norm/p75": 3.09375, "masked_per_sentence_gradient_norm/var": 3.681135416030884, "masked_per_token_gradient_norm": 0.04262707009911537, "masked_per_token_gradient_norm/max": 11.125, "masked_per_token_gradient_norm/median": 3.6925484891980886e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 2.2898349882893854e-16, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.0046870708465576e-07, "masked_per_token_gradient_norm/var": 0.22837205231189728, "masked_sentence_fisher_curvature": 320.9375, "masked_sentence_fisher_curvature/max": 1096.0, "masked_sentence_fisher_curvature/median": 306.0, "masked_sentence_fisher_curvature/min": 21.875, "masked_sentence_fisher_curvature/p25": 112.0, "masked_sentence_fisher_curvature/p75": 492.0, "masked_sentence_fisher_curvature/p85": 540.0, "masked_sentence_fisher_curvature/p90": 624.0, "masked_sentence_fisher_curvature/p95": 748.0, "masked_sentence_fisher_curvature/p99": 769.2010498046875, "masked_sentence_fisher_curvature/var": 55615.29296875, "masked_sentence_fisher_kl_divergence": 1.706568397707997e-08, "masked_sentence_fisher_kl_divergence/max": 5.820766091346741e-08, "masked_sentence_fisher_kl_divergence/median": 1.6298145055770874e-08, "masked_sentence_fisher_kl_divergence/min": 1.1641532182693481e-09, "masked_sentence_fisher_kl_divergence/p25": 5.951733328402042e-09, "masked_sentence_fisher_kl_divergence/p75": 2.61643435806036e-08, "masked_sentence_fisher_kl_divergence/p85": 2.87545844912529e-08, "masked_sentence_fisher_kl_divergence/p90": 3.317836672067642e-08, "masked_sentence_fisher_kl_divergence/p95": 3.9814040064811707e-08, "masked_sentence_fisher_kl_divergence/p99": 4.095496564104906e-08, "masked_sentence_fisher_kl_divergence/var": 1.5728227895787022e-16, "masked_sentence_full_gradient_variance/max_squared_error": 7.601415634155273, "masked_sentence_full_gradient_variance/metric": 7.601415634155273, "masked_sentence_full_gradient_variance/p75": 7.601415634155273, "masked_sentence_full_gradient_variance/p90": 7.601415634155273, "masked_sentence_full_gradient_variance/p95": 7.601415634155273, "masked_sentence_full_gradient_variance/p99": 7.601415634155273, "masked_sentence_full_update_term": 0.0011997472029179335, "masked_sentence_full_update_term/max": 0.0040283203125, "masked_sentence_full_update_term/median": 0.000926971435546875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00016188621520996094, "masked_sentence_full_update_term/p75": 0.0019741058349609375, "masked_sentence_full_update_term/p85": 0.00254058837890625, "masked_sentence_full_update_term/p90": 0.002593994140625, "masked_sentence_full_update_term/p95": 0.003326416015625, "masked_sentence_full_update_term/p99": 0.0037818916607648134, "masked_sentence_full_update_term/var": 1.185764858746552e-06, "masked_sentence_hessian_coeff": -12071.1669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -12608.0, "masked_sentence_hessian_coeff/min": -41216.0, "masked_sentence_hessian_coeff/p25": -18304.0, "masked_sentence_hessian_coeff/p75": -2928.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 81742256.0, "masked_sentence_hessian_coeff_abs": 12071.1669921875, "masked_sentence_hessian_coeff_abs/max": 41216.0, "masked_sentence_hessian_coeff_abs/median": 12160.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 2928.0, "masked_sentence_hessian_coeff_abs/p75": 18304.0, "masked_sentence_hessian_coeff_abs/p99": 29299.23828125, "masked_sentence_hessian_coeff_abs/var": 81742256.0, "masked_token_fisher_curvature": 420.6796569824219, "masked_token_fisher_curvature/max": 185344.0, "masked_token_fisher_curvature/median": 5.984795992119984e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 8.271806125530277e-23, "masked_token_fisher_curvature/p75": 5.6274984672199935e-12, "masked_token_fisher_curvature/p85": 1.9063008949160576e-09, "masked_token_fisher_curvature/p90": 1.395128492731601e-07, "masked_token_fisher_curvature/p95": 0.0005012452602386475, "masked_token_fisher_curvature/p99": 2024.0, "masked_token_fisher_curvature/var": 40410256.0, "masked_token_fisher_kl_divergence": 2.2355711948307544e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 3.1806871698511196e-27, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.405564747785802e-33, "masked_token_fisher_kl_divergence/p75": 2.99439381744196e-22, "masked_token_fisher_kl_divergence/p85": 1.0122043719688889e-19, "masked_token_fisher_kl_divergence/p90": 7.415985211448213e-18, "masked_token_fisher_kl_divergence/p95": 2.662280118581606e-14, "masked_token_fisher_kl_divergence/p99": 1.0756775736808777e-07, "masked_token_fisher_kl_divergence/var": 1.1413606333925824e-13, "masked_token_full_update_term": 1.6474792573717423e-05, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 2.290377089375628e-18, "masked_token_full_update_term/min": -2.7120113372802734e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.613287612504791e-13, "masked_token_full_update_term/p85": 3.296918293926865e-11, "masked_token_full_update_term/p90": 4.474713932722807e-10, "masked_token_full_update_term/p95": 4.6100467443466187e-08, "masked_token_full_update_term/p99": 0.0003569871187210083, "masked_token_full_update_term/var": 3.577003937493828e-08, "masked_token_hessian_coeff": -16840.14453125, "masked_token_hessian_coeff/max": 195.0, "masked_token_hessian_coeff/median": -1.3169483281672e-09, "masked_token_hessian_coeff/min": -4423680.0, "masked_token_hessian_coeff/p25": -4.4345855712890625e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.019287109375, "masked_token_hessian_coeff/var": 38392274944.0, "masked_token_hessian_coeff_abs": 16840.16796875, "masked_token_hessian_coeff_abs/max": 4423680.0, "masked_token_hessian_coeff_abs/median": 6.938353180885315e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 5.079270337660091e-15, "masked_token_hessian_coeff_abs/p75": 0.00014400482177734375, "masked_token_hessian_coeff_abs/p99": 354304.0, "masked_token_hessian_coeff_abs/var": 38392274944.0, "mean_logprobs": -0.00970458984375, "mean_logprobs/var": 4.172325134277344e-05, "num_completions/total": 4320, "per_sentence_gradient_norm": 45.6083984375, "per_sentence_gradient_norm/max": 210.0, "per_sentence_gradient_norm/median": 35.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 13.09375, "per_sentence_gradient_norm/p75": 74.0, "per_sentence_gradient_norm/var": 1863.6942138671875, "per_token_feature_norm": 188.53651428222656, "per_token_feature_norm/max": 249.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 107.0, "per_token_feature_norm/p25": 183.0, "per_token_feature_norm/p75": 195.0, "per_token_feature_norm/var": 137.23934936523438, "per_token_gradient_norm": 1.181165099143982, "per_token_gradient_norm/max": 286.0, "per_token_gradient_norm/median": 4.4929038267582655e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 3.3480163086352377e-16, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.476176738739014e-07, "per_token_gradient_norm/var": 155.73207092285156, "per_token_policy_error_norm": 0.005550822243094444, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0053629144094884396, "policy_entropy": 0.01015824917703867, "policy_entropy/max": 1.859375, "policy_entropy/median": 2.764863893389702e-09, "policy_entropy/min": 5.166900978251232e-20, "policy_entropy/p25": 5.6061821851471905e-12, "policy_entropy/p75": 3.7997961044311523e-07, "policy_entropy/var": 0.00542935635894537, "policy_loss": -0.8333333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.14035087823867798, "policy_sharpness": 9.71589469909668, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8521530628204346, "reward": 0.8333333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.14035087823867798, "rewards/accuracy_reward": 0.8333333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.14035087823867798, "sentence_fisher_curvature": 286682.75, "sentence_fisher_curvature/max": 1261568.0, "sentence_fisher_curvature/median": 258048.0, "sentence_fisher_curvature/min": 27.75, "sentence_fisher_curvature/p25": 32512.0, "sentence_fisher_curvature/p75": 427520.0, "sentence_fisher_curvature/p85": 520704.0, "sentence_fisher_curvature/p90": 593920.0, "sentence_fisher_curvature/p95": 775168.0, "sentence_fisher_curvature/p99": 1175961.875, "sentence_fisher_curvature/var": 71680548864.0, "sentence_fisher_kl_divergence": 1.5245156646415126e-05, "sentence_fisher_kl_divergence/max": 6.723403930664062e-05, "sentence_fisher_kl_divergence/median": 1.3709068298339844e-05, "sentence_fisher_kl_divergence/min": 1.4770193956792355e-09, "sentence_fisher_kl_divergence/p25": 1.7285346984863281e-06, "sentence_fisher_kl_divergence/p75": 2.2709369659423828e-05, "sentence_fisher_kl_divergence/p85": 2.7686357498168945e-05, "sentence_fisher_kl_divergence/p90": 3.159046173095703e-05, "sentence_fisher_kl_divergence/p95": 4.124641418457031e-05, "sentence_fisher_kl_divergence/p99": 6.270410085562617e-05, "sentence_fisher_kl_divergence/var": 2.0294559088807063e-10, "sentence_full_gradient_variance/max_squared_error": 3856.906982421875, "sentence_full_gradient_variance/metric": 3856.906982421875, "sentence_full_gradient_variance/p75": 3856.906982421875, "sentence_full_gradient_variance/p90": 3856.906982421875, "sentence_full_gradient_variance/p95": 3856.906982421875, "sentence_full_gradient_variance/p99": 3856.906982421875, "sentence_full_update_term": 0.03519042581319809, "sentence_full_update_term/max": 0.1279296875, "sentence_full_update_term/median": 0.030517578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00791168212890625, "sentence_full_update_term/p75": 0.05169677734375, "sentence_full_update_term/p85": 0.0631103515625, "sentence_full_update_term/p90": 0.076171875, "sentence_full_update_term/p95": 0.0985107421875, "sentence_full_update_term/p99": 0.12282716482877731, "sentence_full_update_term/var": 0.0009661341900937259, "sentence_hessian_coeff": 24541.333984375, "sentence_hessian_coeff/max": 778240.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -288768.0, "sentence_hessian_coeff/p25": -79872.0, "sentence_hessian_coeff/p75": 95232.0, "sentence_hessian_coeff/p99": 743219.3125, "sentence_hessian_coeff/var": 33116485632.0, "sentence_hessian_coeff_abs": 122072.0, "sentence_hessian_coeff_abs/max": 778240.0, "sentence_hessian_coeff_abs/median": 79872.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 41472.0, "sentence_hessian_coeff_abs/p75": 145408.0, "sentence_hessian_coeff_abs/p99": 743219.3125, "sentence_hessian_coeff_abs/var": 18666668032.0, "step": 45, "token_fisher_curvature": 267586.125, "token_fisher_curvature/max": 125304832.0, "token_fisher_curvature/median": 8.370040771588094e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 9.678013166870424e-23, "token_fisher_curvature/p75": 9.606537787476555e-12, "token_fisher_curvature/p85": 4.64797267341055e-09, "token_fisher_curvature/p90": 6.668269634246826e-07, "token_fisher_curvature/p95": 0.01360464096069336, "token_fisher_curvature/p99": 1638400.0, "token_fisher_curvature/var": 15565477380096.0, "token_fisher_kl_divergence": 1.4221360288502183e-05, "token_fisher_kl_divergence/max": 0.00665283203125, "token_fisher_kl_divergence/median": 4.4428646182047385e-27, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.151862601235856e-33, "token_fisher_kl_divergence/p75": 5.0954325733266505e-22, "token_fisher_kl_divergence/p85": 2.4683069478582346e-19, "token_fisher_kl_divergence/p90": 3.5344990823027445e-17, "token_fisher_kl_divergence/p95": 7.24392767992299e-13, "token_fisher_kl_divergence/p99": 8.726119995117188e-05, "token_fisher_kl_divergence/var": 4.397393027488761e-08, "token_full_update_term": 0.0005025138380005956, "token_full_update_term/max": 0.12109375, "token_full_update_term/median": 3.4558944247975454e-18, "token_full_update_term/min": -2.7120113372802734e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.348877145181177e-13, "token_full_update_term/p85": 5.771916278263234e-11, "token_full_update_term/p90": 1.0040821507573128e-09, "token_full_update_term/p95": 2.421438694000244e-07, "token_full_update_term/p99": 0.01263427734375, "token_full_update_term/var": 2.922235034930054e-05, "token_hessian_coeff": 7521.01220703125, "token_hessian_coeff/max": 123207680.0, "token_hessian_coeff/median": -1.57160684466362e-09, "token_hessian_coeff/min": -15925248.0, "token_hessian_coeff/p25": -5.698204040527344e-05, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.048583984375, "token_hessian_coeff/var": 10259839057920.0, "token_hessian_coeff_abs": 247779.71875, "token_hessian_coeff_abs/max": 123207680.0, "token_hessian_coeff_abs/median": 8.335337042808533e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 7.618905506490137e-15, "token_hessian_coeff_abs/p75": 0.0001983642578125, "token_hessian_coeff_abs/p99": 7418368.0, "token_hessian_coeff_abs/var": 10198499459072.0 }, { "accuracy_reward": 0.5104166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25252190232276917, "adam_stats/lm_head/lr_effective_max": 4.1992348997155204e-05, "adam_stats/lm_head/lr_effective_mean": 1.9240085219474068e-11, "adam_stats/lm_head/lr_effective_min": -5.0253584049642086e-05, "adam_stats/lm_head/lr_effective_std": 1.046113084157696e-06, "adam_stats/lr_effective_max": 4.9033544200938195e-05, "adam_stats/lr_effective_mean": -1.052789036504187e-10, "adam_stats/lr_effective_min": -5.056898953625932e-05, "adam_stats/m_t_max": 0.0006008078926242888, "adam_stats/m_t_mean": -4.325560742923784e-12, "adam_stats/m_t_min": -0.0005947319441474974, "adam_stats/v_t_max": 2.5650995667092502e-05, "adam_stats/v_t_mean": 1.7383108841251271e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5104166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25252190232276917, "all_logprobs": -0.008540232665836811, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -2.1457672119140625e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -9.632110595703125e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012482360005378723, "clip_ratio": 0.0, "completion_length": 542.8958740234375, "completion_length/correct": 572.3673095703125, "completion_length/correct/max": 1022.0, "completion_length/correct/median": 566.0, "completion_length/correct/min": 254.0, "completion_length/correct/p25": 318.0, "completion_length/correct/p75": 749.0, "completion_length/correct/var": 60077.32421875, "completion_length/incorrect": 512.1702270507812, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 326.0, "completion_length/incorrect/min": 243.0, "completion_length/incorrect/p25": 289.5, "completion_length/incorrect/p75": 748.5, "completion_length/incorrect/var": 91783.8515625, "completion_length/max": 1024.0, "completion_length/median": 499.0, "completion_length/min": 243.0, "completion_length/p25": 315.0, "completion_length/p75": 749.75, "completion_length/var": 75712.6171875, "curvature_clip_ratio_token_fisher": 0.008058636449277401, "curvature_clip_ratio_token_hessian": 0.006197474896907806, "curvature_clip_ratio_total_fisher": 0.008058636449277401, "curvature_clip_ratio_total_full": 0.008058636449277401, "curvature_clip_ratio_total_hessian": 0.006197474896907806, "epoch": 0.0736, "feature_vector_variance/max_squared_error": 57147.55859375, "feature_vector_variance/metric": 30317.2421875, "generated_tokens/total": 2687105.0, "global_fisher_curvature": 84992.0, "global_fisher_curvature/max": 84992.0, "global_fisher_curvature/median": 84992.0, "global_fisher_curvature/min": 84992.0, "global_fisher_curvature/p25": 84992.0, "global_fisher_curvature/p75": 84992.0, "global_fisher_curvature/p85": 84992.0, "global_fisher_curvature/p90": 84992.0, "global_fisher_curvature/p95": 84992.0, "global_fisher_curvature/p99": 84992.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.291534423828125e-06, "global_fisher_kl_divergence/max": 4.291534423828125e-06, "global_fisher_kl_divergence/median": 4.291534423828125e-06, "global_fisher_kl_divergence/min": 4.291534423828125e-06, "global_fisher_kl_divergence/p25": 4.291534423828125e-06, "global_fisher_kl_divergence/p75": 4.291534423828125e-06, "global_fisher_kl_divergence/p85": 4.291534423828125e-06, "global_fisher_kl_divergence/p90": 4.291534423828125e-06, "global_fisher_kl_divergence/p95": 4.291534423828125e-06, "global_fisher_kl_divergence/p99": 4.291534423828125e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.099609375, "global_full_update_term/max": 0.099609375, "global_full_update_term/median": 0.099609375, "global_full_update_term/min": 0.099609375, "global_full_update_term/p25": 0.099609375, "global_full_update_term/p75": 0.099609375, "global_full_update_term/p85": 0.099609375, "global_full_update_term/p90": 0.099609375, "global_full_update_term/p95": 0.099609375, "global_full_update_term/p99": 0.099609375, "global_full_update_term/var": NaN, "global_hessian_coeff": 18432.0, "global_hessian_coeff/max": 18432.0, "global_hessian_coeff/median": 18432.0, "global_hessian_coeff/min": 18432.0, "global_hessian_coeff/p25": 18432.0, "global_hessian_coeff/p75": 18432.0, "global_hessian_coeff/p99": 18432.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 18432.0, "global_hessian_coeff_abs/max": 18432.0, "global_hessian_coeff_abs/median": 18432.0, "global_hessian_coeff_abs/min": 18432.0, "global_hessian_coeff_abs/p25": 18432.0, "global_hessian_coeff_abs/p75": 18432.0, "global_hessian_coeff_abs/p99": 18432.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04753457009792328, "learning_rate": 9.817627457812105e-06, "loss": -0.5104, "masked_global_fisher_curvature": 528.0, "masked_global_fisher_curvature/max": 528.0, "masked_global_fisher_curvature/median": 528.0, "masked_global_fisher_curvature/min": 528.0, "masked_global_fisher_curvature/p25": 528.0, "masked_global_fisher_curvature/p75": 528.0, "masked_global_fisher_curvature/p85": 528.0, "masked_global_fisher_curvature/p90": 528.0, "masked_global_fisher_curvature/p95": 528.0, "masked_global_fisher_curvature/p99": 528.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/max": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/median": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/min": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p25": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p75": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p85": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p90": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p95": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/p99": 2.6775524020195007e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00194549560546875, "masked_global_full_update_term/max": 0.00194549560546875, "masked_global_full_update_term/median": 0.00194549560546875, "masked_global_full_update_term/min": 0.00194549560546875, "masked_global_full_update_term/p25": 0.00194549560546875, "masked_global_full_update_term/p75": 0.00194549560546875, "masked_global_full_update_term/p85": 0.00194549560546875, "masked_global_full_update_term/p90": 0.00194549560546875, "masked_global_full_update_term/p95": 0.00194549560546875, "masked_global_full_update_term/p99": 0.00194549560546875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -3200.0, "masked_global_hessian_coeff/max": -3200.0, "masked_global_hessian_coeff/median": -3200.0, "masked_global_hessian_coeff/min": -3200.0, "masked_global_hessian_coeff/p25": -3200.0, "masked_global_hessian_coeff/p75": -3200.0, "masked_global_hessian_coeff/p99": -3200.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 3200.0, "masked_global_hessian_coeff_abs/max": 3200.0, "masked_global_hessian_coeff_abs/median": 3200.0, "masked_global_hessian_coeff_abs/min": 3200.0, "masked_global_hessian_coeff_abs/p25": 3200.0, "masked_global_hessian_coeff_abs/p75": 3200.0, "masked_global_hessian_coeff_abs/p99": 3200.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.22698974609375, "masked_per_sentence_gradient_norm/max": 6.375, "masked_per_sentence_gradient_norm/median": 0.29296875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.12890625, "masked_per_sentence_gradient_norm/var": 2.29858660697937, "masked_per_token_gradient_norm": 0.032856810837984085, "masked_per_token_gradient_norm/max": 13.5625, "masked_per_token_gradient_norm/median": 3.802513859341161e-15, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.1757947504520416e-08, "masked_per_token_gradient_norm/var": 0.1875409185886383, "masked_sentence_fisher_curvature": 346.6468200683594, "masked_sentence_fisher_curvature/max": 1048.0, "masked_sentence_fisher_curvature/median": 243.0, "masked_sentence_fisher_curvature/min": 5.40625, "masked_sentence_fisher_curvature/p25": 113.625, "masked_sentence_fisher_curvature/p75": 514.0, "masked_sentence_fisher_curvature/p85": 623.0, "masked_sentence_fisher_curvature/p90": 798.0, "masked_sentence_fisher_curvature/p95": 1048.0, "masked_sentence_fisher_curvature/p99": 1048.0, "masked_sentence_fisher_curvature/var": 89366.8828125, "masked_sentence_fisher_kl_divergence": 1.755705625328119e-08, "masked_sentence_fisher_kl_divergence/max": 5.3085386753082275e-08, "masked_sentence_fisher_kl_divergence/median": 1.2281816452741623e-08, "masked_sentence_fisher_kl_divergence/min": 2.7466739993542433e-10, "masked_sentence_fisher_kl_divergence/p25": 5.75528247281909e-09, "masked_sentence_fisher_kl_divergence/p75": 2.601882442831993e-08, "masked_sentence_fisher_kl_divergence/p85": 3.15194483846426e-08, "masked_sentence_fisher_kl_divergence/p90": 4.0512531995773315e-08, "masked_sentence_fisher_kl_divergence/p95": 5.3085386753082275e-08, "masked_sentence_fisher_kl_divergence/p99": 5.3085386753082275e-08, "masked_sentence_fisher_kl_divergence/var": 2.293245089995472e-16, "masked_sentence_full_gradient_variance/max_squared_error": 3.606318950653076, "masked_sentence_full_gradient_variance/metric": 3.606318950653076, "masked_sentence_full_gradient_variance/p75": 3.606318950653076, "masked_sentence_full_gradient_variance/p90": 3.606318950653076, "masked_sentence_full_gradient_variance/p95": 3.606318950653076, "masked_sentence_full_gradient_variance/p99": 3.606318950653076, "masked_sentence_full_update_term": 0.0007653981447219849, "masked_sentence_full_update_term/max": 0.0026702880859375, "masked_sentence_full_update_term/median": 9.870529174804688e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00159454345703125, "masked_sentence_full_update_term/p85": 0.0019550323486328125, "masked_sentence_full_update_term/p90": 0.00231170654296875, "masked_sentence_full_update_term/p95": 0.0023193359375, "masked_sentence_full_update_term/p99": 0.0026557922828942537, "masked_sentence_full_update_term/var": 7.984432954799559e-07, "masked_sentence_hessian_coeff": -9420.583984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -3440.0, "masked_sentence_hessian_coeff/min": -39680.0, "masked_sentence_hessian_coeff/p25": -16736.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 149834768.0, "masked_sentence_hessian_coeff_abs": 9420.583984375, "masked_sentence_hessian_coeff_abs/max": 39680.0, "masked_sentence_hessian_coeff_abs/median": 1640.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 16736.0, "masked_sentence_hessian_coeff_abs/p99": 39680.0, "masked_sentence_hessian_coeff_abs/var": 149834768.0, "masked_token_fisher_curvature": 391.85577392578125, "masked_token_fisher_curvature/max": 193536.0, "masked_token_fisher_curvature/median": 9.812029660993815e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.0701649174904796e-23, "masked_token_fisher_curvature/p75": 3.410605131648481e-12, "masked_token_fisher_curvature/p85": 1.622538547962904e-09, "masked_token_fisher_curvature/p90": 1.3317912817001343e-07, "masked_token_fisher_curvature/p95": 0.000499725341796875, "masked_token_fisher_curvature/p99": 2065.0, "masked_token_fisher_curvature/var": 39258096.0, "masked_token_fisher_kl_divergence": 1.985445940988484e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 4.985600920996795e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.416677968589101e-34, "masked_token_fisher_kl_divergence/p75": 1.7288074802358278e-22, "masked_token_fisher_kl_divergence/p85": 8.216219588366713e-20, "masked_token_fisher_kl_divergence/p90": 6.749158523722265e-18, "masked_token_fisher_kl_divergence/p95": 2.531308496145357e-14, "masked_token_fisher_kl_divergence/p99": 1.0481744538992643e-07, "masked_token_fisher_kl_divergence/var": 1.0080482862285731e-13, "masked_token_full_update_term": 1.2277744644961786e-05, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -5.334615707397461e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.7478019859472624e-15, "masked_token_full_update_term/p85": 1.7156276399532544e-12, "masked_token_full_update_term/p90": 5.002220859751105e-11, "masked_token_full_update_term/p95": 7.363269105553627e-09, "masked_token_full_update_term/p99": 7.677078247070312e-05, "masked_token_full_update_term/var": 2.7440025363034692e-08, "masked_token_hessian_coeff": -12959.5478515625, "masked_token_hessian_coeff/max": 148.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -4620288.0, "masked_token_hessian_coeff/p25": -3.441236913204193e-07, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.003345966339111328, "masked_token_hessian_coeff/var": 31705583616.0, "masked_token_hessian_coeff_abs": 12959.5673828125, "masked_token_hessian_coeff_abs/max": 4620288.0, "masked_token_hessian_coeff_abs/median": 2.575717417130363e-13, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.8014183044433594e-06, "masked_token_hessian_coeff_abs/p99": 37120.0, "masked_token_hessian_coeff_abs/var": 31705583616.0, "mean_logprobs": -0.00921630859375, "mean_logprobs/var": 4.9114227294921875e-05, "num_completions/total": 4416, "per_sentence_gradient_norm": 27.30013084411621, "per_sentence_gradient_norm/max": 186.0, "per_sentence_gradient_norm/median": 5.4375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 49.25, "per_sentence_gradient_norm/var": 1631.1878662109375, "per_token_feature_norm": 189.3792266845703, "per_token_feature_norm/max": 262.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 113.0, "per_token_feature_norm/p25": 183.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 147.05128479003906, "per_token_gradient_norm": 0.7130383849143982, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 5.773159728050814e-15, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.501757651567459e-08, "per_token_gradient_norm/var": 98.97098541259766, "per_token_policy_error_norm": 0.005075734108686447, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004922021646052599, "policy_entropy": 0.009062877856194973, "policy_entropy/max": 1.8515625, "policy_entropy/median": 1.127773430198431e-09, "policy_entropy/min": 4.393983413881683e-21, "policy_entropy/p25": 1.8047785488306545e-12, "policy_entropy/p75": 3.241002559661865e-07, "policy_entropy/var": 0.004973039496690035, "policy_loss": -0.5104166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25252190232276917, "policy_sharpness": 9.739720344543457, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6631258726119995, "reward": 0.5104166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25252190232276917, "rewards/accuracy_reward": 0.5104166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25252190232276917, "sentence_fisher_curvature": 143980.34375, "sentence_fisher_curvature/max": 749568.0, "sentence_fisher_curvature/median": 25600.0, "sentence_fisher_curvature/min": 5.40625, "sentence_fisher_curvature/p25": 311.5, "sentence_fisher_curvature/p75": 252416.0, "sentence_fisher_curvature/p85": 359936.0, "sentence_fisher_curvature/p90": 451584.0, "sentence_fisher_curvature/p95": 546816.0, "sentence_fisher_curvature/p99": 613376.4375, "sentence_fisher_curvature/var": 36148326400.0, "sentence_fisher_kl_divergence": 7.293401722563431e-06, "sentence_fisher_kl_divergence/max": 3.790855407714844e-05, "sentence_fisher_kl_divergence/median": 1.296401023864746e-06, "sentence_fisher_kl_divergence/min": 2.7466739993542433e-10, "sentence_fisher_kl_divergence/p25": 1.5774276107549667e-08, "sentence_fisher_kl_divergence/p75": 1.2785196304321289e-05, "sentence_fisher_kl_divergence/p85": 1.8209218978881836e-05, "sentence_fisher_kl_divergence/p90": 2.288818359375e-05, "sentence_fisher_kl_divergence/p95": 2.771615982055664e-05, "sentence_fisher_kl_divergence/p99": 3.111364640062675e-05, "sentence_fisher_kl_divergence/var": 9.271299700186475e-11, "sentence_full_gradient_variance/max_squared_error": 2312.74365234375, "sentence_full_gradient_variance/metric": 2312.74365234375, "sentence_full_gradient_variance/p75": 2312.74365234375, "sentence_full_gradient_variance/p90": 2312.74365234375, "sentence_full_gradient_variance/p95": 2312.74365234375, "sentence_full_gradient_variance/p99": 2312.74365234375, "sentence_full_update_term": 0.019957860931754112, "sentence_full_update_term/max": 0.107421875, "sentence_full_update_term/median": 0.004150390625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.035888671875, "sentence_full_update_term/p85": 0.04901123046875, "sentence_full_update_term/p90": 0.062744140625, "sentence_full_update_term/p95": 0.0743408203125, "sentence_full_update_term/p99": 0.09953615814447403, "sentence_full_update_term/var": 0.0007563261897303164, "sentence_hessian_coeff": 14633.25, "sentence_hessian_coeff/max": 292864.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -244736.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 24288.0, "sentence_hessian_coeff/p99": 287027.21875, "sentence_hessian_coeff/var": 8542151168.0, "sentence_hessian_coeff_abs": 53660.0859375, "sentence_hessian_coeff_abs/max": 292864.0, "sentence_hessian_coeff_abs/median": 392.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 80384.0, "sentence_hessian_coeff_abs/p99": 287027.21875, "sentence_hessian_coeff_abs/var": 5848823296.0, "step": 46, "token_fisher_curvature": 166112.65625, "token_fisher_curvature/max": 124256256.0, "token_fisher_curvature/median": 1.1817803680091998e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.1761474334738362e-23, "token_fisher_curvature/p75": 4.604316927725449e-12, "token_fisher_curvature/p85": 2.9103830456733704e-09, "token_fisher_curvature/p90": 3.071327228099108e-07, "token_fisher_curvature/p95": 0.004504203796386719, "token_fisher_curvature/p99": 63488.0, "token_fisher_curvature/var": 10212746461184.0, "token_fisher_kl_divergence": 8.414927833655383e-06, "token_fisher_kl_divergence/max": 0.00628662109375, "token_fisher_kl_divergence/median": 5.99534287967969e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.9658689292932736e-34, "token_fisher_kl_divergence/p75": 2.332649327399538e-22, "token_fisher_kl_divergence/p85": 1.4738373282224826e-19, "token_fisher_kl_divergence/p90": 1.556846557053404e-17, "token_fisher_kl_divergence/p95": 2.281647093482775e-13, "token_fisher_kl_divergence/p99": 3.2186508178710938e-06, "token_fisher_kl_divergence/var": 2.6212990889007415e-08, "token_full_update_term": 0.0002933471405413002, "token_full_update_term/max": 0.11865234375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -5.334615707397461e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.135580766728708e-15, "token_full_update_term/p85": 2.8279600883251987e-12, "token_full_update_term/p90": 8.426326303379028e-11, "token_full_update_term/p95": 1.8975697457790375e-08, "token_full_update_term/p99": 0.001953125, "token_full_update_term/var": 1.7386511899530888e-05, "token_hessian_coeff": 15697.2734375, "token_hessian_coeff/max": 122159104.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -16252928.0, "token_hessian_coeff/p25": -4.4889748096466064e-07, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.0068359375, "token_hessian_coeff/var": 6607164932096.0, "token_hessian_coeff_abs": 159662.296875, "token_hessian_coeff_abs/max": 122159104.0, "token_hessian_coeff_abs/median": 3.979039320256561e-13, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.546476364135742e-06, "token_hessian_coeff_abs/p99": 1865024.0, "token_hessian_coeff_abs/var": 6581919416320.0 }, { "accuracy_reward": 0.6875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.21710526943206787, "adam_stats/lm_head/lr_effective_max": 4.260784407961182e-05, "adam_stats/lm_head/lr_effective_mean": -2.773829950453166e-11, "adam_stats/lm_head/lr_effective_min": -4.305234688217752e-05, "adam_stats/lm_head/lr_effective_std": 1.0541217534409952e-06, "adam_stats/lr_effective_max": 4.7762554459040985e-05, "adam_stats/lr_effective_mean": -4.2562776220167464e-11, "adam_stats/lr_effective_min": -4.8999019782058895e-05, "adam_stats/m_t_max": 0.0005968031473457813, "adam_stats/m_t_mean": -3.942311321142311e-12, "adam_stats/m_t_min": -0.0005145884933881462, "adam_stats/v_t_max": 2.5625533453421667e-05, "adam_stats/v_t_mean": 1.7372597501189024e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.21710526943206787, "all_logprobs": -0.010183850303292274, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.25, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -1.3113021850585938e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00020313262939453125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015413853339850903, "clip_ratio": 0.0, "completion_length": 601.4479370117188, "completion_length/correct": 526.3333740234375, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 436.0, "completion_length/correct/min": 279.0, "completion_length/correct/p25": 303.75, "completion_length/correct/p75": 701.0, "completion_length/correct/var": 64852.4140625, "completion_length/incorrect": 766.7000122070312, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 827.0, "completion_length/incorrect/min": 372.0, "completion_length/incorrect/p25": 553.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 58968.83984375, "completion_length/max": 1024.0, "completion_length/median": 524.0, "completion_length/min": 279.0, "completion_length/p25": 353.75, "completion_length/p75": 895.25, "completion_length/var": 74917.2109375, "curvature_clip_ratio_token_fisher": 0.010859210975468159, "curvature_clip_ratio_token_hessian": 0.007724415045231581, "curvature_clip_ratio_total_fisher": 0.010859210975468159, "curvature_clip_ratio_total_full": 0.010859210975468159, "curvature_clip_ratio_total_hessian": 0.007724415045231581, "epoch": 0.0752, "feature_vector_variance/max_squared_error": 62861.69921875, "feature_vector_variance/metric": 30536.697265625, "generated_tokens/total": 2744844.0, "global_fisher_curvature": 113152.0, "global_fisher_curvature/max": 113152.0, "global_fisher_curvature/median": 113152.0, "global_fisher_curvature/min": 113152.0, "global_fisher_curvature/p25": 113152.0, "global_fisher_curvature/p75": 113152.0, "global_fisher_curvature/p85": 113152.0, "global_fisher_curvature/p90": 113152.0, "global_fisher_curvature/p95": 113152.0, "global_fisher_curvature/p99": 113152.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.453824996948242e-06, "global_fisher_kl_divergence/max": 5.453824996948242e-06, "global_fisher_kl_divergence/median": 5.453824996948242e-06, "global_fisher_kl_divergence/min": 5.453824996948242e-06, "global_fisher_kl_divergence/p25": 5.453824996948242e-06, "global_fisher_kl_divergence/p75": 5.453824996948242e-06, "global_fisher_kl_divergence/p85": 5.453824996948242e-06, "global_fisher_kl_divergence/p90": 5.453824996948242e-06, "global_fisher_kl_divergence/p95": 5.453824996948242e-06, "global_fisher_kl_divergence/p99": 5.453824996948242e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.10546875, "global_full_update_term/max": 0.10546875, "global_full_update_term/median": 0.10546875, "global_full_update_term/min": 0.10546875, "global_full_update_term/p25": 0.10546875, "global_full_update_term/p75": 0.10546875, "global_full_update_term/p85": 0.10546875, "global_full_update_term/p90": 0.10546875, "global_full_update_term/p95": 0.10546875, "global_full_update_term/p99": 0.10546875, "global_full_update_term/var": NaN, "global_hessian_coeff": 29440.0, "global_hessian_coeff/max": 29440.0, "global_hessian_coeff/median": 29440.0, "global_hessian_coeff/min": 29440.0, "global_hessian_coeff/p25": 29440.0, "global_hessian_coeff/p75": 29440.0, "global_hessian_coeff/p99": 29440.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 29440.0, "global_hessian_coeff_abs/max": 29440.0, "global_hessian_coeff_abs/median": 29440.0, "global_hessian_coeff_abs/min": 29440.0, "global_hessian_coeff_abs/p25": 29440.0, "global_hessian_coeff_abs/p75": 29440.0, "global_hessian_coeff_abs/p99": 29440.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05783920735120773, "learning_rate": 9.567280168627493e-06, "loss": -0.6875, "masked_global_fisher_curvature": 280.0, "masked_global_fisher_curvature/max": 280.0, "masked_global_fisher_curvature/median": 280.0, "masked_global_fisher_curvature/min": 280.0, "masked_global_fisher_curvature/p25": 280.0, "masked_global_fisher_curvature/p75": 280.0, "masked_global_fisher_curvature/p85": 280.0, "masked_global_fisher_curvature/p90": 280.0, "masked_global_fisher_curvature/p95": 280.0, "masked_global_fisher_curvature/p99": 280.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/max": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/median": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/min": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/p25": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/p75": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/p85": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/p90": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/p95": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/p99": 1.3504177331924438e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0047607421875, "masked_global_full_update_term/max": 0.0047607421875, "masked_global_full_update_term/median": 0.0047607421875, "masked_global_full_update_term/min": 0.0047607421875, "masked_global_full_update_term/p25": 0.0047607421875, "masked_global_full_update_term/p75": 0.0047607421875, "masked_global_full_update_term/p85": 0.0047607421875, "masked_global_full_update_term/p90": 0.0047607421875, "masked_global_full_update_term/p95": 0.0047607421875, "masked_global_full_update_term/p99": 0.0047607421875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -4896.0, "masked_global_hessian_coeff/max": -4896.0, "masked_global_hessian_coeff/median": -4896.0, "masked_global_hessian_coeff/min": -4896.0, "masked_global_hessian_coeff/p25": -4896.0, "masked_global_hessian_coeff/p75": -4896.0, "masked_global_hessian_coeff/p99": -4896.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 4896.0, "masked_global_hessian_coeff_abs/max": 4896.0, "masked_global_hessian_coeff_abs/median": 4896.0, "masked_global_hessian_coeff_abs/min": 4896.0, "masked_global_hessian_coeff_abs/p25": 4896.0, "masked_global_hessian_coeff_abs/p75": 4896.0, "masked_global_hessian_coeff_abs/p99": 4896.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.1962890625, "masked_per_sentence_gradient_norm/max": 8.4375, "masked_per_sentence_gradient_norm/median": 1.7421875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.9140625, "masked_per_sentence_gradient_norm/var": 5.046258449554443, "masked_per_token_gradient_norm": 0.03866828233003616, "masked_per_token_gradient_norm/max": 12.6875, "masked_per_token_gradient_norm/median": 3.268496584496461e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.7345882952213287e-08, "masked_per_token_gradient_norm/var": 0.22880399227142334, "masked_sentence_fisher_curvature": 470.42449951171875, "masked_sentence_fisher_curvature/max": 1552.0, "masked_sentence_fisher_curvature/median": 318.0, "masked_sentence_fisher_curvature/min": 19.875, "masked_sentence_fisher_curvature/p25": 174.5, "masked_sentence_fisher_curvature/p75": 645.0, "masked_sentence_fisher_curvature/p85": 896.0, "masked_sentence_fisher_curvature/p90": 1200.0, "masked_sentence_fisher_curvature/p95": 1316.0, "masked_sentence_fisher_curvature/p99": 1552.0, "masked_sentence_fisher_curvature/var": 172135.34375, "masked_sentence_fisher_kl_divergence": 2.266908083470298e-08, "masked_sentence_fisher_kl_divergence/max": 7.497146725654602e-08, "masked_sentence_fisher_kl_divergence/median": 1.5366822481155396e-08, "masked_sentence_fisher_kl_divergence/min": 9.604264050722122e-10, "masked_sentence_fisher_kl_divergence/p25": 8.41100700199604e-09, "masked_sentence_fisher_kl_divergence/p75": 3.102468326687813e-08, "masked_sentence_fisher_kl_divergence/p85": 4.313187673687935e-08, "masked_sentence_fisher_kl_divergence/p90": 5.774199962615967e-08, "masked_sentence_fisher_kl_divergence/p95": 6.332993507385254e-08, "masked_sentence_fisher_kl_divergence/p99": 7.497146725654602e-08, "masked_sentence_fisher_kl_divergence/var": 3.9978689854938015e-16, "masked_sentence_full_gradient_variance/max_squared_error": 9.411128997802734, "masked_sentence_full_gradient_variance/metric": 9.411128997802734, "masked_sentence_full_gradient_variance/p75": 9.411128997802734, "masked_sentence_full_gradient_variance/p90": 9.411128997802734, "masked_sentence_full_gradient_variance/p95": 9.411128997802734, "masked_sentence_full_gradient_variance/p99": 9.411128997802734, "masked_sentence_full_update_term": 0.0013359934091567993, "masked_sentence_full_update_term/max": 0.004974365234375, "masked_sentence_full_update_term/median": 0.00079345703125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002349853515625, "masked_sentence_full_update_term/p85": 0.0030975341796875, "masked_sentence_full_update_term/p90": 0.00360870361328125, "masked_sentence_full_update_term/p95": 0.0040130615234375, "masked_sentence_full_update_term/p99": 0.00474243238568306, "masked_sentence_full_update_term/var": 2.0742963897646405e-06, "masked_sentence_hessian_coeff": -14930.833984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -8640.0, "masked_sentence_hessian_coeff/min": -63232.0, "masked_sentence_hessian_coeff/p25": -21408.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 298908352.0, "masked_sentence_hessian_coeff_abs": 14930.833984375, "masked_sentence_hessian_coeff_abs/max": 63232.0, "masked_sentence_hessian_coeff_abs/median": 8640.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 21408.0, "masked_sentence_hessian_coeff_abs/p99": 63232.0, "masked_sentence_hessian_coeff_abs/var": 298908352.0, "masked_token_fisher_curvature": 497.80682373046875, "masked_token_fisher_curvature/max": 202752.0, "masked_token_fisher_curvature/median": 2.5343225781848666e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.782137916322191e-24, "masked_token_fisher_curvature/p75": 4.2099657093785936e-13, "masked_token_fisher_curvature/p85": 2.02817318495363e-10, "masked_token_fisher_curvature/p90": 2.5960616767406464e-08, "masked_token_fisher_curvature/p95": 0.0002956390380859375, "masked_token_fisher_curvature/p99": 3081.0, "masked_token_fisher_curvature/var": 47783580.0, "masked_token_fisher_kl_divergence": 2.3991727715610978e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 1.2227344030925683e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.302088136650368e-34, "masked_token_fisher_kl_divergence/p75": 2.0265925007549178e-23, "masked_token_fisher_kl_divergence/p85": 9.793818452627848e-21, "masked_token_fisher_kl_divergence/p90": 1.2536087619363645e-18, "masked_token_fisher_kl_divergence/p95": 1.4210854715202004e-14, "masked_token_fisher_kl_divergence/p99": 1.486041583120823e-07, "masked_token_fisher_kl_divergence/var": 1.1100898710458482e-13, "masked_token_full_update_term": 1.4540857591782697e-05, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -6.556510925292969e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.941291737419306e-15, "masked_token_full_update_term/p85": 1.16068266109437e-12, "masked_token_full_update_term/p90": 2.2396307031158358e-11, "masked_token_full_update_term/p95": 2.0081643015146255e-09, "masked_token_full_update_term/p99": 7.772445678710938e-05, "masked_token_full_update_term/var": 3.337488152510559e-08, "masked_token_hessian_coeff": -16273.0439453125, "masked_token_hessian_coeff/max": 1880.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -4653056.0, "masked_token_hessian_coeff/p25": -7.189810276031494e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.002241373062133789, "masked_token_hessian_coeff/var": 42515644416.0, "masked_token_hessian_coeff_abs": 16273.4697265625, "masked_token_hessian_coeff_abs/max": 4653056.0, "masked_token_hessian_coeff_abs/median": 3.1604940886609256e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.112720489501953e-06, "masked_token_hessian_coeff_abs/p99": 38116.0, "masked_token_hessian_coeff_abs/var": 42515632128.0, "mean_logprobs": -0.0103759765625, "mean_logprobs/var": 4.744529724121094e-05, "num_completions/total": 4512, "per_sentence_gradient_norm": 45.306640625, "per_sentence_gradient_norm/max": 185.0, "per_sentence_gradient_norm/median": 39.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 74.25, "per_sentence_gradient_norm/var": 2164.594482421875, "per_token_feature_norm": 189.4986114501953, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 106.0, "per_token_feature_norm/p25": 183.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 140.0716094970703, "per_token_gradient_norm": 0.8870799541473389, "per_token_gradient_norm/max": 270.0, "per_token_gradient_norm/median": 4.192202140984591e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.421438694000244e-08, "per_token_gradient_norm/var": 115.35198974609375, "per_token_policy_error_norm": 0.005869354121387005, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0055307322181761265, "policy_entropy": 0.010955078527331352, "policy_entropy/max": 2.515625, "policy_entropy/median": 6.803020369261503e-10, "policy_entropy/min": 3.176373552203626e-20, "policy_entropy/p25": 1.6981971384666394e-12, "policy_entropy/p75": 1.3690441846847534e-07, "policy_entropy/var": 0.006309142801910639, "policy_loss": -0.6875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.21710526943206787, "policy_sharpness": 9.703560829162598, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.9206197261810303, "reward": 0.6875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.21710526943206787, "rewards/accuracy_reward": 0.6875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.21710526943206787, "sentence_fisher_curvature": 246088.21875, "sentence_fisher_curvature/max": 1253376.0, "sentence_fisher_curvature/median": 165888.0, "sentence_fisher_curvature/min": 21.0, "sentence_fisher_curvature/p25": 3344.0, "sentence_fisher_curvature/p75": 392192.0, "sentence_fisher_curvature/p85": 494592.0, "sentence_fisher_curvature/p90": 552960.0, "sentence_fisher_curvature/p95": 812032.0, "sentence_fisher_curvature/p99": 1214464.125, "sentence_fisher_curvature/var": 79739617280.0, "sentence_fisher_kl_divergence": 1.186166082334239e-05, "sentence_fisher_kl_divergence/max": 6.031990051269531e-05, "sentence_fisher_kl_divergence/median": 7.987022399902344e-06, "sentence_fisher_kl_divergence/min": 1.0113581083714962e-09, "sentence_fisher_kl_divergence/p25": 1.6111880540847778e-07, "sentence_fisher_kl_divergence/p75": 1.8894672393798828e-05, "sentence_fisher_kl_divergence/p85": 2.384185791015625e-05, "sentence_fisher_kl_divergence/p90": 2.664327621459961e-05, "sentence_fisher_kl_divergence/p95": 3.921985626220703e-05, "sentence_fisher_kl_divergence/p99": 5.850792513228953e-05, "sentence_fisher_kl_divergence/var": 1.8530606216149437e-10, "sentence_full_gradient_variance/max_squared_error": 4110.73828125, "sentence_full_gradient_variance/metric": 4110.73828125, "sentence_full_gradient_variance/p75": 4110.73828125, "sentence_full_gradient_variance/p90": 4110.73828125, "sentence_full_gradient_variance/p95": 4110.73828125, "sentence_full_gradient_variance/p99": 4110.73828125, "sentence_full_update_term": 0.031160354614257812, "sentence_full_update_term/max": 0.1416015625, "sentence_full_update_term/median": 0.0291748046875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.05059814453125, "sentence_full_update_term/p85": 0.0660400390625, "sentence_full_update_term/p90": 0.07080078125, "sentence_full_update_term/p95": 0.0892333984375, "sentence_full_update_term/p99": 0.11748054623603821, "sentence_full_update_term/var": 0.0010299349669367075, "sentence_hessian_coeff": 32123.1875, "sentence_hessian_coeff/max": 667648.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -264192.0, "sentence_hessian_coeff/p25": -52480.0, "sentence_hessian_coeff/p75": 43584.0, "sentence_hessian_coeff/p99": 659865.625, "sentence_hessian_coeff/var": 29386205184.0, "sentence_hessian_coeff_abs": 98296.484375, "sentence_hessian_coeff_abs/max": 667648.0, "sentence_hessian_coeff_abs/median": 50432.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 131584.0, "sentence_hessian_coeff_abs/p99": 659865.625, "sentence_hessian_coeff_abs/var": 20665063424.0, "step": 47, "token_fisher_curvature": 207391.703125, "token_fisher_curvature/max": 128450560.0, "token_fisher_curvature/median": 3.2255014631443757e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.635167923017501e-24, "token_fisher_curvature/p75": 6.252776074688882e-13, "token_fisher_curvature/p85": 4.31100488640368e-10, "token_fisher_curvature/p90": 9.341056284029037e-08, "token_fisher_curvature/p95": 0.00665283203125, "token_fisher_curvature/p99": 294912.0, "token_fisher_curvature/var": 12220290301952.0, "token_fisher_kl_divergence": 9.992857485485729e-06, "token_fisher_kl_divergence/max": 0.006195068359375, "token_fisher_kl_divergence/median": 1.5540559832853933e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.7083389842945504e-34, "token_fisher_kl_divergence/p75": 3.019209235818551e-23, "token_fisher_kl_divergence/p85": 2.0752307207730358e-20, "token_fisher_kl_divergence/p90": 4.510238685892336e-18, "token_fisher_kl_divergence/p95": 3.197442310920451e-13, "token_fisher_kl_divergence/p99": 1.4185905456542969e-05, "token_fisher_kl_divergence/var": 2.8362144632865238e-08, "token_full_update_term": 0.0003657973138615489, "token_full_update_term/max": 0.11767578125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -6.556510925292969e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.800116025829084e-15, "token_full_update_term/p85": 1.7855994460802549e-12, "token_full_update_term/p90": 3.979039320256561e-11, "token_full_update_term/p95": 9.313225746154785e-09, "token_full_update_term/p99": 0.00494384765625, "token_full_update_term/var": 2.049939939752221e-05, "token_hessian_coeff": 3547.50146484375, "token_hessian_coeff/max": 128450560.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -17039360.0, "token_hessian_coeff/p25": -9.611248970031738e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.007350564002990723, "token_hessian_coeff/var": 7705964052480.0, "token_hessian_coeff_abs": 195938.25, "token_hessian_coeff_abs/max": 128450560.0, "token_hessian_coeff_abs/median": 4.4792614062316716e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 5.781650543212891e-06, "token_hessian_coeff_abs/p99": 4521984.0, "token_hessian_coeff_abs/var": 7667584073728.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166665971279144, "adam_stats/lm_head/lr_effective_max": 4.324155815993436e-05, "adam_stats/lm_head/lr_effective_mean": -5.94481130988811e-11, "adam_stats/lm_head/lr_effective_min": -4.3094190914416686e-05, "adam_stats/lm_head/lr_effective_std": 1.0055838401967776e-06, "adam_stats/lr_effective_max": 4.921890649711713e-05, "adam_stats/lr_effective_mean": 3.2445834213801206e-11, "adam_stats/lr_effective_min": -4.880360938841477e-05, "adam_stats/m_t_max": 0.0004524365358520299, "adam_stats/m_t_mean": -5.782485601457665e-12, "adam_stats/m_t_min": -0.00036416383227333426, "adam_stats/v_t_max": 2.5600149456295185e-05, "adam_stats/v_t_mean": 1.7362215181185303e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166665971279144, "all_logprobs": -0.009168647229671478, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -10.6875, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00012302398681640625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016078511252999306, "clip_ratio": 0.0, "completion_length": 564.2291870117188, "completion_length/correct": 462.7930908203125, "completion_length/correct/max": 1020.0, "completion_length/correct/median": 426.0, "completion_length/correct/min": 136.0, "completion_length/correct/p25": 303.5, "completion_length/correct/p75": 532.5, "completion_length/correct/var": 62347.60546875, "completion_length/incorrect": 719.0526123046875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 642.0, "completion_length/incorrect/min": 388.0, "completion_length/incorrect/p25": 470.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 69096.421875, "completion_length/max": 1024.0, "completion_length/median": 470.0, "completion_length/min": 136.0, "completion_length/p25": 379.0, "completion_length/p75": 727.5, "completion_length/var": 80189.796875, "curvature_clip_ratio_token_fisher": 0.00852933619171381, "curvature_clip_ratio_token_hessian": 0.005907765123993158, "curvature_clip_ratio_total_fisher": 0.00852933619171381, "curvature_clip_ratio_total_full": 0.00852933619171381, "curvature_clip_ratio_total_hessian": 0.005907765123993158, "epoch": 0.0768, "feature_vector_variance/max_squared_error": 62102.046875, "feature_vector_variance/metric": 30295.36328125, "generated_tokens/total": 2799010.0, "global_fisher_curvature": 111616.0, "global_fisher_curvature/max": 111616.0, "global_fisher_curvature/median": 111616.0, "global_fisher_curvature/min": 111616.0, "global_fisher_curvature/p25": 111616.0, "global_fisher_curvature/p75": 111616.0, "global_fisher_curvature/p85": 111616.0, "global_fisher_curvature/p90": 111616.0, "global_fisher_curvature/p95": 111616.0, "global_fisher_curvature/p99": 111616.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.0961971282958984e-06, "global_fisher_kl_divergence/max": 5.0961971282958984e-06, "global_fisher_kl_divergence/median": 5.0961971282958984e-06, "global_fisher_kl_divergence/min": 5.0961971282958984e-06, "global_fisher_kl_divergence/p25": 5.0961971282958984e-06, "global_fisher_kl_divergence/p75": 5.0961971282958984e-06, "global_fisher_kl_divergence/p85": 5.0961971282958984e-06, "global_fisher_kl_divergence/p90": 5.0961971282958984e-06, "global_fisher_kl_divergence/p95": 5.0961971282958984e-06, "global_fisher_kl_divergence/p99": 5.0961971282958984e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0625, "global_full_update_term/max": 0.0625, "global_full_update_term/median": 0.0625, "global_full_update_term/min": 0.0625, "global_full_update_term/p25": 0.0625, "global_full_update_term/p75": 0.0625, "global_full_update_term/p85": 0.0625, "global_full_update_term/p90": 0.0625, "global_full_update_term/p95": 0.0625, "global_full_update_term/p99": 0.0625, "global_full_update_term/var": NaN, "global_hessian_coeff": 13696.0, "global_hessian_coeff/max": 13696.0, "global_hessian_coeff/median": 13696.0, "global_hessian_coeff/min": 13696.0, "global_hessian_coeff/p25": 13696.0, "global_hessian_coeff/p75": 13696.0, "global_hessian_coeff/p99": 13696.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 13696.0, "global_hessian_coeff_abs/max": 13696.0, "global_hessian_coeff_abs/median": 13696.0, "global_hessian_coeff_abs/min": 13696.0, "global_hessian_coeff_abs/p25": 13696.0, "global_hessian_coeff_abs/p75": 13696.0, "global_hessian_coeff_abs/p99": 13696.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05896929278969765, "learning_rate": 9.314414216997507e-06, "loss": -0.6042, "masked_global_fisher_curvature": 220.0, "masked_global_fisher_curvature/max": 220.0, "masked_global_fisher_curvature/median": 220.0, "masked_global_fisher_curvature/min": 220.0, "masked_global_fisher_curvature/p25": 220.0, "masked_global_fisher_curvature/p75": 220.0, "masked_global_fisher_curvature/p85": 220.0, "masked_global_fisher_curvature/p90": 220.0, "masked_global_fisher_curvature/p95": 220.0, "masked_global_fisher_curvature/p99": 220.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/max": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/median": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/min": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/p25": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/p75": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/p85": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/p90": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/p95": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/p99": 1.0069925338029861e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00127410888671875, "masked_global_full_update_term/max": 0.00127410888671875, "masked_global_full_update_term/median": 0.00127410888671875, "masked_global_full_update_term/min": 0.00127410888671875, "masked_global_full_update_term/p25": 0.00127410888671875, "masked_global_full_update_term/p75": 0.00127410888671875, "masked_global_full_update_term/p85": 0.00127410888671875, "masked_global_full_update_term/p90": 0.00127410888671875, "masked_global_full_update_term/p95": 0.00127410888671875, "masked_global_full_update_term/p99": 0.00127410888671875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -1608.0, "masked_global_hessian_coeff/max": -1608.0, "masked_global_hessian_coeff/median": -1608.0, "masked_global_hessian_coeff/min": -1608.0, "masked_global_hessian_coeff/p25": -1608.0, "masked_global_hessian_coeff/p75": -1608.0, "masked_global_hessian_coeff/p99": -1608.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 1608.0, "masked_global_hessian_coeff_abs/max": 1608.0, "masked_global_hessian_coeff_abs/median": 1608.0, "masked_global_hessian_coeff_abs/min": 1608.0, "masked_global_hessian_coeff_abs/p25": 1608.0, "masked_global_hessian_coeff_abs/p75": 1608.0, "masked_global_hessian_coeff_abs/p99": 1608.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.144775390625, "masked_per_sentence_gradient_norm/max": 6.875, "masked_per_sentence_gradient_norm/median": 0.7265625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.078125, "masked_per_sentence_gradient_norm/var": 1.8832625150680542, "masked_per_token_gradient_norm": 0.025567712262272835, "masked_per_token_gradient_norm/max": 11.125, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.5756889954209328e-09, "masked_per_token_gradient_norm/var": 0.1325044333934784, "masked_sentence_fisher_curvature": 316.1009216308594, "masked_sentence_fisher_curvature/max": 1544.0, "masked_sentence_fisher_curvature/median": 231.0, "masked_sentence_fisher_curvature/min": 5.375, "masked_sentence_fisher_curvature/p25": 106.875, "masked_sentence_fisher_curvature/p75": 406.5, "masked_sentence_fisher_curvature/p85": 732.0, "masked_sentence_fisher_curvature/p90": 744.0, "masked_sentence_fisher_curvature/p95": 874.0, "masked_sentence_fisher_curvature/p99": 974.0018310546875, "masked_sentence_fisher_curvature/var": 84953.53125, "masked_sentence_fisher_kl_divergence": 1.4468582776316907e-08, "masked_sentence_fisher_kl_divergence/max": 7.078051567077637e-08, "masked_sentence_fisher_kl_divergence/median": 1.0593794286251068e-08, "masked_sentence_fisher_kl_divergence/min": 2.455635694786906e-10, "masked_sentence_fisher_kl_divergence/p25": 4.889443516731262e-09, "masked_sentence_fisher_kl_divergence/p75": 1.862645149230957e-08, "masked_sentence_fisher_kl_divergence/p85": 3.3585820347070694e-08, "masked_sentence_fisher_kl_divergence/p90": 3.3993273973464966e-08, "masked_sentence_fisher_kl_divergence/p95": 4.0046870708465576e-08, "masked_sentence_fisher_kl_divergence/p99": 4.46802843612204e-08, "masked_sentence_fisher_kl_divergence/var": 1.7805793251120928e-16, "masked_sentence_full_gradient_variance/max_squared_error": 3.0511090755462646, "masked_sentence_full_gradient_variance/metric": 3.0511090755462646, "masked_sentence_full_gradient_variance/p75": 3.0511090755462646, "masked_sentence_full_gradient_variance/p90": 3.0511090755462646, "masked_sentence_full_gradient_variance/p95": 3.0511090755462646, "masked_sentence_full_gradient_variance/p99": 3.0511090755462646, "masked_sentence_full_update_term": 0.0006445447797887027, "masked_sentence_full_update_term/max": 0.00341796875, "masked_sentence_full_update_term/median": 0.0003261566162109375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00122833251953125, "masked_sentence_full_update_term/p85": 0.00139617919921875, "masked_sentence_full_update_term/p90": 0.001621246337890625, "masked_sentence_full_update_term/p95": 0.0018177032470703125, "masked_sentence_full_update_term/p99": 0.0032295233104377985, "masked_sentence_full_update_term/var": 5.862313514626294e-07, "masked_sentence_hessian_coeff": -8853.583984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -4288.0, "masked_sentence_hessian_coeff/min": -47616.0, "masked_sentence_hessian_coeff/p25": -13152.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 123509296.0, "masked_sentence_hessian_coeff_abs": 8853.583984375, "masked_sentence_hessian_coeff_abs/max": 47616.0, "masked_sentence_hessian_coeff_abs/median": 4224.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 13152.0, "masked_sentence_hessian_coeff_abs/p99": 38374.4296875, "masked_sentence_hessian_coeff_abs/var": 123509296.0, "masked_token_fisher_curvature": 358.0589294433594, "masked_token_fisher_curvature/max": 204800.0, "masked_token_fisher_curvature/median": 1.870248747537495e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.0132962503774589e-23, "masked_token_fisher_curvature/p75": 3.197442310920451e-13, "masked_token_fisher_curvature/p85": 1.9826984498649836e-10, "masked_token_fisher_curvature/p90": 3.050081431865692e-08, "masked_token_fisher_curvature/p95": 0.00021648406982421875, "masked_token_fisher_curvature/p99": 1823.5, "masked_token_fisher_curvature/var": 35862252.0, "masked_token_fisher_kl_divergence": 1.638417046478935e-08, "masked_token_fisher_kl_divergence/max": 9.357929229736328e-06, "masked_token_fisher_kl_divergence/median": 8.559140821647978e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.634268928681786e-34, "masked_token_fisher_kl_divergence/p75": 1.468245587281624e-23, "masked_token_fisher_kl_divergence/p85": 9.052664623780335e-21, "masked_token_fisher_kl_divergence/p90": 1.395910297075087e-18, "masked_token_fisher_kl_divergence/p95": 9.880984919163893e-15, "masked_token_fisher_kl_divergence/p99": 8.333881851285696e-08, "masked_token_fisher_kl_divergence/var": 7.505586728301025e-14, "masked_token_full_update_term": 9.507462891633622e-06, "masked_token_full_update_term/max": 0.0040283203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.0505318641662598e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 6.245004513516506e-17, "masked_token_full_update_term/p85": 9.370282327836321e-14, "masked_token_full_update_term/p90": 4.035882739117369e-12, "masked_token_full_update_term/p95": 6.106262162575149e-10, "masked_token_full_update_term/p99": 2.5447458028793335e-05, "masked_token_full_update_term/var": 1.9019420705035373e-08, "masked_token_hessian_coeff": -10788.83203125, "masked_token_hessian_coeff/max": 80.5, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -4751360.0, "masked_token_hessian_coeff/p25": -3.2421667128801346e-08, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0013275146484375, "masked_token_hessian_coeff/var": 25092952064.0, "masked_token_hessian_coeff_abs": 10788.857421875, "masked_token_hessian_coeff_abs/max": 4751360.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.5634806156158447e-07, "masked_token_hessian_coeff_abs/p99": 13864.0, "masked_token_hessian_coeff_abs/var": 25092952064.0, "mean_logprobs": -0.0096435546875, "mean_logprobs/var": 4.792213439941406e-05, "num_completions/total": 4608, "per_sentence_gradient_norm": 44.5703125, "per_sentence_gradient_norm/max": 176.0, "per_sentence_gradient_norm/median": 19.125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 73.875, "per_sentence_gradient_norm/var": 2659.668701171875, "per_token_feature_norm": 188.71180725097656, "per_token_feature_norm/max": 262.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 111.5, "per_token_feature_norm/p25": 183.0, "per_token_feature_norm/p75": 195.0, "per_token_feature_norm/var": 119.41035461425781, "per_token_gradient_norm": 0.682041585445404, "per_token_gradient_norm/max": 268.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 3.41970007866621e-09, "per_token_gradient_norm/var": 90.63658905029297, "per_token_policy_error_norm": 0.005308730993419886, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0052479770965874195, "policy_entropy": 0.009256573393940926, "policy_entropy/max": 1.734375, "policy_entropy/median": 6.548361852765083e-10, "policy_entropy/min": 1.9481757786848908e-20, "policy_entropy/p25": 2.319922032256727e-12, "policy_entropy/p75": 1.2759119272232056e-07, "policy_entropy/var": 0.004949048161506653, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166665971279144, "policy_sharpness": 9.729604721069336, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7196012735366821, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166665971279144, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166665971279144, "sentence_fisher_curvature": 216703.796875, "sentence_fisher_curvature/max": 1171456.0, "sentence_fisher_curvature/median": 141312.0, "sentence_fisher_curvature/min": 90.0, "sentence_fisher_curvature/p25": 750.0, "sentence_fisher_curvature/p75": 311296.0, "sentence_fisher_curvature/p85": 527872.0, "sentence_fisher_curvature/p90": 600064.0, "sentence_fisher_curvature/p95": 871424.0, "sentence_fisher_curvature/p99": 992461.375, "sentence_fisher_curvature/var": 74090782720.0, "sentence_fisher_kl_divergence": 9.918810974340886e-06, "sentence_fisher_kl_divergence/max": 5.364418029785156e-05, "sentence_fisher_kl_divergence/median": 6.467103958129883e-06, "sentence_fisher_kl_divergence/min": 4.132743924856186e-09, "sentence_fisher_kl_divergence/p25": 3.440072759985924e-08, "sentence_fisher_kl_divergence/p75": 1.424551010131836e-05, "sentence_fisher_kl_divergence/p85": 2.4199485778808594e-05, "sentence_fisher_kl_divergence/p90": 2.7477741241455078e-05, "sentence_fisher_kl_divergence/p95": 3.987550735473633e-05, "sentence_fisher_kl_divergence/p99": 4.5490291086025536e-05, "sentence_fisher_kl_divergence/var": 1.5526521734976484e-10, "sentence_full_gradient_variance/max_squared_error": 4503.9765625, "sentence_full_gradient_variance/metric": 4503.9765625, "sentence_full_gradient_variance/p75": 4503.9765625, "sentence_full_gradient_variance/p90": 4503.9765625, "sentence_full_gradient_variance/p95": 4503.9765625, "sentence_full_gradient_variance/p99": 4503.9765625, "sentence_full_update_term": 0.027243297547101974, "sentence_full_update_term/max": 0.11328125, "sentence_full_update_term/median": 0.0101318359375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.05078125, "sentence_full_update_term/p85": 0.0621337890625, "sentence_full_update_term/p90": 0.064697265625, "sentence_full_update_term/p95": 0.0787353515625, "sentence_full_update_term/p99": 0.10632326453924179, "sentence_full_update_term/var": 0.0009171147248707712, "sentence_hessian_coeff": 15937.5, "sentence_hessian_coeff/max": 667648.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -428032.0, "sentence_hessian_coeff/p25": -4596.0, "sentence_hessian_coeff/p75": 31744.0, "sentence_hessian_coeff/p99": 488653.375, "sentence_hessian_coeff/var": 23035131904.0, "sentence_hessian_coeff_abs": 79429.171875, "sentence_hessian_coeff_abs/max": 667648.0, "sentence_hessian_coeff_abs/median": 26112.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 114432.0, "sentence_hessian_coeff_abs/p99": 488653.375, "sentence_hessian_coeff_abs/var": 16916407296.0, "step": 48, "token_fisher_curvature": 164063.09375, "token_fisher_curvature/max": 124256256.0, "token_fisher_curvature/median": 2.358139725155972e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.173562494059608e-23, "token_fisher_curvature/p75": 4.547473508864641e-13, "token_fisher_curvature/p85": 4.1109160520136356e-10, "token_fisher_curvature/p90": 8.521601557731628e-08, "token_fisher_curvature/p95": 0.00147247314453125, "token_fisher_curvature/p99": 77312.0, "token_fisher_curvature/var": 9993949544448.0, "token_fisher_kl_divergence": 7.507044756493997e-06, "token_fisher_kl_divergence/max": 0.00567626953125, "token_fisher_kl_divergence/median": 1.0807394401527862e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.356492657827e-34, "token_fisher_kl_divergence/p75": 2.078291289039482e-23, "token_fisher_kl_divergence/p85": 1.8846483076408183e-20, "token_fisher_kl_divergence/p90": 3.903127820947816e-18, "token_fisher_kl_divergence/p95": 6.750155989720952e-14, "token_fisher_kl_divergence/p99": 3.5315752029418945e-06, "token_fisher_kl_divergence/var": 2.0918474064046677e-08, "token_full_update_term": 0.0002770166320260614, "token_full_update_term/max": 0.11181640625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.0505318641662598e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 9.237402509576498e-17, "token_full_update_term/p85": 1.5187850976872141e-13, "token_full_update_term/p90": 6.863842827442568e-12, "token_full_update_term/p95": 1.9063008949160576e-09, "token_full_update_term/p99": 0.0015411376953125, "token_full_update_term/var": 1.5434425222338177e-05, "token_hessian_coeff": 4783.0859375, "token_hessian_coeff/max": 119013376.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -17039360.0, "token_hessian_coeff/p25": -4.0978193283081055e-08, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.0022735595703125, "token_hessian_coeff/var": 6298335707136.0, "token_hessian_coeff_abs": 156406.03125, "token_hessian_coeff_abs/max": 119013376.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 5.960464477539062e-07, "token_hessian_coeff_abs/p99": 1688000.0, "token_hessian_coeff_abs/var": 6273896022016.0 }, { "accuracy_reward": 0.65625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2279605269432068, "adam_stats/lm_head/lr_effective_max": 3.866108454531059e-05, "adam_stats/lm_head/lr_effective_mean": -4.312965956598802e-11, "adam_stats/lm_head/lr_effective_min": -3.9526395994471386e-05, "adam_stats/lm_head/lr_effective_std": 9.463372521167912e-07, "adam_stats/lr_effective_max": 4.713322050520219e-05, "adam_stats/lr_effective_mean": -2.0985321119715294e-11, "adam_stats/lr_effective_min": -4.6483059122692794e-05, "adam_stats/m_t_max": 0.00036656635347753763, "adam_stats/m_t_mean": -5.526407456657445e-12, "adam_stats/m_t_min": -0.0003508921363390982, "adam_stats/v_t_max": 2.5574709070497192e-05, "adam_stats/v_t_mean": 1.7346098515891306e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.65625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2279605269432068, "all_logprobs": -0.010667801834642887, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -2.0265579223632812e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00017547607421875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016720687970519066, "clip_ratio": 0.0, "completion_length": 480.3125, "completion_length/correct": 394.3174743652344, "completion_length/correct/max": 863.0, "completion_length/correct/median": 358.0, "completion_length/correct/min": 203.0, "completion_length/correct/p25": 298.5, "completion_length/correct/p75": 487.0, "completion_length/correct/var": 19935.671875, "completion_length/incorrect": 644.48486328125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 673.0, "completion_length/incorrect/min": 305.0, "completion_length/incorrect/p25": 340.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 93993.1953125, "completion_length/max": 1024.0, "completion_length/median": 360.0, "completion_length/min": 203.0, "completion_length/p25": 311.0, "completion_length/p75": 564.0, "completion_length/var": 58938.125, "curvature_clip_ratio_token_fisher": 0.01008458063006401, "curvature_clip_ratio_token_hessian": 0.006788115482777357, "curvature_clip_ratio_total_fisher": 0.01008458063006401, "curvature_clip_ratio_total_full": 0.01008458063006401, "curvature_clip_ratio_total_hessian": 0.006788115482777357, "epoch": 0.0784, "feature_vector_variance/max_squared_error": 69175.1171875, "feature_vector_variance/metric": 30932.072265625, "generated_tokens/total": 2845120.0, "global_fisher_curvature": 151552.0, "global_fisher_curvature/max": 151552.0, "global_fisher_curvature/median": 151552.0, "global_fisher_curvature/min": 151552.0, "global_fisher_curvature/p25": 151552.0, "global_fisher_curvature/p75": 151552.0, "global_fisher_curvature/p85": 151552.0, "global_fisher_curvature/p90": 151552.0, "global_fisher_curvature/p95": 151552.0, "global_fisher_curvature/p99": 151552.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.586313247680664e-06, "global_fisher_kl_divergence/max": 6.586313247680664e-06, "global_fisher_kl_divergence/median": 6.586313247680664e-06, "global_fisher_kl_divergence/min": 6.586313247680664e-06, "global_fisher_kl_divergence/p25": 6.586313247680664e-06, "global_fisher_kl_divergence/p75": 6.586313247680664e-06, "global_fisher_kl_divergence/p85": 6.586313247680664e-06, "global_fisher_kl_divergence/p90": 6.586313247680664e-06, "global_fisher_kl_divergence/p95": 6.586313247680664e-06, "global_fisher_kl_divergence/p99": 6.586313247680664e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.10009765625, "global_full_update_term/max": 0.10009765625, "global_full_update_term/median": 0.10009765625, "global_full_update_term/min": 0.10009765625, "global_full_update_term/p25": 0.10009765625, "global_full_update_term/p75": 0.10009765625, "global_full_update_term/p85": 0.10009765625, "global_full_update_term/p90": 0.10009765625, "global_full_update_term/p95": 0.10009765625, "global_full_update_term/p99": 0.10009765625, "global_full_update_term/var": NaN, "global_hessian_coeff": 62720.0, "global_hessian_coeff/max": 62720.0, "global_hessian_coeff/median": 62720.0, "global_hessian_coeff/min": 62720.0, "global_hessian_coeff/p25": 62720.0, "global_hessian_coeff/p75": 62720.0, "global_hessian_coeff/p99": 62720.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 62720.0, "global_hessian_coeff_abs/max": 62720.0, "global_hessian_coeff_abs/median": 62720.0, "global_hessian_coeff_abs/min": 62720.0, "global_hessian_coeff_abs/p25": 62720.0, "global_hessian_coeff_abs/p75": 62720.0, "global_hessian_coeff_abs/p99": 62720.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.030779723078012466, "learning_rate": 9.059337681133194e-06, "loss": -0.6562, "masked_global_fisher_curvature": 426.0, "masked_global_fisher_curvature/max": 426.0, "masked_global_fisher_curvature/median": 426.0, "masked_global_fisher_curvature/min": 426.0, "masked_global_fisher_curvature/p25": 426.0, "masked_global_fisher_curvature/p75": 426.0, "masked_global_fisher_curvature/p85": 426.0, "masked_global_fisher_curvature/p90": 426.0, "masked_global_fisher_curvature/p95": 426.0, "masked_global_fisher_curvature/p99": 426.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/max": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/median": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/min": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/p25": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/p75": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/p85": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/p90": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/p95": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/p99": 1.8510036170482635e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0030975341796875, "masked_global_full_update_term/max": 0.0030975341796875, "masked_global_full_update_term/median": 0.0030975341796875, "masked_global_full_update_term/min": 0.0030975341796875, "masked_global_full_update_term/p25": 0.0030975341796875, "masked_global_full_update_term/p75": 0.0030975341796875, "masked_global_full_update_term/p85": 0.0030975341796875, "masked_global_full_update_term/p90": 0.0030975341796875, "masked_global_full_update_term/p95": 0.0030975341796875, "masked_global_full_update_term/p99": 0.0030975341796875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -2624.0, "masked_global_hessian_coeff/max": -2624.0, "masked_global_hessian_coeff/median": -2624.0, "masked_global_hessian_coeff/min": -2624.0, "masked_global_hessian_coeff/p25": -2624.0, "masked_global_hessian_coeff/p75": -2624.0, "masked_global_hessian_coeff/p99": -2624.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 2624.0, "masked_global_hessian_coeff_abs/max": 2624.0, "masked_global_hessian_coeff_abs/median": 2624.0, "masked_global_hessian_coeff_abs/min": 2624.0, "masked_global_hessian_coeff_abs/p25": 2624.0, "masked_global_hessian_coeff_abs/p75": 2624.0, "masked_global_hessian_coeff_abs/p99": 2624.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.9464519023895264, "masked_per_sentence_gradient_norm/max": 9.875, "masked_per_sentence_gradient_norm/median": 0.69921875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.87109375, "masked_per_sentence_gradient_norm/var": 6.686402797698975, "masked_per_token_gradient_norm": 0.02811657078564167, "masked_per_token_gradient_norm/max": 10.875, "masked_per_token_gradient_norm/median": 1.021405182655144e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.7229467630386353e-08, "masked_per_token_gradient_norm/var": 0.15110820531845093, "masked_sentence_fisher_curvature": 370.20703125, "masked_sentence_fisher_curvature/max": 1536.0, "masked_sentence_fisher_curvature/median": 314.0, "masked_sentence_fisher_curvature/min": 3.6875, "masked_sentence_fisher_curvature/p25": 91.625, "masked_sentence_fisher_curvature/p75": 557.0, "masked_sentence_fisher_curvature/p85": 696.0, "masked_sentence_fisher_curvature/p90": 756.0, "masked_sentence_fisher_curvature/p95": 779.0, "masked_sentence_fisher_curvature/p99": 1520.800048828125, "masked_sentence_fisher_curvature/var": 95644.0390625, "masked_sentence_fisher_kl_divergence": 1.6068423036585955e-08, "masked_sentence_fisher_kl_divergence/max": 6.658956408500671e-08, "masked_sentence_fisher_kl_divergence/median": 1.3620592653751373e-08, "masked_sentence_fisher_kl_divergence/min": 1.6007106751203537e-10, "masked_sentence_fisher_kl_divergence/p25": 3.9726728573441505e-09, "masked_sentence_fisher_kl_divergence/p75": 2.4185283109545708e-08, "masked_sentence_fisher_kl_divergence/p85": 3.026798367500305e-08, "masked_sentence_fisher_kl_divergence/p90": 3.282912075519562e-08, "masked_sentence_fisher_kl_divergence/p95": 3.3760443329811096e-08, "masked_sentence_fisher_kl_divergence/p99": 6.614718728314983e-08, "masked_sentence_fisher_kl_divergence/var": 1.802732413056345e-16, "masked_sentence_full_gradient_variance/max_squared_error": 9.997224807739258, "masked_sentence_full_gradient_variance/metric": 9.997224807739258, "masked_sentence_full_gradient_variance/p75": 9.997224807739258, "masked_sentence_full_gradient_variance/p90": 9.997224807739258, "masked_sentence_full_gradient_variance/p95": 9.997224807739258, "masked_sentence_full_gradient_variance/p99": 9.997224807739258, "masked_sentence_full_update_term": 0.001065736054442823, "masked_sentence_full_update_term/max": 0.00628662109375, "masked_sentence_full_update_term/median": 0.00023937225341796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001689910888671875, "masked_sentence_full_update_term/p85": 0.00244903564453125, "masked_sentence_full_update_term/p90": 0.00267791748046875, "masked_sentence_full_update_term/p95": 0.004547119140625, "masked_sentence_full_update_term/p99": 0.005590822547674179, "masked_sentence_full_update_term/var": 2.186655819969019e-06, "masked_sentence_hessian_coeff": -9669.833984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -5632.0, "masked_sentence_hessian_coeff/min": -52992.0, "masked_sentence_hessian_coeff/p25": -17568.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 129662056.0, "masked_sentence_hessian_coeff_abs": 9669.833984375, "masked_sentence_hessian_coeff_abs/max": 52992.0, "masked_sentence_hessian_coeff_abs/median": 5088.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 17568.0, "masked_sentence_hessian_coeff_abs/p99": 46182.421875, "masked_sentence_hessian_coeff_abs/var": 129662056.0, "masked_token_fisher_curvature": 424.8608093261719, "masked_token_fisher_curvature/max": 224256.0, "masked_token_fisher_curvature/median": 8.40256683676266e-18, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 1.1839022517165209e-23, "masked_token_fisher_curvature/p75": 9.521272659185342e-13, "masked_token_fisher_curvature/p85": 4.911271389573812e-10, "masked_token_fisher_curvature/p90": 8.754432201385498e-08, "masked_token_fisher_curvature/p95": 0.000560760498046875, "masked_token_fisher_curvature/p99": 2081.875, "masked_token_fisher_curvature/var": 46133496.0, "masked_token_fisher_kl_divergence": 1.8431494552828553e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 3.6445373821210745e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.145844070159646e-34, "masked_token_fisher_kl_divergence/p75": 4.1359030627651384e-23, "masked_token_fisher_kl_divergence/p85": 2.1281702799764296e-20, "masked_token_fisher_kl_divergence/p90": 3.7947076036992655e-18, "masked_token_fisher_kl_divergence/p95": 2.4313884239290928e-14, "masked_token_fisher_kl_divergence/p99": 9.018731361720711e-08, "masked_token_fisher_kl_divergence/var": 8.679731602510496e-14, "masked_token_full_update_term": 1.0098804523295257e-05, "masked_token_full_update_term/max": 0.0040283203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.0174851417541504e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.4849232954361469e-15, "masked_token_full_update_term/p85": 8.363171266623226e-13, "masked_token_full_update_term/p90": 2.4851676272419354e-11, "masked_token_full_update_term/p95": 3.6088749766349792e-09, "masked_token_full_update_term/p99": 3.7670135498046875e-05, "masked_token_full_update_term/var": 2.1088895962861898e-08, "masked_token_hessian_coeff": -11826.4453125, "masked_token_hessian_coeff/max": 22.125, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -4980736.0, "masked_token_hessian_coeff/p25": -3.7066638469696045e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.005212187767028809, "masked_token_hessian_coeff/var": 30128525312.0, "masked_token_hessian_coeff_abs": 11826.466796875, "masked_token_hessian_coeff_abs/max": 4980736.0, "masked_token_hessian_coeff_abs/median": 4.0678571622265736e-13, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.769993782043457e-06, "masked_token_hessian_coeff_abs/p99": 20935.5, "masked_token_hessian_coeff_abs/var": 30128527360.0, "mean_logprobs": -0.01007080078125, "mean_logprobs/var": 6.771087646484375e-05, "num_completions/total": 4704, "per_sentence_gradient_norm": 43.42252731323242, "per_sentence_gradient_norm/max": 288.0, "per_sentence_gradient_norm/median": 30.625, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 57.1875, "per_sentence_gradient_norm/var": 3446.282470703125, "per_token_feature_norm": 189.98460388183594, "per_token_feature_norm/max": 274.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 111.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 145.1356964111328, "per_token_gradient_norm": 0.8233248591423035, "per_token_gradient_norm/max": 288.0, "per_token_gradient_norm/median": 1.6764367671839864e-14, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.3166649043560028e-08, "per_token_gradient_norm/var": 114.7032241821289, "per_token_policy_error_norm": 0.0061372434720396996, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0060095894150435925, "policy_entropy": 0.011128252372145653, "policy_entropy/max": 3.0625, "policy_entropy/median": 1.3169483281672e-09, "policy_entropy/min": 1.0926725019580474e-19, "policy_entropy/p25": 2.8279600883251987e-12, "policy_entropy/p75": 1.955777406692505e-07, "policy_entropy/var": 0.007769582327455282, "policy_loss": -0.65625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2279605269432068, "policy_sharpness": 9.70997142791748, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.871550440788269, "reward": 0.65625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2279605269432068, "rewards/accuracy_reward": 0.65625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2279605269432068, "sentence_fisher_curvature": 248191.875, "sentence_fisher_curvature/max": 1622016.0, "sentence_fisher_curvature/median": 75264.0, "sentence_fisher_curvature/min": 66.5, "sentence_fisher_curvature/p25": 1466.0, "sentence_fisher_curvature/p75": 434688.0, "sentence_fisher_curvature/p85": 619520.0, "sentence_fisher_curvature/p90": 718848.0, "sentence_fisher_curvature/p95": 745472.0, "sentence_fisher_curvature/p99": 1404109.5, "sentence_fisher_curvature/var": 107262525440.0, "sentence_fisher_kl_divergence": 1.077064371202141e-05, "sentence_fisher_kl_divergence/max": 7.05718994140625e-05, "sentence_fisher_kl_divergence/median": 3.2633543014526367e-06, "sentence_fisher_kl_divergence/min": 2.8812792152166367e-09, "sentence_fisher_kl_divergence/p25": 6.356276571750641e-08, "sentence_fisher_kl_divergence/p75": 1.8864870071411133e-05, "sentence_fisher_kl_divergence/p85": 2.6911497116088867e-05, "sentence_fisher_kl_divergence/p90": 3.123283386230469e-05, "sentence_fisher_kl_divergence/p95": 3.24249267578125e-05, "sentence_fisher_kl_divergence/p99": 6.083253174438141e-05, "sentence_fisher_kl_divergence/var": 2.021193906687202e-10, "sentence_full_gradient_variance/max_squared_error": 5239.400390625, "sentence_full_gradient_variance/metric": 5239.400390625, "sentence_full_gradient_variance/p75": 5239.400390625, "sentence_full_gradient_variance/p90": 5239.400390625, "sentence_full_gradient_variance/p95": 5239.400390625, "sentence_full_gradient_variance/p99": 5239.400390625, "sentence_full_update_term": 0.029347261413931847, "sentence_full_update_term/max": 0.2216796875, "sentence_full_update_term/median": 0.0186767578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.038330078125, "sentence_full_update_term/p85": 0.0648193359375, "sentence_full_update_term/p90": 0.0849609375, "sentence_full_update_term/p95": 0.098388671875, "sentence_full_update_term/p99": 0.17065446078777313, "sentence_full_update_term/var": 0.001620772061869502, "sentence_hessian_coeff": 44304.66796875, "sentence_hessian_coeff/max": 1003520.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -270336.0, "sentence_hessian_coeff/p25": -66560.0, "sentence_hessian_coeff/p75": 60608.0, "sentence_hessian_coeff/p99": 925696.25, "sentence_hessian_coeff/var": 48706859008.0, "sentence_hessian_coeff_abs": 124395.3359375, "sentence_hessian_coeff_abs/max": 1003520.0, "sentence_hessian_coeff_abs/median": 66560.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 193536.0, "sentence_hessian_coeff_abs/p99": 925696.25, "sentence_hessian_coeff_abs/var": 35053334528.0, "step": 49, "token_fisher_curvature": 221757.609375, "token_fisher_curvature/max": 137363456.0, "token_fisher_curvature/median": 1.0733601507606494e-17, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 1.34416849539867e-23, "token_fisher_curvature/p75": 1.4725998198628076e-12, "token_fisher_curvature/p85": 1.122714365919819e-09, "token_fisher_curvature/p90": 2.644956111907959e-07, "token_fisher_curvature/p95": 0.00867319107055664, "token_fisher_curvature/p99": 247808.0, "token_fisher_curvature/var": 16379585495040.0, "token_fisher_kl_divergence": 9.620855962566566e-06, "token_fisher_kl_divergence/max": 0.005950927734375, "token_fisher_kl_divergence/median": 4.67005655890839e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.837975143923809e-34, "token_fisher_kl_divergence/p75": 6.389970231972139e-23, "token_fisher_kl_divergence/p85": 4.8695295480384186e-20, "token_fisher_kl_divergence/p90": 1.1492543028346347e-17, "token_fisher_kl_divergence/p95": 3.7597008839540536e-13, "token_fisher_kl_divergence/p99": 1.0728836059570312e-05, "token_fisher_kl_divergence/var": 3.0826164731934114e-08, "token_full_update_term": 0.00033258585608564317, "token_full_update_term/max": 0.11474609375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.0174851417541504e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.5951463200613034e-15, "token_full_update_term/p85": 1.3287149158713873e-12, "token_full_update_term/p90": 5.070432962384075e-11, "token_full_update_term/p95": 1.2648797564907e-08, "token_full_update_term/p99": 0.00311279296875, "token_full_update_term/var": 1.991938734136056e-05, "token_hessian_coeff": 36285.76171875, "token_hessian_coeff/max": 137363456.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -17825792.0, "token_hessian_coeff/p25": -5.243346095085144e-07, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.014893293380737305, "token_hessian_coeff/var": 11833727516672.0, "token_hessian_coeff_abs": 215977.421875, "token_hessian_coeff_abs/max": 137363456.0, "token_hessian_coeff_abs/median": 7.815970093361102e-13, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 5.304813385009766e-06, "token_hessian_coeff_abs/p99": 3716224.0, "token_hessian_coeff_abs/var": 11788395479040.0 }, { "accuracy_reward": 0.9479166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.04989035800099373, "adam_stats/lm_head/lr_effective_max": 3.921823008568026e-05, "adam_stats/lm_head/lr_effective_mean": -1.2134299641475277e-11, "adam_stats/lm_head/lr_effective_min": -3.8882408261997625e-05, "adam_stats/lm_head/lr_effective_std": 9.408667551724648e-07, "adam_stats/lr_effective_max": 4.64631084469147e-05, "adam_stats/lr_effective_mean": 8.426411651774046e-11, "adam_stats/lr_effective_min": -4.6035711420699954e-05, "adam_stats/m_t_max": 0.0005302919307723641, "adam_stats/m_t_mean": -3.723780398617871e-12, "adam_stats/m_t_min": -0.0006257298518903553, "adam_stats/v_t_max": 2.555891842348501e-05, "adam_stats/v_t_mean": 1.7339118422304844e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.9479166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.04989035800099373, "all_logprobs": -0.010128687135875225, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00017547607421875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015777355059981346, "clip_ratio": 0.0, "completion_length": 436.75, "completion_length/correct": 422.8022155761719, "completion_length/correct/max": 1001.0, "completion_length/correct/median": 434.0, "completion_length/correct/min": 141.0, "completion_length/correct/p25": 256.0, "completion_length/correct/p75": 562.5, "completion_length/correct/var": 30673.626953125, "completion_length/incorrect": 690.6000366210938, "completion_length/incorrect/max": 878.0, "completion_length/incorrect/median": 647.0, "completion_length/incorrect/min": 614.0, "completion_length/incorrect/p25": 614.0, "completion_length/incorrect/p75": 700.0, "completion_length/incorrect/var": 12210.798828125, "completion_length/max": 1001.0, "completion_length/median": 463.0, "completion_length/min": 141.0, "completion_length/p25": 265.0, "completion_length/p75": 584.25, "completion_length/var": 33151.28515625, "curvature_clip_ratio_token_fisher": 0.018007060512900352, "curvature_clip_ratio_token_hessian": 0.012783819809556007, "curvature_clip_ratio_total_fisher": 0.018007060512900352, "curvature_clip_ratio_total_full": 0.018007060512900352, "curvature_clip_ratio_total_hessian": 0.012783819809556007, "epoch": 0.08, "feature_vector_variance/max_squared_error": 61545.8359375, "feature_vector_variance/metric": 31093.98046875, "generated_tokens/total": 2887048.0, "global_fisher_curvature": 176128.0, "global_fisher_curvature/max": 176128.0, "global_fisher_curvature/median": 176128.0, "global_fisher_curvature/min": 176128.0, "global_fisher_curvature/p25": 176128.0, "global_fisher_curvature/p75": 176128.0, "global_fisher_curvature/p85": 176128.0, "global_fisher_curvature/p90": 176128.0, "global_fisher_curvature/p95": 176128.0, "global_fisher_curvature/p99": 176128.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.241964340209961e-06, "global_fisher_kl_divergence/max": 7.241964340209961e-06, "global_fisher_kl_divergence/median": 7.241964340209961e-06, "global_fisher_kl_divergence/min": 7.241964340209961e-06, "global_fisher_kl_divergence/p25": 7.241964340209961e-06, "global_fisher_kl_divergence/p75": 7.241964340209961e-06, "global_fisher_kl_divergence/p85": 7.241964340209961e-06, "global_fisher_kl_divergence/p90": 7.241964340209961e-06, "global_fisher_kl_divergence/p95": 7.241964340209961e-06, "global_fisher_kl_divergence/p99": 7.241964340209961e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.1240234375, "global_full_update_term/max": 0.1240234375, "global_full_update_term/median": 0.1240234375, "global_full_update_term/min": 0.1240234375, "global_full_update_term/p25": 0.1240234375, "global_full_update_term/p75": 0.1240234375, "global_full_update_term/p85": 0.1240234375, "global_full_update_term/p90": 0.1240234375, "global_full_update_term/p95": 0.1240234375, "global_full_update_term/p99": 0.1240234375, "global_full_update_term/var": NaN, "global_hessian_coeff": 18560.0, "global_hessian_coeff/max": 18560.0, "global_hessian_coeff/median": 18560.0, "global_hessian_coeff/min": 18560.0, "global_hessian_coeff/p25": 18560.0, "global_hessian_coeff/p75": 18560.0, "global_hessian_coeff/p99": 18560.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 18560.0, "global_hessian_coeff_abs/max": 18560.0, "global_hessian_coeff_abs/median": 18560.0, "global_hessian_coeff_abs/min": 18560.0, "global_hessian_coeff_abs/p25": 18560.0, "global_hessian_coeff_abs/p75": 18560.0, "global_hessian_coeff_abs/p99": 18560.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.06497923284769058, "learning_rate": 8.80236133250198e-06, "loss": -0.9479, "masked_global_fisher_curvature": 199.0, "masked_global_fisher_curvature/max": 199.0, "masked_global_fisher_curvature/median": 199.0, "masked_global_fisher_curvature/min": 199.0, "masked_global_fisher_curvature/p25": 199.0, "masked_global_fisher_curvature/p75": 199.0, "masked_global_fisher_curvature/p85": 199.0, "masked_global_fisher_curvature/p90": 199.0, "masked_global_fisher_curvature/p95": 199.0, "masked_global_fisher_curvature/p99": 199.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/max": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/median": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/min": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p25": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p75": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p85": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p90": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p95": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p99": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0038604736328125, "masked_global_full_update_term/max": 0.0038604736328125, "masked_global_full_update_term/median": 0.0038604736328125, "masked_global_full_update_term/min": 0.0038604736328125, "masked_global_full_update_term/p25": 0.0038604736328125, "masked_global_full_update_term/p75": 0.0038604736328125, "masked_global_full_update_term/p85": 0.0038604736328125, "masked_global_full_update_term/p90": 0.0038604736328125, "masked_global_full_update_term/p95": 0.0038604736328125, "masked_global_full_update_term/p99": 0.0038604736328125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -6496.0, "masked_global_hessian_coeff/max": -6496.0, "masked_global_hessian_coeff/median": -6496.0, "masked_global_hessian_coeff/min": -6496.0, "masked_global_hessian_coeff/p25": -6496.0, "masked_global_hessian_coeff/p75": -6496.0, "masked_global_hessian_coeff/p99": -6496.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 6496.0, "masked_global_hessian_coeff_abs/max": 6496.0, "masked_global_hessian_coeff_abs/median": 6496.0, "masked_global_hessian_coeff_abs/min": 6496.0, "masked_global_hessian_coeff_abs/p25": 6496.0, "masked_global_hessian_coeff_abs/p75": 6496.0, "masked_global_hessian_coeff_abs/p99": 6496.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.5615336894989014, "masked_per_sentence_gradient_norm/max": 8.75, "masked_per_sentence_gradient_norm/median": 1.9921875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 1.283203125, "masked_per_sentence_gradient_norm/p75": 3.046875, "masked_per_sentence_gradient_norm/var": 5.138683319091797, "masked_per_token_gradient_norm": 0.054982706904411316, "masked_per_token_gradient_norm/max": 11.625, "masked_per_token_gradient_norm/median": 8.512870408594608e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 2.3657312579457388e-17, "masked_per_token_gradient_norm/p25": 4.5297099404706387e-13, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.334134817123413e-07, "masked_per_token_gradient_norm/var": 0.3375556468963623, "masked_sentence_fisher_curvature": 547.3202514648438, "masked_sentence_fisher_curvature/max": 3232.0, "masked_sentence_fisher_curvature/median": 414.0, "masked_sentence_fisher_curvature/min": 0.0576171875, "masked_sentence_fisher_curvature/p25": 254.0, "masked_sentence_fisher_curvature/p75": 677.0, "masked_sentence_fisher_curvature/p85": 879.0, "masked_sentence_fisher_curvature/p90": 1064.0, "masked_sentence_fisher_curvature/p95": 1324.0, "masked_sentence_fisher_curvature/p99": 3232.0, "masked_sentence_fisher_curvature/var": 285616.9375, "masked_sentence_fisher_kl_divergence": 2.2439051505784846e-08, "masked_sentence_fisher_kl_divergence/max": 1.3224780559539795e-07, "masked_sentence_fisher_kl_divergence/median": 1.6996636986732483e-08, "masked_sentence_fisher_kl_divergence/min": 2.3590018827235326e-12, "masked_sentence_fisher_kl_divergence/p25": 1.0419171303510666e-08, "masked_sentence_fisher_kl_divergence/p75": 2.7794158086180687e-08, "masked_sentence_fisher_kl_divergence/p85": 3.608874976634979e-08, "masked_sentence_fisher_kl_divergence/p90": 4.3655745685100555e-08, "masked_sentence_fisher_kl_divergence/p95": 5.436595529317856e-08, "masked_sentence_fisher_kl_divergence/p99": 1.3224780559539795e-07, "masked_sentence_fisher_kl_divergence/var": 4.787479981066957e-16, "masked_sentence_full_gradient_variance/max_squared_error": 11.17590618133545, "masked_sentence_full_gradient_variance/metric": 11.17590618133545, "masked_sentence_full_gradient_variance/p75": 11.17590618133545, "masked_sentence_full_gradient_variance/p90": 11.17590618133545, "masked_sentence_full_gradient_variance/p95": 11.17590618133545, "masked_sentence_full_gradient_variance/p99": 11.17590618133545, "masked_sentence_full_update_term": 0.0013853231212124228, "masked_sentence_full_update_term/max": 0.0045166015625, "masked_sentence_full_update_term/median": 0.001251220703125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0006761550903320312, "masked_sentence_full_update_term/p75": 0.00177764892578125, "masked_sentence_full_update_term/p85": 0.002254486083984375, "masked_sentence_full_update_term/p90": 0.00323486328125, "masked_sentence_full_update_term/p95": 0.003936767578125, "masked_sentence_full_update_term/p99": 0.004255677107721567, "masked_sentence_full_update_term/var": 1.290963041356008e-06, "masked_sentence_hessian_coeff": -19016.85546875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -16896.0, "masked_sentence_hessian_coeff/min": -83968.0, "masked_sentence_hessian_coeff/p25": -25216.0, "masked_sentence_hessian_coeff/p75": -10496.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 248544208.0, "masked_sentence_hessian_coeff_abs": 19016.85546875, "masked_sentence_hessian_coeff_abs/max": 83968.0, "masked_sentence_hessian_coeff_abs/median": 16896.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 10496.0, "masked_sentence_hessian_coeff_abs/p75": 25216.0, "masked_sentence_hessian_coeff_abs/p99": 83968.0, "masked_sentence_hessian_coeff_abs/var": 248544208.0, "masked_token_fisher_curvature": 631.1987915039062, "masked_token_fisher_curvature/max": 234496.0, "masked_token_fisher_curvature/median": 6.06475590234079e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 9.305781891221561e-25, "masked_token_fisher_curvature/p75": 1.3322676295501878e-13, "masked_token_fisher_curvature/p85": 6.702904897792905e-11, "masked_token_fisher_curvature/p90": 5.180481821298599e-09, "masked_token_fisher_curvature/p95": 0.00013522803783416748, "masked_token_fisher_curvature/p99": 2096.0, "masked_token_fisher_curvature/var": 77154448.0, "masked_token_fisher_kl_divergence": 2.5902702205371497e-08, "masked_token_fisher_kl_divergence/max": 9.59634780883789e-06, "masked_token_fisher_kl_divergence/median": 2.4849118514461872e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.81800565147079e-35, "masked_token_fisher_kl_divergence/p75": 5.454222164021526e-24, "masked_token_fisher_kl_divergence/p85": 2.747532103383166e-21, "masked_token_fisher_kl_divergence/p90": 2.1260526976082939e-19, "masked_token_fisher_kl_divergence/p95": 5.55653613398821e-15, "masked_token_fisher_kl_divergence/p99": 8.614733815193176e-08, "masked_token_fisher_kl_divergence/var": 1.2999760230954227e-13, "masked_token_full_update_term": 1.9642866391222924e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 4.824699667560495e-18, "masked_token_full_update_term/min": -9.5367431640625e-07, "masked_token_full_update_term/p25": -7.610061635487855e-22, "masked_token_full_update_term/p75": 1.687538997430238e-13, "masked_token_full_update_term/p85": 1.2562395568238571e-11, "masked_token_full_update_term/p90": 1.5988987911441654e-10, "masked_token_full_update_term/p95": 3.95320967072621e-08, "masked_token_full_update_term/p99": 0.00035858154296875, "masked_token_full_update_term/var": 4.558089372608265e-08, "masked_token_hessian_coeff": -24219.48828125, "masked_token_hessian_coeff/max": 48.0, "masked_token_hessian_coeff/median": -3.41970007866621e-09, "masked_token_hessian_coeff/min": -5079040.0, "masked_token_hessian_coeff/p25": -2.8133392333984375e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0155029296875, "masked_token_hessian_coeff/var": 71258570752.0, "masked_token_hessian_coeff_abs": 24219.509765625, "masked_token_hessian_coeff_abs/max": 5079040.0, "masked_token_hessian_coeff_abs/median": 1.0337680578231812e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 2.887645678129047e-11, "masked_token_hessian_coeff_abs/p75": 9.584426879882812e-05, "masked_token_hessian_coeff_abs/p99": 466944.0, "masked_token_hessian_coeff_abs/var": 71258570752.0, "mean_logprobs": -0.01080322265625, "mean_logprobs/var": 9.679794311523438e-05, "num_completions/total": 4800, "per_sentence_gradient_norm": 73.80859375, "per_sentence_gradient_norm/max": 274.0, "per_sentence_gradient_norm/median": 46.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 29.5, "per_sentence_gradient_norm/p75": 98.875, "per_sentence_gradient_norm/var": 4719.82275390625, "per_token_feature_norm": 190.5676727294922, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 150.50106811523438, "per_token_gradient_norm": 1.4825875759124756, "per_token_gradient_norm/max": 292.0, "per_token_gradient_norm/median": 1.0622898116707802e-09, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 3.5344990823027445e-17, "per_token_gradient_norm/p25": 5.364597654988756e-13, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.6566128730773926e-07, "per_token_gradient_norm/var": 198.14376831054688, "per_token_policy_error_norm": 0.00586561718955636, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005803576670587063, "policy_entropy": 0.010588200762867928, "policy_entropy/max": 1.6328125, "policy_entropy/median": 4.420144250616431e-10, "policy_entropy/min": 3.044024654195142e-21, "policy_entropy/p25": 1.0302869668521453e-12, "policy_entropy/p75": 9.266659617424011e-08, "policy_entropy/var": 0.005624879617244005, "policy_loss": -0.9479166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.04989035800099373, "policy_sharpness": 9.709896087646484, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8697142601013184, "reward": 0.9479166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.04989035800099373, "rewards/accuracy_reward": 0.9479166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.04989035800099373, "sentence_fisher_curvature": 389453.25, "sentence_fisher_curvature/max": 2670592.0, "sentence_fisher_curvature/median": 268288.0, "sentence_fisher_curvature/min": 65.5, "sentence_fisher_curvature/p25": 116736.0, "sentence_fisher_curvature/p75": 529408.0, "sentence_fisher_curvature/p85": 720896.0, "sentence_fisher_curvature/p90": 804864.0, "sentence_fisher_curvature/p95": 1058816.0, "sentence_fisher_curvature/p99": 2110261.0, "sentence_fisher_curvature/var": 177278648320.0, "sentence_fisher_kl_divergence": 1.5983969205990434e-05, "sentence_fisher_kl_divergence/max": 0.00010967254638671875, "sentence_fisher_kl_divergence/median": 1.1026859283447266e-05, "sentence_fisher_kl_divergence/min": 2.6921043172478676e-09, "sentence_fisher_kl_divergence/p25": 4.798173904418945e-06, "sentence_fisher_kl_divergence/p75": 2.1696090698242188e-05, "sentence_fisher_kl_divergence/p85": 2.956390380859375e-05, "sentence_fisher_kl_divergence/p90": 3.314018249511719e-05, "sentence_fisher_kl_divergence/p95": 4.3451786041259766e-05, "sentence_fisher_kl_divergence/p99": 8.656986028654501e-05, "sentence_fisher_kl_divergence/var": 2.9872196383173844e-10, "sentence_full_gradient_variance/max_squared_error": 9935.3671875, "sentence_full_gradient_variance/metric": 9935.3671875, "sentence_full_gradient_variance/p75": 9935.3671875, "sentence_full_gradient_variance/p90": 9935.3671875, "sentence_full_gradient_variance/p95": 9935.3671875, "sentence_full_gradient_variance/p99": 9935.3671875, "sentence_full_update_term": 0.0465087890625, "sentence_full_update_term/max": 0.17578125, "sentence_full_update_term/median": 0.0301513671875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.01861572265625, "sentence_full_update_term/p75": 0.0704345703125, "sentence_full_update_term/p85": 0.0810546875, "sentence_full_update_term/p90": 0.103271484375, "sentence_full_update_term/p95": 0.1246337890625, "sentence_full_update_term/p99": 0.15073250234127045, "sentence_full_update_term/var": 0.00156345684081316, "sentence_hessian_coeff": 29300.0, "sentence_hessian_coeff/max": 1310720.0, "sentence_hessian_coeff/median": -74240.0, "sentence_hessian_coeff/min": -532480.0, "sentence_hessian_coeff/p25": -145408.0, "sentence_hessian_coeff/p75": 147712.0, "sentence_hessian_coeff/p99": 1100595.875, "sentence_hessian_coeff/var": 80957046784.0, "sentence_hessian_coeff_abs": 201422.671875, "sentence_hessian_coeff_abs/max": 1310720.0, "sentence_hessian_coeff_abs/median": 145408.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 102528.0, "sentence_hessian_coeff_abs/p75": 214784.0, "sentence_hessian_coeff_abs/p99": 1100595.875, "sentence_hessian_coeff_abs/var": 40826413056.0, "step": 50, "token_fisher_curvature": 370373.75, "token_fisher_curvature/max": 138412032.0, "token_fisher_curvature/median": 8.605854744103691e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.2924697071141057e-24, "token_fisher_curvature/p75": 2.717825964282383e-13, "token_fisher_curvature/p85": 2.0272494793971418e-10, "token_fisher_curvature/p90": 5.25269570061937e-08, "token_fisher_curvature/p95": 0.049072265625, "token_fisher_curvature/p99": 3144448.0, "token_fisher_curvature/var": 24776743583744.0, "token_fisher_kl_divergence": 1.520206751592923e-05, "token_fisher_kl_divergence/max": 0.00567626953125, "token_fisher_kl_divergence/median": 3.530152550864028e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.303830510910161e-35, "token_fisher_kl_divergence/p75": 1.1166938269465874e-23, "token_fisher_kl_divergence/p85": 8.308822457942025e-21, "token_fisher_kl_divergence/p90": 2.1589281638736014e-18, "token_fisher_kl_divergence/p95": 2.0179413695586845e-12, "token_fisher_kl_divergence/p99": 0.00012892857193946838, "token_fisher_kl_divergence/var": 4.175107193304939e-08, "token_full_update_term": 0.0005792242591269314, "token_full_update_term/max": 0.11181640625, "token_full_update_term/median": 7.535205098774256e-18, "token_full_update_term/min": -9.5367431640625e-07, "token_full_update_term/p25": -7.560947786617519e-25, "token_full_update_term/p75": 2.7000623958883807e-13, "token_full_update_term/p85": 2.5124791136477143e-11, "token_full_update_term/p90": 5.675246939063072e-10, "token_full_update_term/p95": 1.2367963790893555e-06, "token_full_update_term/p99": 0.015470027923583984, "token_full_update_term/var": 3.1215338822221383e-05, "token_hessian_coeff": 13118.9306640625, "token_hessian_coeff/max": 136314880.0, "token_hessian_coeff/median": -4.307366907596588e-09, "token_hessian_coeff/min": -17825792.0, "token_hessian_coeff/p25": -3.933906555175781e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.039109230041503906, "token_hessian_coeff/var": 17318160629760.0, "token_hessian_coeff_abs": 372994.5625, "token_hessian_coeff_abs/max": 136314880.0, "token_hessian_coeff_abs/median": 1.3131648302078247e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 3.4788172342814505e-11, "token_hessian_coeff_abs/p75": 0.00014400482177734375, "token_hessian_coeff_abs/p99": 11665408.0, "token_hessian_coeff_abs/var": 17179205435392.0 }, { "accuracy_reward": 0.7291666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.19956141710281372, "adam_stats/lm_head/lr_effective_max": 3.874300091410987e-05, "adam_stats/lm_head/lr_effective_mean": 2.4973703660613467e-11, "adam_stats/lm_head/lr_effective_min": -3.942701732739806e-05, "adam_stats/lm_head/lr_effective_std": 9.156649412034312e-07, "adam_stats/lr_effective_max": 4.2921303247567266e-05, "adam_stats/lr_effective_mean": 1.2866924192778129e-10, "adam_stats/lr_effective_min": -4.242670183884911e-05, "adam_stats/m_t_max": 0.00044312115642242134, "adam_stats/m_t_mean": -1.8873971396188294e-12, "adam_stats/m_t_min": -0.0005099383415654302, "adam_stats/v_t_max": 2.5533476218697615e-05, "adam_stats/v_t_mean": 1.732436785174818e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7291666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.19956141710281372, "all_logprobs": -0.007985591888427734, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.10205078125, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -5.936622619628906e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012598107568919659, "clip_ratio": 0.0, "completion_length": 514.40625, "completion_length/correct": 519.1428833007812, "completion_length/correct/max": 964.0, "completion_length/correct/median": 410.0, "completion_length/correct/min": 290.0, "completion_length/correct/p25": 360.5, "completion_length/correct/p75": 670.5, "completion_length/correct/var": 44607.92578125, "completion_length/incorrect": 501.65386962890625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 351.0, "completion_length/incorrect/min": 217.0, "completion_length/incorrect/p25": 261.5, "completion_length/incorrect/p75": 664.75, "completion_length/incorrect/var": 68815.59375, "completion_length/max": 1024.0, "completion_length/median": 410.0, "completion_length/min": 217.0, "completion_length/p25": 336.0, "completion_length/p75": 672.0, "completion_length/var": 50569.84375, "curvature_clip_ratio_token_fisher": 0.010610939003527164, "curvature_clip_ratio_token_hessian": 0.006439462769776583, "curvature_clip_ratio_total_fisher": 0.010610939003527164, "curvature_clip_ratio_total_full": 0.010610939003527164, "curvature_clip_ratio_total_hessian": 0.006439462769776583, "epoch": 0.0816, "feature_vector_variance/max_squared_error": 63135.3046875, "feature_vector_variance/metric": 30999.82421875, "generated_tokens/total": 2936431.0, "global_fisher_curvature": 100864.0, "global_fisher_curvature/max": 100864.0, "global_fisher_curvature/median": 100864.0, "global_fisher_curvature/min": 100864.0, "global_fisher_curvature/p25": 100864.0, "global_fisher_curvature/p75": 100864.0, "global_fisher_curvature/p85": 100864.0, "global_fisher_curvature/p90": 100864.0, "global_fisher_curvature/p95": 100864.0, "global_fisher_curvature/p99": 100864.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.904104232788086e-06, "global_fisher_kl_divergence/max": 3.904104232788086e-06, "global_fisher_kl_divergence/median": 3.904104232788086e-06, "global_fisher_kl_divergence/min": 3.904104232788086e-06, "global_fisher_kl_divergence/p25": 3.904104232788086e-06, "global_fisher_kl_divergence/p75": 3.904104232788086e-06, "global_fisher_kl_divergence/p85": 3.904104232788086e-06, "global_fisher_kl_divergence/p90": 3.904104232788086e-06, "global_fisher_kl_divergence/p95": 3.904104232788086e-06, "global_fisher_kl_divergence/p99": 3.904104232788086e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.040771484375, "global_full_update_term/max": 0.040771484375, "global_full_update_term/median": 0.040771484375, "global_full_update_term/min": 0.040771484375, "global_full_update_term/p25": 0.040771484375, "global_full_update_term/p75": 0.040771484375, "global_full_update_term/p85": 0.040771484375, "global_full_update_term/p90": 0.040771484375, "global_full_update_term/p95": 0.040771484375, "global_full_update_term/p99": 0.040771484375, "global_full_update_term/var": NaN, "global_hessian_coeff": 25216.0, "global_hessian_coeff/max": 25216.0, "global_hessian_coeff/median": 25216.0, "global_hessian_coeff/min": 25216.0, "global_hessian_coeff/p25": 25216.0, "global_hessian_coeff/p75": 25216.0, "global_hessian_coeff/p99": 25216.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 25216.0, "global_hessian_coeff_abs/max": 25216.0, "global_hessian_coeff_abs/median": 25216.0, "global_hessian_coeff_abs/min": 25216.0, "global_hessian_coeff_abs/p25": 25216.0, "global_hessian_coeff_abs/p75": 25216.0, "global_hessian_coeff_abs/p99": 25216.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05070110410451889, "learning_rate": 8.543798257200491e-06, "loss": -0.7292, "masked_global_fisher_curvature": 1432.0, "masked_global_fisher_curvature/max": 1432.0, "masked_global_fisher_curvature/median": 1432.0, "masked_global_fisher_curvature/min": 1432.0, "masked_global_fisher_curvature/p25": 1432.0, "masked_global_fisher_curvature/p75": 1432.0, "masked_global_fisher_curvature/p85": 1432.0, "masked_global_fisher_curvature/p90": 1432.0, "masked_global_fisher_curvature/p95": 1432.0, "masked_global_fisher_curvature/p99": 1432.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/max": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/median": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/min": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/p25": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/p75": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/p85": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/p90": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/p95": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/p99": 5.541369318962097e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0031890869140625, "masked_global_full_update_term/max": 0.0031890869140625, "masked_global_full_update_term/median": 0.0031890869140625, "masked_global_full_update_term/min": 0.0031890869140625, "masked_global_full_update_term/p25": 0.0031890869140625, "masked_global_full_update_term/p75": 0.0031890869140625, "masked_global_full_update_term/p85": 0.0031890869140625, "masked_global_full_update_term/p90": 0.0031890869140625, "masked_global_full_update_term/p95": 0.0031890869140625, "masked_global_full_update_term/p99": 0.0031890869140625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5184.0, "masked_global_hessian_coeff/max": -5184.0, "masked_global_hessian_coeff/median": -5184.0, "masked_global_hessian_coeff/min": -5184.0, "masked_global_hessian_coeff/p25": -5184.0, "masked_global_hessian_coeff/p75": -5184.0, "masked_global_hessian_coeff/p99": -5184.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5184.0, "masked_global_hessian_coeff_abs/max": 5184.0, "masked_global_hessian_coeff_abs/median": 5184.0, "masked_global_hessian_coeff_abs/min": 5184.0, "masked_global_hessian_coeff_abs/p25": 5184.0, "masked_global_hessian_coeff_abs/p75": 5184.0, "masked_global_hessian_coeff_abs/p99": 5184.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.0442748069763184, "masked_per_sentence_gradient_norm/max": 13.25, "masked_per_sentence_gradient_norm/median": 1.21875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.6640625, "masked_per_sentence_gradient_norm/var": 6.482700824737549, "masked_per_token_gradient_norm": 0.042292676866054535, "masked_per_token_gradient_norm/max": 12.1875, "masked_per_token_gradient_norm/median": 4.524736141320318e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 7.543712854385376e-08, "masked_per_token_gradient_norm/var": 0.2619476914405823, "masked_sentence_fisher_curvature": 506.1954345703125, "masked_sentence_fisher_curvature/max": 1320.0, "masked_sentence_fisher_curvature/median": 446.0, "masked_sentence_fisher_curvature/min": 0.00165557861328125, "masked_sentence_fisher_curvature/p25": 203.25, "masked_sentence_fisher_curvature/p75": 807.0, "masked_sentence_fisher_curvature/p85": 908.0, "masked_sentence_fisher_curvature/p90": 1056.0, "masked_sentence_fisher_curvature/p95": 1136.0, "masked_sentence_fisher_curvature/p99": 1320.0, "masked_sentence_fisher_curvature/var": 136577.9375, "masked_sentence_fisher_kl_divergence": 1.960882833884625e-08, "masked_sentence_fisher_kl_divergence/max": 5.122274160385132e-08, "masked_sentence_fisher_kl_divergence/median": 1.7229467630386353e-08, "masked_sentence_fisher_kl_divergence/min": 6.394884621840902e-14, "masked_sentence_fisher_kl_divergence/p25": 7.887138053774834e-09, "masked_sentence_fisher_kl_divergence/p75": 3.1257513910532e-08, "masked_sentence_fisher_kl_divergence/p85": 3.5157427191734314e-08, "masked_sentence_fisher_kl_divergence/p90": 4.0978193283081055e-08, "masked_sentence_fisher_kl_divergence/p95": 4.400499165058136e-08, "masked_sentence_fisher_kl_divergence/p99": 5.122274160385132e-08, "masked_sentence_fisher_kl_divergence/var": 2.0511211190695242e-16, "masked_sentence_full_gradient_variance/max_squared_error": 10.051261901855469, "masked_sentence_full_gradient_variance/metric": 10.051261901855469, "masked_sentence_full_gradient_variance/p75": 10.051261901855469, "masked_sentence_full_gradient_variance/p90": 10.051261901855469, "masked_sentence_full_gradient_variance/p95": 10.051261901855469, "masked_sentence_full_gradient_variance/p99": 10.051261901855469, "masked_sentence_full_update_term": 0.0010876331944018602, "masked_sentence_full_update_term/max": 0.00811767578125, "masked_sentence_full_update_term/median": 0.000705718994140625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0016689300537109375, "masked_sentence_full_update_term/p85": 0.002277374267578125, "masked_sentence_full_update_term/p90": 0.0025787353515625, "masked_sentence_full_update_term/p95": 0.003692626953125, "masked_sentence_full_update_term/p99": 0.004174817353487015, "masked_sentence_full_update_term/var": 1.7654243720244267e-06, "masked_sentence_hessian_coeff": -14271.7724609375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -11584.0, "masked_sentence_hessian_coeff/min": -42240.0, "masked_sentence_hessian_coeff/p25": -24224.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 184170448.0, "masked_sentence_hessian_coeff_abs": 14271.7724609375, "masked_sentence_hessian_coeff_abs/max": 42240.0, "masked_sentence_hessian_coeff_abs/median": 11584.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 24224.0, "masked_sentence_hessian_coeff_abs/p99": 42240.0, "masked_sentence_hessian_coeff_abs/var": 184170448.0, "masked_token_fisher_curvature": 579.135986328125, "masked_token_fisher_curvature/max": 251904.0, "masked_token_fisher_curvature/median": 1.5449880957918438e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.333101564859118e-24, "masked_token_fisher_curvature/p75": 1.8829382497642655e-13, "masked_token_fisher_curvature/p85": 8.776623872108757e-11, "masked_token_fisher_curvature/p90": 8.207280188798904e-09, "masked_token_fisher_curvature/p95": 6.389617919921875e-05, "masked_token_fisher_curvature/p99": 2048.0, "masked_token_fisher_curvature/var": 73029776.0, "masked_token_fisher_kl_divergence": 2.2435235891293814e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 5.99534287967969e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.4525514135556207e-34, "masked_token_fisher_kl_divergence/p75": 7.289529148123556e-24, "masked_token_fisher_kl_divergence/p85": 3.3881317890172014e-21, "masked_token_fisher_kl_divergence/p90": 3.1848438816761693e-19, "masked_token_fisher_kl_divergence/p95": 2.4702462297909733e-15, "masked_token_fisher_kl_divergence/p99": 7.916241884231567e-08, "masked_token_fisher_kl_divergence/var": 1.0961748137883545e-13, "masked_token_full_update_term": 1.5017080841062125e-05, "masked_token_full_update_term/max": 0.004241943359375, "masked_token_full_update_term/median": 4.277516383634217e-20, "masked_token_full_update_term/min": -1.5795230865478516e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.863576125695545e-14, "masked_token_full_update_term/p85": 3.495870259939693e-12, "masked_token_full_update_term/p90": 5.411493475548923e-11, "masked_token_full_update_term/p95": 7.566995918750763e-09, "masked_token_full_update_term/p99": 0.00023621320724487305, "masked_token_full_update_term/var": 3.44927215678581e-08, "masked_token_hessian_coeff": -19283.056640625, "masked_token_hessian_coeff/max": 132.0, "masked_token_hessian_coeff/median": -7.09405867382884e-11, "masked_token_hessian_coeff/min": -5505024.0, "masked_token_hessian_coeff/p25": -5.677342414855957e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.004127979278564453, "masked_token_hessian_coeff/var": 58181783552.0, "masked_token_hessian_coeff_abs": 19283.076171875, "masked_token_hessian_coeff_abs/max": 5505024.0, "masked_token_hessian_coeff_abs/median": 5.3551048040390015e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.0265579223632812e-05, "masked_token_hessian_coeff_abs/p99": 208144.0, "masked_token_hessian_coeff_abs/var": 58181783552.0, "mean_logprobs": -0.00836181640625, "mean_logprobs/var": 3.814697265625e-05, "num_completions/total": 4896, "per_sentence_gradient_norm": 36.69498825073242, "per_sentence_gradient_norm/max": 292.0, "per_sentence_gradient_norm/median": 25.625, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 42.5, "per_sentence_gradient_norm/var": 2566.314697265625, "per_token_feature_norm": 190.37327575683594, "per_token_feature_norm/max": 266.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 99.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 136.3446502685547, "per_token_gradient_norm": 0.8262104988098145, "per_token_gradient_norm/max": 288.0, "per_token_gradient_norm/median": 5.4569682106375694e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.778887033462524e-08, "per_token_gradient_norm/var": 113.2091064453125, "per_token_policy_error_norm": 0.004685039632022381, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004875744692981243, "policy_entropy": 0.007972720079123974, "policy_entropy/max": 1.609375, "policy_entropy/median": 6.220943760126829e-10, "policy_entropy/min": 1.768181277393352e-20, "policy_entropy/p25": 1.9610979506978765e-12, "policy_entropy/p75": 1.0151416063308716e-07, "policy_entropy/var": 0.00392196187749505, "policy_loss": -0.7291666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.19956141710281372, "policy_sharpness": 9.757863998413086, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.568516731262207, "reward": 0.7291666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.19956141710281372, "rewards/accuracy_reward": 0.7291666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.19956141710281372, "sentence_fisher_curvature": 196214.46875, "sentence_fisher_curvature/max": 1056768.0, "sentence_fisher_curvature/median": 154624.0, "sentence_fisher_curvature/min": 60.75, "sentence_fisher_curvature/p25": 4544.0, "sentence_fisher_curvature/p75": 313856.0, "sentence_fisher_curvature/p85": 412160.0, "sentence_fisher_curvature/p90": 493568.0, "sentence_fisher_curvature/p95": 591872.0, "sentence_fisher_curvature/p99": 772711.3125, "sentence_fisher_curvature/var": 46851395584.0, "sentence_fisher_kl_divergence": 7.5960842877975665e-06, "sentence_fisher_kl_divergence/max": 4.100799560546875e-05, "sentence_fisher_kl_divergence/median": 5.990266799926758e-06, "sentence_fisher_kl_divergence/min": 2.35741026699543e-09, "sentence_fisher_kl_divergence/p25": 1.7601996660232544e-07, "sentence_fisher_kl_divergence/p75": 1.214444637298584e-05, "sentence_fisher_kl_divergence/p85": 1.5914440155029297e-05, "sentence_fisher_kl_divergence/p90": 1.9073486328125e-05, "sentence_fisher_kl_divergence/p95": 2.288818359375e-05, "sentence_fisher_kl_divergence/p99": 2.9909646400483325e-05, "sentence_fisher_kl_divergence/var": 7.026280252064865e-11, "sentence_full_gradient_variance/max_squared_error": 3825.854248046875, "sentence_full_gradient_variance/metric": 3825.854248046875, "sentence_full_gradient_variance/p75": 3825.854248046875, "sentence_full_gradient_variance/p90": 3825.854248046875, "sentence_full_gradient_variance/p95": 3825.854248046875, "sentence_full_gradient_variance/p99": 3825.854248046875, "sentence_full_update_term": 0.021464746445417404, "sentence_full_update_term/max": 0.1611328125, "sentence_full_update_term/median": 0.015869140625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0311279296875, "sentence_full_update_term/p85": 0.04583740234375, "sentence_full_update_term/p90": 0.053955078125, "sentence_full_update_term/p95": 0.061767578125, "sentence_full_update_term/p99": 0.10222186893224716, "sentence_full_update_term/var": 0.0006709650042466819, "sentence_hessian_coeff": 28444.208984375, "sentence_hessian_coeff/max": 851968.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -241664.0, "sentence_hessian_coeff/p25": -64768.0, "sentence_hessian_coeff/p75": 57664.0, "sentence_hessian_coeff/p99": 511489.09375, "sentence_hessian_coeff/var": 29023653888.0, "sentence_hessian_coeff_abs": 107336.4609375, "sentence_hessian_coeff_abs/max": 851968.0, "sentence_hessian_coeff_abs/median": 64768.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 174080.0, "sentence_hessian_coeff_abs/p99": 511489.09375, "sentence_hessian_coeff_abs/var": 18198851584.0, "step": 51, "token_fisher_curvature": 216361.40625, "token_fisher_curvature/max": 138412032.0, "token_fisher_curvature/median": 1.938011383317839e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.237830359838992e-24, "token_fisher_curvature/p75": 3.126388037344441e-13, "token_fisher_curvature/p85": 1.646185410209e-10, "token_fisher_curvature/p90": 2.6891939342021942e-08, "token_fisher_curvature/p95": 0.0012031197547912598, "token_fisher_curvature/p99": 323584.0, "token_fisher_curvature/var": 16234858938368.0, "token_fisher_kl_divergence": 8.38075538922567e-06, "token_fisher_kl_divergence/max": 0.00537109375, "token_fisher_kl_divergence/median": 7.494178599599612e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.798616950437702e-34, "token_fisher_kl_divergence/p75": 1.209751645858803e-23, "token_fisher_kl_divergence/p85": 6.3792168840089494e-21, "token_fisher_kl_divergence/p90": 1.043544591017298e-18, "token_fisher_kl_divergence/p95": 4.653916141350578e-14, "token_fisher_kl_divergence/p99": 1.2516975402832031e-05, "token_fisher_kl_divergence/var": 2.4364087636286058e-08, "token_full_update_term": 0.00031707031303085387, "token_full_update_term/max": 0.10888671875, "token_full_update_term/median": 6.945670167485263e-20, "token_full_update_term/min": -1.5795230865478516e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.6288307348495437e-14, "token_full_update_term/p85": 5.7127635955112055e-12, "token_full_update_term/p90": 9.595169103704393e-11, "token_full_update_term/p95": 3.4458935260772705e-08, "token_full_update_term/p99": 0.004608154296875, "token_full_update_term/var": 1.733175122353714e-05, "token_hessian_coeff": 16482.427734375, "token_hessian_coeff/max": 137363456.0, "token_hessian_coeff/median": -8.503775461576879e-11, "token_hessian_coeff/min": -17956864.0, "token_hessian_coeff/p25": -7.12275505065918e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.006644248962402344, "token_hessian_coeff/var": 11853574963200.0, "token_hessian_coeff_abs": 229152.5, "token_hessian_coeff_abs/max": 137363456.0, "token_hessian_coeff_abs/median": 6.315531209111214e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.6285648345947266e-05, "token_hessian_coeff_abs/p99": 5079040.0, "token_hessian_coeff_abs/var": 11801337004032.0 }, { "accuracy_reward": 0.7395833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.19462718069553375, "adam_stats/lm_head/lr_effective_max": 3.768935494008474e-05, "adam_stats/lm_head/lr_effective_mean": 1.7079162736854947e-11, "adam_stats/lm_head/lr_effective_min": -3.8612477510469034e-05, "adam_stats/lm_head/lr_effective_std": 8.989329671749147e-07, "adam_stats/lr_effective_max": 3.863375241053291e-05, "adam_stats/lr_effective_mean": 1.459014853377738e-10, "adam_stats/lr_effective_min": -3.8612477510469034e-05, "adam_stats/m_t_max": 0.00044763716869056225, "adam_stats/m_t_mean": -3.1110220736346728e-12, "adam_stats/m_t_min": -0.0005665189819410443, "adam_stats/v_t_max": 2.5508181352051906e-05, "adam_stats/v_t_mean": 1.7308268533688942e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7395833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.19462718069553375, "all_logprobs": -0.008156164549291134, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.75, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.534027099609375e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013140521012246609, "clip_ratio": 0.0, "completion_length": 525.5208740234375, "completion_length/correct": 500.7182922363281, "completion_length/correct/max": 961.0, "completion_length/correct/median": 475.0, "completion_length/correct/min": 228.0, "completion_length/correct/p25": 366.5, "completion_length/correct/p75": 612.0, "completion_length/correct/var": 40515.578125, "completion_length/incorrect": 595.9599609375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 371.0, "completion_length/incorrect/min": 324.0, "completion_length/incorrect/p25": 339.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 107657.9609375, "completion_length/max": 1024.0, "completion_length/median": 420.0, "completion_length/min": 228.0, "completion_length/p25": 362.5, "completion_length/p75": 614.25, "completion_length/var": 58816.84375, "curvature_clip_ratio_token_fisher": 0.011080277152359486, "curvature_clip_ratio_token_hessian": 0.0075916750356554985, "curvature_clip_ratio_total_fisher": 0.011080277152359486, "curvature_clip_ratio_total_full": 0.011080277152359486, "curvature_clip_ratio_total_hessian": 0.0075916750356554985, "epoch": 0.0832, "feature_vector_variance/max_squared_error": 64110.40234375, "feature_vector_variance/metric": 31322.431640625, "generated_tokens/total": 2986881.0, "global_fisher_curvature": 114176.0, "global_fisher_curvature/max": 114176.0, "global_fisher_curvature/median": 114176.0, "global_fisher_curvature/min": 114176.0, "global_fisher_curvature/p25": 114176.0, "global_fisher_curvature/p75": 114176.0, "global_fisher_curvature/p85": 114176.0, "global_fisher_curvature/p90": 114176.0, "global_fisher_curvature/p95": 114176.0, "global_fisher_curvature/p99": 114176.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.172325134277344e-06, "global_fisher_kl_divergence/max": 4.172325134277344e-06, "global_fisher_kl_divergence/median": 4.172325134277344e-06, "global_fisher_kl_divergence/min": 4.172325134277344e-06, "global_fisher_kl_divergence/p25": 4.172325134277344e-06, "global_fisher_kl_divergence/p75": 4.172325134277344e-06, "global_fisher_kl_divergence/p85": 4.172325134277344e-06, "global_fisher_kl_divergence/p90": 4.172325134277344e-06, "global_fisher_kl_divergence/p95": 4.172325134277344e-06, "global_fisher_kl_divergence/p99": 4.172325134277344e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0498046875, "global_full_update_term/max": 0.0498046875, "global_full_update_term/median": 0.0498046875, "global_full_update_term/min": 0.0498046875, "global_full_update_term/p25": 0.0498046875, "global_full_update_term/p75": 0.0498046875, "global_full_update_term/p85": 0.0498046875, "global_full_update_term/p90": 0.0498046875, "global_full_update_term/p95": 0.0498046875, "global_full_update_term/p99": 0.0498046875, "global_full_update_term/var": NaN, "global_hessian_coeff": 32512.0, "global_hessian_coeff/max": 32512.0, "global_hessian_coeff/median": 32512.0, "global_hessian_coeff/min": 32512.0, "global_hessian_coeff/p25": 32512.0, "global_hessian_coeff/p75": 32512.0, "global_hessian_coeff/p99": 32512.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 32512.0, "global_hessian_coeff_abs/max": 32512.0, "global_hessian_coeff_abs/median": 32512.0, "global_hessian_coeff_abs/min": 32512.0, "global_hessian_coeff_abs/p25": 32512.0, "global_hessian_coeff_abs/p75": 32512.0, "global_hessian_coeff_abs/p99": 32512.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.03537997975945473, "learning_rate": 8.283963474507402e-06, "loss": -0.7396, "masked_global_fisher_curvature": 199.0, "masked_global_fisher_curvature/max": 199.0, "masked_global_fisher_curvature/median": 199.0, "masked_global_fisher_curvature/min": 199.0, "masked_global_fisher_curvature/p25": 199.0, "masked_global_fisher_curvature/p75": 199.0, "masked_global_fisher_curvature/p85": 199.0, "masked_global_fisher_curvature/p90": 199.0, "masked_global_fisher_curvature/p95": 199.0, "masked_global_fisher_curvature/p99": 199.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/max": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/median": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/min": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/p25": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/p75": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/p85": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/p90": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/p95": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/p99": 7.275957614183426e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.004364013671875, "masked_global_full_update_term/max": 0.004364013671875, "masked_global_full_update_term/median": 0.004364013671875, "masked_global_full_update_term/min": 0.004364013671875, "masked_global_full_update_term/p25": 0.004364013671875, "masked_global_full_update_term/p75": 0.004364013671875, "masked_global_full_update_term/p85": 0.004364013671875, "masked_global_full_update_term/p90": 0.004364013671875, "masked_global_full_update_term/p95": 0.004364013671875, "masked_global_full_update_term/p99": 0.004364013671875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5152.0, "masked_global_hessian_coeff/max": -5152.0, "masked_global_hessian_coeff/median": -5152.0, "masked_global_hessian_coeff/min": -5152.0, "masked_global_hessian_coeff/p25": -5152.0, "masked_global_hessian_coeff/p75": -5152.0, "masked_global_hessian_coeff/p99": -5152.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5152.0, "masked_global_hessian_coeff_abs/max": 5152.0, "masked_global_hessian_coeff_abs/median": 5152.0, "masked_global_hessian_coeff_abs/min": 5152.0, "masked_global_hessian_coeff_abs/p25": 5152.0, "masked_global_hessian_coeff_abs/p75": 5152.0, "masked_global_hessian_coeff_abs/p99": 5152.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.227386474609375, "masked_per_sentence_gradient_norm/max": 11.6875, "masked_per_sentence_gradient_norm/median": 0.97265625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.07421875, "masked_per_sentence_gradient_norm/var": 8.232614517211914, "masked_per_token_gradient_norm": 0.04365759715437889, "masked_per_token_gradient_norm/max": 12.8125, "masked_per_token_gradient_norm/median": 3.751665644813329e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.2293457984924316e-07, "masked_per_token_gradient_norm/var": 0.2703339755535126, "masked_sentence_fisher_curvature": 455.81573486328125, "masked_sentence_fisher_curvature/max": 1640.0, "masked_sentence_fisher_curvature/median": 298.0, "masked_sentence_fisher_curvature/min": 0.87109375, "masked_sentence_fisher_curvature/p25": 151.5, "masked_sentence_fisher_curvature/p75": 675.0, "masked_sentence_fisher_curvature/p85": 1023.0, "masked_sentence_fisher_curvature/p90": 1152.0, "masked_sentence_fisher_curvature/p95": 1264.0, "masked_sentence_fisher_curvature/p99": 1434.8006591796875, "masked_sentence_fisher_curvature/var": 175742.078125, "masked_sentence_fisher_kl_divergence": 1.6638967537119242e-08, "masked_sentence_fisher_kl_divergence/max": 6.007030606269836e-08, "masked_sentence_fisher_kl_divergence/median": 1.0884832590818405e-08, "masked_sentence_fisher_kl_divergence/min": 3.183231456205249e-11, "masked_sentence_fisher_kl_divergence/p25": 5.529727786779404e-09, "masked_sentence_fisher_kl_divergence/p75": 2.4650944396853447e-08, "masked_sentence_fisher_kl_divergence/p85": 3.731111064553261e-08, "masked_sentence_fisher_kl_divergence/p90": 4.21423465013504e-08, "masked_sentence_fisher_kl_divergence/p95": 4.6100467443466187e-08, "masked_sentence_fisher_kl_divergence/p99": 5.232871203020295e-08, "masked_sentence_fisher_kl_divergence/var": 2.343128448446054e-16, "masked_sentence_full_gradient_variance/max_squared_error": 12.557327270507812, "masked_sentence_full_gradient_variance/metric": 12.557327270507812, "masked_sentence_full_gradient_variance/p75": 12.557327270507812, "masked_sentence_full_gradient_variance/p90": 12.557327270507812, "masked_sentence_full_gradient_variance/p95": 12.557327270507812, "masked_sentence_full_gradient_variance/p99": 12.557327270507812, "masked_sentence_full_update_term": 0.0012329667806625366, "masked_sentence_full_update_term/max": 0.005889892578125, "masked_sentence_full_update_term/median": 0.00043487548828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0018291473388671875, "masked_sentence_full_update_term/p85": 0.00287628173828125, "masked_sentence_full_update_term/p90": 0.00328826904296875, "masked_sentence_full_update_term/p95": 0.0048675537109375, "masked_sentence_full_update_term/p99": 0.005889892578125, "masked_sentence_full_update_term/var": 2.4825062610034365e-06, "masked_sentence_hessian_coeff": -14514.2919921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -11264.0, "masked_sentence_hessian_coeff/min": -55040.0, "masked_sentence_hessian_coeff/p25": -23968.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 200362496.0, "masked_sentence_hessian_coeff_abs": 14514.2919921875, "masked_sentence_hessian_coeff_abs/max": 55040.0, "masked_sentence_hessian_coeff_abs/median": 9856.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 23968.0, "masked_sentence_hessian_coeff_abs/p99": 46771.2265625, "masked_sentence_hessian_coeff_abs/var": 200362496.0, "masked_token_fisher_curvature": 579.1631469726562, "masked_token_fisher_curvature/max": 270336.0, "masked_token_fisher_curvature/median": 2.4530074152484538e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.979336418416171e-24, "masked_token_fisher_curvature/p75": 3.6415315207705135e-13, "masked_token_fisher_curvature/p85": 1.4097167877480388e-10, "masked_token_fisher_curvature/p90": 1.4901161193847656e-08, "masked_token_fisher_curvature/p95": 0.00018215179443359375, "masked_token_fisher_curvature/p99": 2192.0, "masked_token_fisher_curvature/var": 77391368.0, "masked_token_fisher_kl_divergence": 2.114703256950179e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 8.953571274258484e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.5428293796987723e-34, "masked_token_fisher_kl_divergence/p75": 1.333828737741757e-23, "masked_token_fisher_kl_divergence/p85": 5.135137242729196e-21, "masked_token_fisher_kl_divergence/p90": 5.454892180317694e-19, "masked_token_fisher_kl_divergence/p95": 6.647460359943125e-15, "masked_token_fisher_kl_divergence/p99": 8.009374141693115e-08, "masked_token_fisher_kl_divergence/var": 1.0314968687140033e-13, "masked_token_full_update_term": 1.5055494259286206e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 3.218725199566341e-20, "masked_token_full_update_term/min": -1.6316771507263184e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.684341886080802e-14, "masked_token_full_update_term/p85": 5.343281372915953e-12, "masked_token_full_update_term/p90": 8.185452315956354e-11, "masked_token_full_update_term/p95": 1.0011717677116394e-08, "masked_token_full_update_term/p99": 0.000286296010017395, "masked_token_full_update_term/var": 3.3873053695288036e-08, "masked_token_hessian_coeff": -19902.76171875, "masked_token_hessian_coeff/max": 1096.0, "masked_token_hessian_coeff/median": -4.979483492206782e-11, "masked_token_hessian_coeff/min": -5537792.0, "masked_token_hessian_coeff/p25": -6.586313247680664e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.00271761417388916, "masked_token_hessian_coeff/var": 60679901184.0, "masked_token_hessian_coeff_abs": 19902.83984375, "masked_token_hessian_coeff_abs/max": 5537792.0, "masked_token_hessian_coeff_abs/median": 5.296897143125534e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.956390380859375e-05, "masked_token_hessian_coeff_abs/p99": 328096.0, "masked_token_hessian_coeff_abs/var": 60679901184.0, "mean_logprobs": -0.00830078125, "mean_logprobs/var": 3.1948089599609375e-05, "num_completions/total": 4992, "per_sentence_gradient_norm": 38.68912887573242, "per_sentence_gradient_norm/max": 196.0, "per_sentence_gradient_norm/median": 33.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 51.0, "per_sentence_gradient_norm/var": 1869.0279541015625, "per_token_feature_norm": 191.4546661376953, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 191.0, "per_token_feature_norm/min": 98.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 156.6483154296875, "per_token_gradient_norm": 0.9769077897071838, "per_token_gradient_norm/max": 290.0, "per_token_gradient_norm/median": 4.5702108764089644e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.5087425708770752e-07, "per_token_gradient_norm/var": 137.32713317871094, "per_token_policy_error_norm": 0.004752384964376688, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004666945431381464, "policy_entropy": 0.008416925556957722, "policy_entropy/max": 1.140625, "policy_entropy/median": 7.385096978396177e-10, "policy_entropy/min": 1.0270274485458392e-20, "policy_entropy/p25": 1.5063505998114124e-12, "policy_entropy/p75": 1.30385160446167e-07, "policy_entropy/var": 0.0042308601550757885, "policy_loss": -0.7395833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.19462718069553375, "policy_sharpness": 9.747392654418945, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6101258993148804, "reward": 0.7395833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.19462718069553375, "rewards/accuracy_reward": 0.7395833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.19462718069553375, "sentence_fisher_curvature": 274850.0, "sentence_fisher_curvature/max": 1056768.0, "sentence_fisher_curvature/median": 245760.0, "sentence_fisher_curvature/min": 2.578125, "sentence_fisher_curvature/p25": 6064.0, "sentence_fisher_curvature/p75": 406016.0, "sentence_fisher_curvature/p85": 563200.0, "sentence_fisher_curvature/p90": 636928.0, "sentence_fisher_curvature/p95": 776192.0, "sentence_fisher_curvature/p99": 951705.9375, "sentence_fisher_curvature/var": 65270509568.0, "sentence_fisher_kl_divergence": 1.003406123345485e-05, "sentence_fisher_kl_divergence/max": 3.8623809814453125e-05, "sentence_fisher_kl_divergence/median": 8.940696716308594e-06, "sentence_fisher_kl_divergence/min": 9.413270163349807e-11, "sentence_fisher_kl_divergence/p25": 2.2118911147117615e-07, "sentence_fisher_kl_divergence/p75": 1.4826655387878418e-05, "sentence_fisher_kl_divergence/p85": 2.0563602447509766e-05, "sentence_fisher_kl_divergence/p90": 2.3245811462402344e-05, "sentence_fisher_kl_divergence/p95": 2.8342008590698242e-05, "sentence_fisher_kl_divergence/p99": 3.4773362131090835e-05, "sentence_fisher_kl_divergence/var": 8.699845011062024e-11, "sentence_full_gradient_variance/max_squared_error": 3302.40771484375, "sentence_full_gradient_variance/metric": 3302.40771484375, "sentence_full_gradient_variance/p75": 3302.40771484375, "sentence_full_gradient_variance/p90": 3302.40771484375, "sentence_full_gradient_variance/p95": 3302.40771484375, "sentence_full_gradient_variance/p99": 3302.40771484375, "sentence_full_update_term": 0.024611156433820724, "sentence_full_update_term/max": 0.09765625, "sentence_full_update_term/median": 0.022705078125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.03436279296875, "sentence_full_update_term/p85": 0.04254150390625, "sentence_full_update_term/p90": 0.0599365234375, "sentence_full_update_term/p95": 0.0751953125, "sentence_full_update_term/p99": 0.09626465290784836, "sentence_full_update_term/var": 0.0005931148189119995, "sentence_hessian_coeff": 37429.5, "sentence_hessian_coeff/max": 622592.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -215040.0, "sentence_hessian_coeff/p25": -59648.0, "sentence_hessian_coeff/p75": 86656.0, "sentence_hessian_coeff/p99": 500019.59375, "sentence_hessian_coeff/var": 26680315904.0, "sentence_hessian_coeff_abs": 107576.171875, "sentence_hessian_coeff_abs/max": 622592.0, "sentence_hessian_coeff_abs/median": 70144.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 143616.0, "sentence_hessian_coeff_abs/p99": 500019.59375, "sentence_hessian_coeff_abs/var": 16401582080.0, "step": 52, "token_fisher_curvature": 257804.375, "token_fisher_curvature/max": 147849216.0, "token_fisher_curvature/median": 3.2526065174565133e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 8.090860366534302e-24, "token_fisher_curvature/p75": 5.506706202140776e-13, "token_fisher_curvature/p85": 3.601599019020796e-10, "token_fisher_curvature/p90": 6.612390279769897e-08, "token_fisher_curvature/p95": 0.00424504280090332, "token_fisher_curvature/p99": 485376.0, "token_fisher_curvature/var": 18205380706304.0, "token_fisher_kl_divergence": 9.414623491466045e-06, "token_fisher_kl_divergence/max": 0.005401611328125, "token_fisher_kl_divergence/median": 1.1832913578315177e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.9566033911882175e-34, "token_fisher_kl_divergence/p75": 2.005912985441092e-23, "token_fisher_kl_divergence/p85": 1.3129010682441655e-20, "token_fisher_kl_divergence/p90": 2.4123498337802474e-18, "token_fisher_kl_divergence/p95": 1.5512244266879804e-13, "token_fisher_kl_divergence/p99": 1.7762184143066406e-05, "token_fisher_kl_divergence/var": 2.429042389451297e-08, "token_full_update_term": 0.0003633619053289294, "token_full_update_term/max": 0.109375, "token_full_update_term/median": 5.3151317440207346e-20, "token_full_update_term/min": -1.6316771507263184e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.393286066166183e-14, "token_full_update_term/p85": 8.071765478234738e-12, "token_full_update_term/p90": 1.6652634826641588e-10, "token_full_update_term/p95": 5.9138983488082886e-08, "token_full_update_term/p99": 0.00567626953125, "token_full_update_term/var": 1.9344912288943306e-05, "token_hessian_coeff": 18670.294921875, "token_hessian_coeff/max": 146800640.0, "token_hessian_coeff/median": -6.45741238258779e-11, "token_hessian_coeff/min": -18219008.0, "token_hessian_coeff/p25": -8.225440979003906e-06, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.005219221115112305, "token_hessian_coeff/var": 12380364865536.0, "token_hessian_coeff_abs": 239794.9375, "token_hessian_coeff_abs/max": 146800640.0, "token_hessian_coeff_abs/median": 6.315531209111214e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.8623809814453125e-05, "token_hessian_coeff_abs/p99": 5832704.0, "token_hessian_coeff_abs/var": 12323211182080.0 }, { "accuracy_reward": 0.7708333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.17850877344608307, "adam_stats/lm_head/lr_effective_max": 3.9160760934464633e-05, "adam_stats/lm_head/lr_effective_mean": 2.8173475705162154e-11, "adam_stats/lm_head/lr_effective_min": -3.6444151191972196e-05, "adam_stats/lm_head/lr_effective_std": 9.242476153303869e-07, "adam_stats/lr_effective_max": 4.1753515688469633e-05, "adam_stats/lr_effective_mean": 1.6156616322593464e-10, "adam_stats/lr_effective_min": -3.987305535702035e-05, "adam_stats/m_t_max": 0.0004906114772893488, "adam_stats/m_t_mean": -3.0918666064916334e-12, "adam_stats/m_t_min": -0.0005538094555959105, "adam_stats/v_t_max": 2.5483443096163683e-05, "adam_stats/v_t_mean": 1.7294752869406738e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7708333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.17850877344608307, "all_logprobs": -0.010148176923394203, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.9375, "all_logprobs/p1": -0.251953125, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0001506805419921875, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015056585893034935, "clip_ratio": 0.0, "completion_length": 527.0, "completion_length/correct": 438.93243408203125, "completion_length/correct/max": 1023.0, "completion_length/correct/median": 397.0, "completion_length/correct/min": 250.0, "completion_length/correct/p25": 336.0, "completion_length/correct/p75": 440.0, "completion_length/correct/var": 28864.71875, "completion_length/incorrect": 823.227294921875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 876.0, "completion_length/incorrect/min": 372.0, "completion_length/incorrect/p25": 633.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 62701.98828125, "completion_length/max": 1024.0, "completion_length/median": 416.0, "completion_length/min": 250.0, "completion_length/p25": 356.0, "completion_length/p75": 592.5, "completion_length/var": 62403.328125, "curvature_clip_ratio_token_fisher": 0.013480392284691334, "curvature_clip_ratio_token_hessian": 0.009191176854074001, "curvature_clip_ratio_total_fisher": 0.013480392284691334, "curvature_clip_ratio_total_full": 0.013480392284691334, "curvature_clip_ratio_total_hessian": 0.009191176854074001, "epoch": 0.0848, "feature_vector_variance/max_squared_error": 62031.01171875, "feature_vector_variance/metric": 30965.384765625, "generated_tokens/total": 3037473.0, "global_fisher_curvature": 149504.0, "global_fisher_curvature/max": 149504.0, "global_fisher_curvature/median": 149504.0, "global_fisher_curvature/min": 149504.0, "global_fisher_curvature/p25": 149504.0, "global_fisher_curvature/p75": 149504.0, "global_fisher_curvature/p85": 149504.0, "global_fisher_curvature/p90": 149504.0, "global_fisher_curvature/p95": 149504.0, "global_fisher_curvature/p99": 149504.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.125999450683594e-06, "global_fisher_kl_divergence/max": 5.125999450683594e-06, "global_fisher_kl_divergence/median": 5.125999450683594e-06, "global_fisher_kl_divergence/min": 5.125999450683594e-06, "global_fisher_kl_divergence/p25": 5.125999450683594e-06, "global_fisher_kl_divergence/p75": 5.125999450683594e-06, "global_fisher_kl_divergence/p85": 5.125999450683594e-06, "global_fisher_kl_divergence/p90": 5.125999450683594e-06, "global_fisher_kl_divergence/p95": 5.125999450683594e-06, "global_fisher_kl_divergence/p99": 5.125999450683594e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.08837890625, "global_full_update_term/max": 0.08837890625, "global_full_update_term/median": 0.08837890625, "global_full_update_term/min": 0.08837890625, "global_full_update_term/p25": 0.08837890625, "global_full_update_term/p75": 0.08837890625, "global_full_update_term/p85": 0.08837890625, "global_full_update_term/p90": 0.08837890625, "global_full_update_term/p95": 0.08837890625, "global_full_update_term/p99": 0.08837890625, "global_full_update_term/var": NaN, "global_hessian_coeff": 23680.0, "global_hessian_coeff/max": 23680.0, "global_hessian_coeff/median": 23680.0, "global_hessian_coeff/min": 23680.0, "global_hessian_coeff/p25": 23680.0, "global_hessian_coeff/p75": 23680.0, "global_hessian_coeff/p99": 23680.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 23680.0, "global_hessian_coeff_abs/max": 23680.0, "global_hessian_coeff_abs/median": 23680.0, "global_hessian_coeff_abs/min": 23680.0, "global_hessian_coeff_abs/p25": 23680.0, "global_hessian_coeff_abs/p75": 23680.0, "global_hessian_coeff_abs/p99": 23680.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05342591926455498, "learning_rate": 8.02317355308094e-06, "loss": -0.7708, "masked_global_fisher_curvature": 544.0, "masked_global_fisher_curvature/max": 544.0, "masked_global_fisher_curvature/median": 544.0, "masked_global_fisher_curvature/min": 544.0, "masked_global_fisher_curvature/p25": 544.0, "masked_global_fisher_curvature/p75": 544.0, "masked_global_fisher_curvature/p85": 544.0, "masked_global_fisher_curvature/p90": 544.0, "masked_global_fisher_curvature/p95": 544.0, "masked_global_fisher_curvature/p99": 544.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/max": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/median": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/min": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/p25": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/p75": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/p85": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/p90": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/p95": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/p99": 1.862645149230957e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003997802734375, "masked_global_full_update_term/max": 0.003997802734375, "masked_global_full_update_term/median": 0.003997802734375, "masked_global_full_update_term/min": 0.003997802734375, "masked_global_full_update_term/p25": 0.003997802734375, "masked_global_full_update_term/p75": 0.003997802734375, "masked_global_full_update_term/p85": 0.003997802734375, "masked_global_full_update_term/p90": 0.003997802734375, "masked_global_full_update_term/p95": 0.003997802734375, "masked_global_full_update_term/p99": 0.003997802734375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5440.0, "masked_global_hessian_coeff/max": -5440.0, "masked_global_hessian_coeff/median": -5440.0, "masked_global_hessian_coeff/min": -5440.0, "masked_global_hessian_coeff/p25": -5440.0, "masked_global_hessian_coeff/p75": -5440.0, "masked_global_hessian_coeff/p99": -5440.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5440.0, "masked_global_hessian_coeff_abs/max": 5440.0, "masked_global_hessian_coeff_abs/median": 5440.0, "masked_global_hessian_coeff_abs/min": 5440.0, "masked_global_hessian_coeff_abs/p25": 5440.0, "masked_global_hessian_coeff_abs/p75": 5440.0, "masked_global_hessian_coeff_abs/p99": 5440.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.1666667461395264, "masked_per_sentence_gradient_norm/max": 9.875, "masked_per_sentence_gradient_norm/median": 1.703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.34375, "masked_per_sentence_gradient_norm/p75": 3.3125, "masked_per_sentence_gradient_norm/var": 4.429439067840576, "masked_per_token_gradient_norm": 0.04280770570039749, "masked_per_token_gradient_norm/max": 14.0, "masked_per_token_gradient_norm/median": 6.821210263296962e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 7.404014468193054e-08, "masked_per_token_gradient_norm/var": 0.24586252868175507, "masked_sentence_fisher_curvature": 521.4427490234375, "masked_sentence_fisher_curvature/max": 1776.0, "masked_sentence_fisher_curvature/median": 444.0, "masked_sentence_fisher_curvature/min": 16.0, "masked_sentence_fisher_curvature/p25": 309.0, "masked_sentence_fisher_curvature/p75": 615.0, "masked_sentence_fisher_curvature/p85": 799.0, "masked_sentence_fisher_curvature/p90": 860.0, "masked_sentence_fisher_curvature/p95": 1069.0, "masked_sentence_fisher_curvature/p99": 1722.8001708984375, "masked_sentence_fisher_curvature/var": 112322.3828125, "masked_sentence_fisher_kl_divergence": 1.7895786186272744e-08, "masked_sentence_fisher_kl_divergence/max": 6.100162863731384e-08, "masked_sentence_fisher_kl_divergence/median": 1.525040715932846e-08, "masked_sentence_fisher_kl_divergence/min": 5.493347998708487e-10, "masked_sentence_fisher_kl_divergence/p25": 1.0608346201479435e-08, "masked_sentence_fisher_kl_divergence/p75": 2.10711732506752e-08, "masked_sentence_fisher_kl_divergence/p85": 2.741580829024315e-08, "masked_sentence_fisher_kl_divergence/p90": 2.9569491744041443e-08, "masked_sentence_fisher_kl_divergence/p95": 3.6612618714571e-08, "masked_sentence_fisher_kl_divergence/p99": 5.901093302895788e-08, "masked_sentence_fisher_kl_divergence/var": 1.323124226521933e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.817977905273438, "masked_sentence_full_gradient_variance/metric": 8.817977905273438, "masked_sentence_full_gradient_variance/p75": 8.817977905273438, "masked_sentence_full_gradient_variance/p90": 8.817977905273438, "masked_sentence_full_gradient_variance/p95": 8.817977905273438, "masked_sentence_full_gradient_variance/p99": 8.817977905273438, "masked_sentence_full_update_term": 0.0012311985483393073, "masked_sentence_full_update_term/max": 0.00787353515625, "masked_sentence_full_update_term/median": 0.00102996826171875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00014209747314453125, "masked_sentence_full_update_term/p75": 0.0018291473388671875, "masked_sentence_full_update_term/p85": 0.002040863037109375, "masked_sentence_full_update_term/p90": 0.00238800048828125, "masked_sentence_full_update_term/p95": 0.003269195556640625, "masked_sentence_full_update_term/p99": 0.0060180723667144775, "masked_sentence_full_update_term/var": 1.8550406366557581e-06, "masked_sentence_hessian_coeff": -15835.6669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -16064.0, "masked_sentence_hessian_coeff/min": -56064.0, "masked_sentence_hessian_coeff/p25": -24096.0, "masked_sentence_hessian_coeff/p75": -3852.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 161357920.0, "masked_sentence_hessian_coeff_abs": 15835.6669921875, "masked_sentence_hessian_coeff_abs/max": 56064.0, "masked_sentence_hessian_coeff_abs/median": 16000.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 3852.0, "masked_sentence_hessian_coeff_abs/p75": 24096.0, "masked_sentence_hessian_coeff_abs/p99": 54604.8046875, "masked_sentence_hessian_coeff_abs/var": 161357920.0, "masked_token_fisher_curvature": 655.0689697265625, "masked_token_fisher_curvature/max": 290816.0, "masked_token_fisher_curvature/median": 4.1470733097570545e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.9217246446905147e-23, "masked_token_fisher_curvature/p75": 4.654054919228656e-13, "masked_token_fisher_curvature/p85": 2.000497545395774e-10, "masked_token_fisher_curvature/p90": 2.074466465273872e-08, "masked_token_fisher_curvature/p95": 0.0001888275146484375, "masked_token_fisher_curvature/p99": 5335.375, "masked_token_fisher_curvature/var": 86427304.0, "masked_token_fisher_kl_divergence": 2.24715872576553e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.4199496293978212e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.6851887013388314e-33, "masked_token_fisher_kl_divergence/p75": 1.5923226791645783e-23, "masked_token_fisher_kl_divergence/p85": 6.863737927811885e-21, "masked_token_fisher_kl_divergence/p90": 7.121694201836547e-19, "masked_token_fisher_kl_divergence/p95": 6.467049118441537e-15, "masked_token_fisher_kl_divergence/p99": 1.8313585314899683e-07, "masked_token_fisher_kl_divergence/var": 1.0169225487730027e-13, "masked_token_full_update_term": 1.4592058505513705e-05, "masked_token_full_update_term/max": 0.004241943359375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.4156103134155273e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.8179902028236938e-14, "masked_token_full_update_term/p85": 2.5295321393059567e-12, "masked_token_full_update_term/p90": 4.82254236544577e-11, "masked_token_full_update_term/p95": 5.529727786779404e-09, "masked_token_full_update_term/p99": 0.000293731689453125, "masked_token_full_update_term/var": 2.985559177659525e-08, "masked_token_hessian_coeff": -19880.232421875, "masked_token_hessian_coeff/max": 616.0, "masked_token_hessian_coeff/median": -7.496225862269057e-13, "masked_token_hessian_coeff/min": -5570560.0, "masked_token_hessian_coeff/p25": -2.6971101760864258e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.005218505859375, "masked_token_hessian_coeff/var": 55913062400.0, "masked_token_hessian_coeff_abs": 19880.392578125, "masked_token_hessian_coeff_abs/max": 5570560.0, "masked_token_hessian_coeff_abs/median": 1.1714291758835316e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.52587890625e-05, "masked_token_hessian_coeff_abs/p99": 368456.0, "masked_token_hessian_coeff_abs/var": 55913054208.0, "mean_logprobs": -0.00994873046875, "mean_logprobs/var": 5.2928924560546875e-05, "num_completions/total": 5088, "per_sentence_gradient_norm": 42.632164001464844, "per_sentence_gradient_norm/max": 224.0, "per_sentence_gradient_norm/median": 27.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 2.265625, "per_sentence_gradient_norm/p75": 56.0625, "per_sentence_gradient_norm/var": 2549.440673828125, "per_token_feature_norm": 190.2107391357422, "per_token_feature_norm/max": 262.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 137.60504150390625, "per_token_gradient_norm": 1.0506644248962402, "per_token_gradient_norm/max": 272.0, "per_token_gradient_norm/median": 9.43600753089413e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.825453162193298e-08, "per_token_gradient_norm/var": 134.94317626953125, "per_token_policy_error_norm": 0.006052285898476839, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005918211303651333, "policy_entropy": 0.010720946826040745, "policy_entropy/max": 1.2734375, "policy_entropy/median": 9.968061931431293e-10, "policy_entropy/min": 2.6469779601696886e-20, "policy_entropy/p25": 3.82982534574694e-12, "policy_entropy/p75": 1.7229467630386353e-07, "policy_entropy/var": 0.005440680310130119, "policy_loss": -0.7708333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.17850877344608307, "policy_sharpness": 9.712515830993652, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8767355680465698, "reward": 0.7708333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.17850877344608307, "rewards/accuracy_reward": 0.7708333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.17850877344608307, "sentence_fisher_curvature": 294080.125, "sentence_fisher_curvature/max": 1359872.0, "sentence_fisher_curvature/median": 147456.0, "sentence_fisher_curvature/min": 194.0, "sentence_fisher_curvature/p25": 11776.0, "sentence_fisher_curvature/p75": 488448.0, "sentence_fisher_curvature/p85": 647168.0, "sentence_fisher_curvature/p90": 724992.0, "sentence_fisher_curvature/p95": 902144.0, "sentence_fisher_curvature/p99": 1305395.375, "sentence_fisher_curvature/var": 103994179584.0, "sentence_fisher_kl_divergence": 1.0088770977745298e-05, "sentence_fisher_kl_divergence/max": 4.673004150390625e-05, "sentence_fisher_kl_divergence/median": 5.066394805908203e-06, "sentence_fisher_kl_divergence/min": 6.664777174592018e-09, "sentence_fisher_kl_divergence/p25": 4.041939973831177e-07, "sentence_fisher_kl_divergence/p75": 1.671910285949707e-05, "sentence_fisher_kl_divergence/p85": 2.2232532501220703e-05, "sentence_fisher_kl_divergence/p90": 2.491474151611328e-05, "sentence_fisher_kl_divergence/p95": 3.0994415283203125e-05, "sentence_fisher_kl_divergence/p99": 4.4691569200949743e-05, "sentence_fisher_kl_divergence/var": 1.223800505378847e-10, "sentence_full_gradient_variance/max_squared_error": 4239.38525390625, "sentence_full_gradient_variance/metric": 4239.38525390625, "sentence_full_gradient_variance/p75": 4239.38525390625, "sentence_full_gradient_variance/p90": 4239.38525390625, "sentence_full_gradient_variance/p95": 4239.38525390625, "sentence_full_gradient_variance/p99": 4239.38525390625, "sentence_full_update_term": 0.028901418671011925, "sentence_full_update_term/max": 0.14453125, "sentence_full_update_term/median": 0.0150146484375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.001708984375, "sentence_full_update_term/p75": 0.03997802734375, "sentence_full_update_term/p85": 0.062744140625, "sentence_full_update_term/p90": 0.083740234375, "sentence_full_update_term/p95": 0.1064453125, "sentence_full_update_term/p99": 0.13432620465755463, "sentence_full_update_term/var": 0.0012180133489891887, "sentence_hessian_coeff": 16311.333984375, "sentence_hessian_coeff/max": 995328.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -362496.0, "sentence_hessian_coeff/p25": -90752.0, "sentence_hessian_coeff/p75": 71040.0, "sentence_hessian_coeff/p99": 684033.0, "sentence_hessian_coeff/var": 42579746816.0, "sentence_hessian_coeff_abs": 128220.671875, "sentence_hessian_coeff_abs/max": 995328.0, "sentence_hessian_coeff_abs/median": 81408.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 16576.0, "sentence_hessian_coeff_abs/p75": 161280.0, "sentence_hessian_coeff_abs/p99": 684033.0, "sentence_hessian_coeff_abs/var": 26235006976.0, "step": 53, "token_fisher_curvature": 268193.5, "token_fisher_curvature/max": 150994944.0, "token_fisher_curvature/median": 5.502326025363935e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.872982349126496e-23, "token_fisher_curvature/p75": 7.993605777301127e-13, "token_fisher_curvature/p85": 4.984030965715647e-10, "token_fisher_curvature/p90": 1.0220901458524168e-07, "token_fisher_curvature/p95": 0.012939453125, "token_fisher_curvature/p99": 1271968.0, "token_fisher_curvature/var": 18356830732288.0, "token_fisher_kl_divergence": 9.199932719639037e-06, "token_fisher_kl_divergence/max": 0.00518798828125, "token_fisher_kl_divergence/median": 1.8853775634782182e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.0101893794541774e-33, "token_fisher_kl_divergence/p75": 2.750375536738817e-23, "token_fisher_kl_divergence/p85": 1.7152417181899582e-20, "token_fisher_kl_divergence/p90": 3.514022060802872e-18, "token_fisher_kl_divergence/p95": 4.440892098500626e-13, "token_fisher_kl_divergence/p99": 4.3694861233234406e-05, "token_fisher_kl_divergence/var": 2.1594015464643235e-08, "token_full_update_term": 0.0003855471732094884, "token_full_update_term/max": 0.10693359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.4156103134155273e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.886579864025407e-14, "token_full_update_term/p85": 4.718003765447065e-12, "token_full_update_term/p90": 1.0413714335300028e-10, "token_full_update_term/p95": 4.284083843231201e-08, "token_full_update_term/p99": 0.008977651596069336, "token_full_update_term/var": 1.8836199160432443e-05, "token_hessian_coeff": -7170.08837890625, "token_hessian_coeff/max": 149946368.0, "token_hessian_coeff/median": -1.4424017535930034e-12, "token_hessian_coeff/min": -18874368.0, "token_hessian_coeff/p25": -3.7550926208496094e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.01040339469909668, "token_hessian_coeff/var": 12286279286784.0, "token_hessian_coeff_abs": 282413.15625, "token_hessian_coeff_abs/max": 149946368.0, "token_hessian_coeff_abs/median": 1.57160684466362e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.2172927856445312e-05, "token_hessian_coeff_abs/p99": 8781824.0, "token_hessian_coeff_abs/var": 12206571782144.0 }, { "accuracy_reward": 0.59375, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24375000596046448, "adam_stats/lm_head/lr_effective_max": 3.838419070234522e-05, "adam_stats/lm_head/lr_effective_mean": 1.1789421869301542e-11, "adam_stats/lm_head/lr_effective_min": -3.8689220673404634e-05, "adam_stats/lm_head/lr_effective_std": 8.806662208371563e-07, "adam_stats/lr_effective_max": 4.137924042879604e-05, "adam_stats/lr_effective_mean": 1.4776894985413236e-10, "adam_stats/lr_effective_min": -3.932266554329544e-05, "adam_stats/m_t_max": 0.00047329883091151714, "adam_stats/m_t_mean": 3.5076162894359664e-13, "adam_stats/m_t_min": -0.00040682315011508763, "adam_stats/v_t_max": 2.5460907636443153e-05, "adam_stats/v_t_mean": 1.7289624593130881e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.59375, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24375000596046448, "all_logprobs": -0.00906551443040371, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0625, "all_logprobs/p1": -0.21578124165534973, "all_logprobs/p10": -1.0728836059570312e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00012302398681640625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.0122849615290761, "clip_ratio": 0.0, "completion_length": 496.59375, "completion_length/correct": 323.631591796875, "completion_length/correct/max": 683.0, "completion_length/correct/median": 267.0, "completion_length/correct/min": 216.0, "completion_length/correct/p25": 231.0, "completion_length/correct/p75": 388.0, "completion_length/correct/var": 16237.5234375, "completion_length/incorrect": 749.3846435546875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 798.0, "completion_length/incorrect/min": 305.0, "completion_length/incorrect/p25": 642.0, "completion_length/incorrect/p75": 957.0, "completion_length/incorrect/var": 52793.71484375, "completion_length/max": 1024.0, "completion_length/median": 388.0, "completion_length/min": 216.0, "completion_length/p25": 260.25, "completion_length/p75": 683.5, "completion_length/var": 74872.5703125, "curvature_clip_ratio_token_fisher": 0.005705535411834717, "curvature_clip_ratio_token_hessian": 0.004027436953037977, "curvature_clip_ratio_total_fisher": 0.005705535411834717, "curvature_clip_ratio_total_full": 0.005705535411834717, "curvature_clip_ratio_total_hessian": 0.004027436953037977, "epoch": 0.0864, "feature_vector_variance/max_squared_error": 68377.828125, "feature_vector_variance/metric": 31227.732421875, "generated_tokens/total": 3085146.0, "global_fisher_curvature": 73216.0, "global_fisher_curvature/max": 73216.0, "global_fisher_curvature/median": 73216.0, "global_fisher_curvature/min": 73216.0, "global_fisher_curvature/p25": 73216.0, "global_fisher_curvature/p75": 73216.0, "global_fisher_curvature/p85": 73216.0, "global_fisher_curvature/p90": 73216.0, "global_fisher_curvature/p95": 73216.0, "global_fisher_curvature/p99": 73216.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.3543834686279297e-06, "global_fisher_kl_divergence/max": 2.3543834686279297e-06, "global_fisher_kl_divergence/median": 2.3543834686279297e-06, "global_fisher_kl_divergence/min": 2.3543834686279297e-06, "global_fisher_kl_divergence/p25": 2.3543834686279297e-06, "global_fisher_kl_divergence/p75": 2.3543834686279297e-06, "global_fisher_kl_divergence/p85": 2.3543834686279297e-06, "global_fisher_kl_divergence/p90": 2.3543834686279297e-06, "global_fisher_kl_divergence/p95": 2.3543834686279297e-06, "global_fisher_kl_divergence/p99": 2.3543834686279297e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0233154296875, "global_full_update_term/max": 0.0233154296875, "global_full_update_term/median": 0.0233154296875, "global_full_update_term/min": 0.0233154296875, "global_full_update_term/p25": 0.0233154296875, "global_full_update_term/p75": 0.0233154296875, "global_full_update_term/p85": 0.0233154296875, "global_full_update_term/p90": 0.0233154296875, "global_full_update_term/p95": 0.0233154296875, "global_full_update_term/p99": 0.0233154296875, "global_full_update_term/var": NaN, "global_hessian_coeff": -9664.0, "global_hessian_coeff/max": -9664.0, "global_hessian_coeff/median": -9664.0, "global_hessian_coeff/min": -9664.0, "global_hessian_coeff/p25": -9664.0, "global_hessian_coeff/p75": -9664.0, "global_hessian_coeff/p99": -9664.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 9664.0, "global_hessian_coeff_abs/max": 9664.0, "global_hessian_coeff_abs/median": 9664.0, "global_hessian_coeff_abs/min": 9664.0, "global_hessian_coeff_abs/p25": 9664.0, "global_hessian_coeff_abs/p75": 9664.0, "global_hessian_coeff_abs/p99": 9664.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.07223144918680191, "learning_rate": 7.76174622526876e-06, "loss": -0.5938, "masked_global_fisher_curvature": 486.0, "masked_global_fisher_curvature/max": 486.0, "masked_global_fisher_curvature/median": 486.0, "masked_global_fisher_curvature/min": 486.0, "masked_global_fisher_curvature/p25": 486.0, "masked_global_fisher_curvature/p75": 486.0, "masked_global_fisher_curvature/p85": 486.0, "masked_global_fisher_curvature/p90": 486.0, "masked_global_fisher_curvature/p95": 486.0, "masked_global_fisher_curvature/p99": 486.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/max": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/median": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/min": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/p25": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/p75": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/p85": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/p90": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/p95": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/p99": 1.5599653124809265e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00164031982421875, "masked_global_full_update_term/max": 0.00164031982421875, "masked_global_full_update_term/median": 0.00164031982421875, "masked_global_full_update_term/min": 0.00164031982421875, "masked_global_full_update_term/p25": 0.00164031982421875, "masked_global_full_update_term/p75": 0.00164031982421875, "masked_global_full_update_term/p85": 0.00164031982421875, "masked_global_full_update_term/p90": 0.00164031982421875, "masked_global_full_update_term/p95": 0.00164031982421875, "masked_global_full_update_term/p99": 0.00164031982421875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -4928.0, "masked_global_hessian_coeff/max": -4928.0, "masked_global_hessian_coeff/median": -4928.0, "masked_global_hessian_coeff/min": -4928.0, "masked_global_hessian_coeff/p25": -4928.0, "masked_global_hessian_coeff/p75": -4928.0, "masked_global_hessian_coeff/p99": -4928.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 4928.0, "masked_global_hessian_coeff_abs/max": 4928.0, "masked_global_hessian_coeff_abs/median": 4928.0, "masked_global_hessian_coeff_abs/min": 4928.0, "masked_global_hessian_coeff_abs/p25": 4928.0, "masked_global_hessian_coeff_abs/p75": 4928.0, "masked_global_hessian_coeff_abs/p99": 4928.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.1297709941864014, "masked_per_sentence_gradient_norm/max": 4.28125, "masked_per_sentence_gradient_norm/median": 1.03125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 1.8046875, "masked_per_sentence_gradient_norm/var": 1.3876535892486572, "masked_per_token_gradient_norm": 0.023272821679711342, "masked_per_token_gradient_norm/max": 14.625, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.6921043172478676e-10, "masked_per_token_gradient_norm/var": 0.14451900124549866, "masked_sentence_fisher_curvature": 515.3635864257812, "masked_sentence_fisher_curvature/max": 2128.0, "masked_sentence_fisher_curvature/median": 422.0, "masked_sentence_fisher_curvature/min": 0.1533203125, "masked_sentence_fisher_curvature/p25": 252.25, "masked_sentence_fisher_curvature/p75": 784.0, "masked_sentence_fisher_curvature/p85": 1022.0, "masked_sentence_fisher_curvature/p90": 1056.0, "masked_sentence_fisher_curvature/p95": 1432.0, "masked_sentence_fisher_curvature/p99": 1626.401611328125, "masked_sentence_fisher_curvature/var": 189141.5, "masked_sentence_fisher_kl_divergence": 1.6587303974802126e-08, "masked_sentence_fisher_kl_divergence/max": 6.845220923423767e-08, "masked_sentence_fisher_kl_divergence/median": 1.3562384992837906e-08, "masked_sentence_fisher_kl_divergence/min": 4.945377440890297e-12, "masked_sentence_fisher_kl_divergence/p25": 8.127244655042887e-09, "masked_sentence_fisher_kl_divergence/p75": 2.5262124836444855e-08, "masked_sentence_fisher_kl_divergence/p85": 3.294553607702255e-08, "masked_sentence_fisher_kl_divergence/p90": 3.3993273973464966e-08, "masked_sentence_fisher_kl_divergence/p95": 4.6100467443466187e-08, "masked_sentence_fisher_kl_divergence/p99": 5.230545596646152e-08, "masked_sentence_fisher_kl_divergence/var": 1.9593061886585063e-16, "masked_sentence_full_gradient_variance/max_squared_error": 2.49919056892395, "masked_sentence_full_gradient_variance/metric": 2.49919056892395, "masked_sentence_full_gradient_variance/p75": 2.49919056892395, "masked_sentence_full_gradient_variance/p90": 2.49919056892395, "masked_sentence_full_gradient_variance/p95": 2.49919056892395, "masked_sentence_full_gradient_variance/p99": 2.49919056892395, "masked_sentence_full_update_term": 0.000556228100322187, "masked_sentence_full_update_term/max": 0.00225830078125, "masked_sentence_full_update_term/median": 0.0004024505615234375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.000843048095703125, "masked_sentence_full_update_term/p85": 0.00141143798828125, "masked_sentence_full_update_term/p90": 0.0015869140625, "masked_sentence_full_update_term/p95": 0.001605987548828125, "masked_sentence_full_update_term/p99": 0.0020553595386445522, "masked_sentence_full_update_term/var": 3.774998162953125e-07, "masked_sentence_hessian_coeff": -13156.9794921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -6240.0, "masked_sentence_hessian_coeff/min": -54784.0, "masked_sentence_hessian_coeff/p25": -21888.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 241101472.0, "masked_sentence_hessian_coeff_abs": 13156.9794921875, "masked_sentence_hessian_coeff_abs/max": 54784.0, "masked_sentence_hessian_coeff_abs/median": 5728.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 21888.0, "masked_sentence_hessian_coeff_abs/p99": 51379.2109375, "masked_sentence_hessian_coeff_abs/var": 241101472.0, "masked_token_fisher_curvature": 548.33642578125, "masked_token_fisher_curvature/max": 309248.0, "masked_token_fisher_curvature/median": 2.927345865710862e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.8404768629304866e-23, "masked_token_fisher_curvature/p75": 4.654054919228656e-13, "masked_token_fisher_curvature/p85": 1.7007550923153758e-10, "masked_token_fisher_curvature/p90": 2.3050233721733093e-08, "masked_token_fisher_curvature/p95": 0.00023651123046875, "masked_token_fisher_curvature/p99": 2656.0, "masked_token_fisher_curvature/var": 77594848.0, "masked_token_fisher_kl_divergence": 1.7644889993562174e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 9.426887817391091e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.92825311006696e-34, "masked_token_fisher_kl_divergence/p75": 1.4992648602523627e-23, "masked_token_fisher_kl_divergence/p85": 5.479244377551255e-21, "masked_token_fisher_kl_divergence/p90": 7.420008617947671e-19, "masked_token_fisher_kl_divergence/p95": 7.605027718682322e-15, "masked_token_fisher_kl_divergence/p99": 8.568167686462402e-08, "masked_token_fisher_kl_divergence/var": 8.035957424663842e-14, "masked_token_full_update_term": 7.638903298357036e-06, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.116445779800415e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 7.047314121155779e-19, "masked_token_full_update_term/p85": 1.2434497875801753e-14, "masked_token_full_update_term/p90": 1.0018652574217413e-12, "masked_token_full_update_term/p95": 1.3005774235352874e-10, "masked_token_full_update_term/p99": 4.26173210144043e-06, "masked_token_full_update_term/var": 1.6013652626156727e-08, "masked_token_hessian_coeff": -10829.029296875, "masked_token_hessian_coeff/max": 3376.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -5603328.0, "masked_token_hessian_coeff/p25": -2.750311978161335e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.000736236572265625, "masked_token_hessian_coeff/var": 32713269248.0, "masked_token_hessian_coeff_abs": 10829.248046875, "masked_token_hessian_coeff_abs/max": 5603328.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.702007234096527e-08, "masked_token_hessian_coeff_abs/p99": 1944.0, "masked_token_hessian_coeff_abs/var": 32713263104.0, "mean_logprobs": -0.00787353515625, "mean_logprobs/var": 3.552436828613281e-05, "num_completions/total": 5184, "per_sentence_gradient_norm": 30.75960350036621, "per_sentence_gradient_norm/max": 169.0, "per_sentence_gradient_norm/median": 8.9375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 35.28125, "per_sentence_gradient_norm/var": 2040.2254638671875, "per_token_feature_norm": 190.33395385742188, "per_token_feature_norm/max": 280.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 101.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 155.3631134033203, "per_token_gradient_norm": 0.444178968667984, "per_token_gradient_norm/max": 266.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 3.2378011383116245e-10, "per_token_gradient_norm/var": 54.16453170776367, "per_token_policy_error_norm": 0.005382903385907412, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004813054110854864, "policy_entropy": 0.010097759775817394, "policy_entropy/max": 1.6953125, "policy_entropy/median": 7.166818249970675e-10, "policy_entropy/min": 5.802175688691957e-20, "policy_entropy/p25": 2.5579538487363607e-12, "policy_entropy/p75": 1.3597309589385986e-07, "policy_entropy/var": 0.00548637006431818, "policy_loss": -0.59375, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24375000596046448, "policy_sharpness": 9.722439765930176, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7974696159362793, "reward": 0.59375, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24375000596046448, "rewards/accuracy_reward": 0.59375, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24375000596046448, "sentence_fisher_curvature": 153983.15625, "sentence_fisher_curvature/max": 1130496.0, "sentence_fisher_curvature/median": 14720.0, "sentence_fisher_curvature/min": 37.0, "sentence_fisher_curvature/p25": 1030.0, "sentence_fisher_curvature/p75": 305664.0, "sentence_fisher_curvature/p85": 419328.0, "sentence_fisher_curvature/p90": 447488.0, "sentence_fisher_curvature/p95": 587776.0, "sentence_fisher_curvature/p99": 768615.5625, "sentence_fisher_curvature/var": 50014298112.0, "sentence_fisher_kl_divergence": 4.955533768224996e-06, "sentence_fisher_kl_divergence/max": 3.647804260253906e-05, "sentence_fisher_kl_divergence/median": 4.731118679046631e-07, "sentence_fisher_kl_divergence/min": 1.1932570487260818e-09, "sentence_fisher_kl_divergence/p25": 3.323657438158989e-08, "sentence_fisher_kl_divergence/p75": 9.8496675491333e-06, "sentence_fisher_kl_divergence/p85": 1.3485550880432129e-05, "sentence_fisher_kl_divergence/p90": 1.4394521713256836e-05, "sentence_fisher_kl_divergence/p95": 1.8924474716186523e-05, "sentence_fisher_kl_divergence/p99": 2.470020262990147e-05, "sentence_fisher_kl_divergence/var": 5.1827837160844936e-11, "sentence_full_gradient_variance/max_squared_error": 2858.626708984375, "sentence_full_gradient_variance/metric": 2858.626708984375, "sentence_full_gradient_variance/p75": 2858.626708984375, "sentence_full_gradient_variance/p90": 2858.626708984375, "sentence_full_gradient_variance/p95": 2858.626708984375, "sentence_full_gradient_variance/p99": 2858.626708984375, "sentence_full_update_term": 0.016429346054792404, "sentence_full_update_term/max": 0.1025390625, "sentence_full_update_term/median": 0.0038299560546875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.02099609375, "sentence_full_update_term/p85": 0.04345703125, "sentence_full_update_term/p90": 0.0545654296875, "sentence_full_update_term/p95": 0.0689697265625, "sentence_full_update_term/p99": 0.07841804623603821, "sentence_full_update_term/var": 0.0005587671766988933, "sentence_hessian_coeff": -22492.0, "sentence_hessian_coeff/max": 1003520.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -232448.0, "sentence_hessian_coeff/p25": -102272.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 378984.40625, "sentence_hessian_coeff/var": 23500969984.0, "sentence_hessian_coeff_abs": 89713.3359375, "sentence_hessian_coeff_abs/max": 1003520.0, "sentence_hessian_coeff_abs/median": 68608.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 126464.0, "sentence_hessian_coeff_abs/p99": 378984.40625, "sentence_hessian_coeff_abs/var": 15878983680.0, "step": 54, "token_fisher_curvature": 110887.4921875, "token_fisher_curvature/max": 137363456.0, "token_fisher_curvature/median": 3.3068166260807885e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.9955732277841793e-23, "token_fisher_curvature/p75": 5.613287612504791e-13, "token_fisher_curvature/p85": 2.510205376893282e-10, "token_fisher_curvature/p90": 4.6566128730773926e-08, "token_fisher_curvature/p95": 0.0015869140625, "token_fisher_curvature/p99": 29568.0, "token_fisher_curvature/var": 6731856871424.0, "token_fisher_kl_divergence": 3.5688699426827952e-06, "token_fisher_kl_divergence/max": 0.004425048828125, "token_fisher_kl_divergence/median": 1.064962222048366e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.409735596163769e-34, "token_fisher_kl_divergence/p75": 1.809457589959748e-23, "token_fisher_kl_divergence/p85": 8.099752558119247e-21, "token_fisher_kl_divergence/p90": 1.497554250745603e-18, "token_fisher_kl_divergence/p95": 5.10702591327572e-14, "token_fisher_kl_divergence/p99": 9.499490261077881e-07, "token_fisher_kl_divergence/var": 6.9712782213571245e-09, "token_full_update_term": 0.00015939006698317826, "token_full_update_term/max": 0.09814453125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.116445779800415e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.5178830414797062e-18, "token_full_update_term/p85": 1.6542323066914832e-14, "token_full_update_term/p90": 1.6839862837514374e-12, "token_full_update_term/p95": 2.3101165425032377e-10, "token_full_update_term/p99": 0.000301361083984375, "token_full_update_term/var": 7.2294974415854085e-06, "token_hessian_coeff": -16845.474609375, "token_hessian_coeff/max": 133169152.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -19398656.0, "token_hessian_coeff/p25": -3.6088749766349792e-09, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00135040283203125, "token_hessian_coeff/var": 4057970245632.0, "token_hessian_coeff_abs": 109830.3046875, "token_hessian_coeff_abs/max": 133169152.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 4.563480615615845e-08, "token_hessian_coeff_abs/p99": 389120.0, "token_hessian_coeff_abs/var": 4046191329280.0 }, { "accuracy_reward": 0.71875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20427633821964264, "adam_stats/lm_head/lr_effective_max": 3.37397214025259e-05, "adam_stats/lm_head/lr_effective_mean": 1.3045483096552069e-11, "adam_stats/lm_head/lr_effective_min": -3.419425047468394e-05, "adam_stats/lm_head/lr_effective_std": 8.610874147052527e-07, "adam_stats/lr_effective_max": 3.7810863432241604e-05, "adam_stats/lr_effective_mean": 1.0974797459706309e-10, "adam_stats/lr_effective_min": -3.7035628338344395e-05, "adam_stats/m_t_max": 0.0005297287134453654, "adam_stats/m_t_mean": 8.537486039308928e-13, "adam_stats/m_t_min": -0.00043260070378892124, "adam_stats/v_t_max": 2.5435645511606708e-05, "adam_stats/v_t_mean": 1.727940815605955e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.71875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20427633821964264, "all_logprobs": -0.010509559884667397, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.75, "all_logprobs/p1": -0.25063496828079224, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00020313262939453125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.018566975370049477, "clip_ratio": 0.0, "completion_length": 598.2083740234375, "completion_length/correct": 486.6811828613281, "completion_length/correct/max": 951.0, "completion_length/correct/median": 438.0, "completion_length/correct/min": 194.0, "completion_length/correct/p25": 305.0, "completion_length/correct/p75": 684.0, "completion_length/correct/var": 47111.484375, "completion_length/incorrect": 883.2222290039062, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 426.0, "completion_length/incorrect/p25": 809.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 41651.41015625, "completion_length/max": 1024.0, "completion_length/median": 543.0, "completion_length/min": 194.0, "completion_length/p25": 382.25, "completion_length/p75": 830.75, "completion_length/var": 77242.6328125, "curvature_clip_ratio_token_fisher": 0.01048269122838974, "curvature_clip_ratio_token_hessian": 0.008131921291351318, "curvature_clip_ratio_total_fisher": 0.01048269122838974, "curvature_clip_ratio_total_full": 0.01048269122838974, "curvature_clip_ratio_total_hessian": 0.008131921291351318, "epoch": 0.088, "feature_vector_variance/max_squared_error": 67578.8359375, "feature_vector_variance/metric": 31231.986328125, "generated_tokens/total": 3142574.0, "global_fisher_curvature": 176128.0, "global_fisher_curvature/max": 176128.0, "global_fisher_curvature/median": 176128.0, "global_fisher_curvature/min": 176128.0, "global_fisher_curvature/p25": 176128.0, "global_fisher_curvature/p75": 176128.0, "global_fisher_curvature/p85": 176128.0, "global_fisher_curvature/p90": 176128.0, "global_fisher_curvature/p95": 176128.0, "global_fisher_curvature/p99": 176128.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.304813385009766e-06, "global_fisher_kl_divergence/max": 5.304813385009766e-06, "global_fisher_kl_divergence/median": 5.304813385009766e-06, "global_fisher_kl_divergence/min": 5.304813385009766e-06, "global_fisher_kl_divergence/p25": 5.304813385009766e-06, "global_fisher_kl_divergence/p75": 5.304813385009766e-06, "global_fisher_kl_divergence/p85": 5.304813385009766e-06, "global_fisher_kl_divergence/p90": 5.304813385009766e-06, "global_fisher_kl_divergence/p95": 5.304813385009766e-06, "global_fisher_kl_divergence/p99": 5.304813385009766e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.091796875, "global_full_update_term/max": 0.091796875, "global_full_update_term/median": 0.091796875, "global_full_update_term/min": 0.091796875, "global_full_update_term/p25": 0.091796875, "global_full_update_term/p75": 0.091796875, "global_full_update_term/p85": 0.091796875, "global_full_update_term/p90": 0.091796875, "global_full_update_term/p95": 0.091796875, "global_full_update_term/p99": 0.091796875, "global_full_update_term/var": NaN, "global_hessian_coeff": 43776.0, "global_hessian_coeff/max": 43776.0, "global_hessian_coeff/median": 43776.0, "global_hessian_coeff/min": 43776.0, "global_hessian_coeff/p25": 43776.0, "global_hessian_coeff/p75": 43776.0, "global_hessian_coeff/p99": 43776.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 43776.0, "global_hessian_coeff_abs/max": 43776.0, "global_hessian_coeff_abs/median": 43776.0, "global_hessian_coeff_abs/min": 43776.0, "global_hessian_coeff_abs/p25": 43776.0, "global_hessian_coeff_abs/p75": 43776.0, "global_hessian_coeff_abs/p99": 43776.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.07437389343976974, "learning_rate": 7.5e-06, "loss": -0.7188, "masked_global_fisher_curvature": 960.0, "masked_global_fisher_curvature/max": 960.0, "masked_global_fisher_curvature/median": 960.0, "masked_global_fisher_curvature/min": 960.0, "masked_global_fisher_curvature/p25": 960.0, "masked_global_fisher_curvature/p75": 960.0, "masked_global_fisher_curvature/p85": 960.0, "masked_global_fisher_curvature/p90": 960.0, "masked_global_fisher_curvature/p95": 960.0, "masked_global_fisher_curvature/p99": 960.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/max": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/median": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/min": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/p25": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/p75": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/p85": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/p90": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/p95": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/p99": 2.8870999813079834e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0024261474609375, "masked_global_full_update_term/max": 0.0024261474609375, "masked_global_full_update_term/median": 0.0024261474609375, "masked_global_full_update_term/min": 0.0024261474609375, "masked_global_full_update_term/p25": 0.0024261474609375, "masked_global_full_update_term/p75": 0.0024261474609375, "masked_global_full_update_term/p85": 0.0024261474609375, "masked_global_full_update_term/p90": 0.0024261474609375, "masked_global_full_update_term/p95": 0.0024261474609375, "masked_global_full_update_term/p99": 0.0024261474609375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -7552.0, "masked_global_hessian_coeff/max": -7552.0, "masked_global_hessian_coeff/median": -7552.0, "masked_global_hessian_coeff/min": -7552.0, "masked_global_hessian_coeff/p25": -7552.0, "masked_global_hessian_coeff/p75": -7552.0, "masked_global_hessian_coeff/p99": -7552.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 7552.0, "masked_global_hessian_coeff_abs/max": 7552.0, "masked_global_hessian_coeff_abs/median": 7552.0, "masked_global_hessian_coeff_abs/min": 7552.0, "masked_global_hessian_coeff_abs/p25": 7552.0, "masked_global_hessian_coeff_abs/p75": 7552.0, "masked_global_hessian_coeff_abs/p99": 7552.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.8230794668197632, "masked_per_sentence_gradient_norm/max": 8.4375, "masked_per_sentence_gradient_norm/median": 1.4921875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.546875, "masked_per_sentence_gradient_norm/var": 3.4158706665039062, "masked_per_token_gradient_norm": 0.043662719428539276, "masked_per_token_gradient_norm/max": 13.3125, "masked_per_token_gradient_norm/median": 7.771561172376096e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.2281816452741623e-08, "masked_per_token_gradient_norm/var": 0.2879859507083893, "masked_sentence_fisher_curvature": 740.71875, "masked_sentence_fisher_curvature/max": 2352.0, "masked_sentence_fisher_curvature/median": 520.0, "masked_sentence_fisher_curvature/min": 31.5, "masked_sentence_fisher_curvature/p25": 253.5, "masked_sentence_fisher_curvature/p75": 969.0, "masked_sentence_fisher_curvature/p85": 1536.0, "masked_sentence_fisher_curvature/p90": 1908.0, "masked_sentence_fisher_curvature/p95": 2036.0, "masked_sentence_fisher_curvature/p99": 2352.0, "masked_sentence_fisher_curvature/var": 404626.4375, "masked_sentence_fisher_kl_divergence": 2.2315287395713312e-08, "masked_sentence_fisher_kl_divergence/max": 7.078051567077637e-08, "masked_sentence_fisher_kl_divergence/median": 1.57160684466362e-08, "masked_sentence_fisher_kl_divergence/min": 9.458744898438454e-10, "masked_sentence_fisher_kl_divergence/p25": 7.654307410120964e-09, "masked_sentence_fisher_kl_divergence/p75": 2.916203811764717e-08, "masked_sentence_fisher_kl_divergence/p85": 4.6333298087120056e-08, "masked_sentence_fisher_kl_divergence/p90": 5.75091689825058e-08, "masked_sentence_fisher_kl_divergence/p95": 6.123445928096771e-08, "masked_sentence_fisher_kl_divergence/p99": 7.078051567077637e-08, "masked_sentence_fisher_kl_divergence/var": 3.6738635306968626e-16, "masked_sentence_full_gradient_variance/max_squared_error": 6.546680450439453, "masked_sentence_full_gradient_variance/metric": 6.546680450439453, "masked_sentence_full_gradient_variance/p75": 6.546680450439453, "masked_sentence_full_gradient_variance/p90": 6.546680450439453, "masked_sentence_full_gradient_variance/p95": 6.546680450439453, "masked_sentence_full_gradient_variance/p99": 6.546680450439453, "masked_sentence_full_update_term": 0.0009547770023345947, "masked_sentence_full_update_term/max": 0.0040283203125, "masked_sentence_full_update_term/median": 0.000743865966796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00139617919921875, "masked_sentence_full_update_term/p85": 0.001987457275390625, "masked_sentence_full_update_term/p90": 0.00229644775390625, "masked_sentence_full_update_term/p95": 0.00267791748046875, "masked_sentence_full_update_term/p99": 0.0038543706759810448, "masked_sentence_full_update_term/var": 8.657758598928922e-07, "masked_sentence_hessian_coeff": -20861.833984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -14784.0, "masked_sentence_hessian_coeff/min": -83968.0, "masked_sentence_hessian_coeff/p25": -31616.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 505407840.0, "masked_sentence_hessian_coeff_abs": 20861.833984375, "masked_sentence_hessian_coeff_abs/max": 83968.0, "masked_sentence_hessian_coeff_abs/median": 14464.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 31616.0, "masked_sentence_hessian_coeff_abs/p99": 83968.0, "masked_sentence_hessian_coeff_abs/var": 505407840.0, "masked_token_fisher_curvature": 717.475341796875, "masked_token_fisher_curvature/max": 323584.0, "masked_token_fisher_curvature/median": 1.111307226797642e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.809457589959748e-24, "masked_token_fisher_curvature/p75": 1.936228954946273e-13, "masked_token_fisher_curvature/p85": 1.3005774235352874e-10, "masked_token_fisher_curvature/p90": 2.3865140974521637e-08, "masked_token_fisher_curvature/p95": 0.0005121231079101562, "masked_token_fisher_curvature/p99": 3936.0, "masked_token_fisher_curvature/var": 116922184.0, "masked_token_fisher_kl_divergence": 2.1621774592972542e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 3.3526588471893e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.454293787815414e-35, "masked_token_fisher_kl_divergence/p75": 5.841963076155758e-24, "masked_token_fisher_kl_divergence/p85": 3.917527381051139e-21, "masked_token_fisher_kl_divergence/p90": 7.182839392716467e-19, "masked_token_fisher_kl_divergence/p95": 1.5459855617905305e-14, "masked_token_fisher_kl_divergence/p99": 1.1874362826347351e-07, "masked_token_fisher_kl_divergence/var": 1.0624420993584488e-13, "masked_token_full_update_term": 1.4101478882366791e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -8.791685104370117e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.5248219353836134e-15, "masked_token_full_update_term/p85": 3.7836400679225335e-13, "masked_token_full_update_term/p90": 8.526512829121202e-12, "masked_token_full_update_term/p95": 1.9063008949160576e-09, "masked_token_full_update_term/p99": 0.000247955322265625, "masked_token_full_update_term/var": 3.163453854426734e-08, "masked_token_hessian_coeff": -20930.939453125, "masked_token_hessian_coeff/max": 1664.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -6160384.0, "masked_token_hessian_coeff/p25": -2.9616057872772217e-07, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.002655029296875, "masked_token_hessian_coeff/var": 70888046592.0, "masked_token_hessian_coeff_abs": 20931.140625, "masked_token_hessian_coeff_abs/max": 6160384.0, "masked_token_hessian_coeff_abs/median": 7.247535904753022e-12, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.996755599975586e-06, "masked_token_hessian_coeff_abs/p99": 301056.0, "masked_token_hessian_coeff_abs/var": 70888030208.0, "mean_logprobs": -0.0108642578125, "mean_logprobs/var": 4.9591064453125e-05, "num_completions/total": 5280, "per_sentence_gradient_norm": 66.27604675292969, "per_sentence_gradient_norm/max": 266.0, "per_sentence_gradient_norm/median": 45.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 114.875, "per_sentence_gradient_norm/var": 4403.021484375, "per_token_feature_norm": 190.3779754638672, "per_token_feature_norm/max": 276.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 99.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 160.49525451660156, "per_token_gradient_norm": 0.9966968297958374, "per_token_gradient_norm/max": 288.0, "per_token_gradient_norm/median": 1.2168044349891716e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.618172973394394e-08, "per_token_gradient_norm/var": 136.8017578125, "per_token_policy_error_norm": 0.006031261757016182, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00581206614151597, "policy_entropy": 0.010604066774249077, "policy_entropy/max": 3.0, "policy_entropy/median": 5.748006515204906e-10, "policy_entropy/min": 1.4756902127946014e-21, "policy_entropy/p25": 8.384404281969182e-13, "policy_entropy/p75": 1.1827796697616577e-07, "policy_entropy/var": 0.0060950773768126965, "policy_loss": -0.71875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20427633821964264, "policy_sharpness": 9.707895278930664, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8615524768829346, "reward": 0.71875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20427633821964264, "rewards/accuracy_reward": 0.71875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20427633821964264, "sentence_fisher_curvature": 347991.5, "sentence_fisher_curvature/max": 2031616.0, "sentence_fisher_curvature/median": 286720.0, "sentence_fisher_curvature/min": 46.0, "sentence_fisher_curvature/p25": 2238.0, "sentence_fisher_curvature/p75": 518144.0, "sentence_fisher_curvature/p85": 662528.0, "sentence_fisher_curvature/p90": 819200.0, "sentence_fisher_curvature/p95": 985088.0, "sentence_fisher_curvature/p99": 1471285.0, "sentence_fisher_curvature/var": 136807522304.0, "sentence_fisher_kl_divergence": 1.0478397598490119e-05, "sentence_fisher_kl_divergence/max": 6.103515625e-05, "sentence_fisher_kl_divergence/median": 8.64267349243164e-06, "sentence_fisher_kl_divergence/min": 1.382431946694851e-09, "sentence_fisher_kl_divergence/p25": 6.728805601596832e-08, "sentence_fisher_kl_divergence/p75": 1.558661460876465e-05, "sentence_fisher_kl_divergence/p85": 1.9997358322143555e-05, "sentence_fisher_kl_divergence/p90": 2.467632293701172e-05, "sentence_fisher_kl_divergence/p95": 2.962350845336914e-05, "sentence_fisher_kl_divergence/p99": 4.42743839812465e-05, "sentence_fisher_kl_divergence/var": 1.2384399061815543e-10, "sentence_full_gradient_variance/max_squared_error": 8551.6708984375, "sentence_full_gradient_variance/metric": 8551.6708984375, "sentence_full_gradient_variance/p75": 8551.6708984375, "sentence_full_gradient_variance/p90": 8551.6708984375, "sentence_full_gradient_variance/p95": 8551.6708984375, "sentence_full_gradient_variance/p99": 8551.6708984375, "sentence_full_update_term": 0.036876678466796875, "sentence_full_update_term/max": 0.1826171875, "sentence_full_update_term/median": 0.0283203125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.059814453125, "sentence_full_update_term/p85": 0.0693359375, "sentence_full_update_term/p90": 0.07861328125, "sentence_full_update_term/p95": 0.0986328125, "sentence_full_update_term/p99": 0.16962894797325134, "sentence_full_update_term/var": 0.0014147162437438965, "sentence_hessian_coeff": 33092.0, "sentence_hessian_coeff/max": 1056768.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -325632.0, "sentence_hessian_coeff/p25": -43968.0, "sentence_hessian_coeff/p75": 87168.0, "sentence_hessian_coeff/p99": 537294.5, "sentence_hessian_coeff/var": 37441159168.0, "sentence_hessian_coeff_abs": 121601.3359375, "sentence_hessian_coeff_abs/max": 1056768.0, "sentence_hessian_coeff_abs/median": 84480.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 158976.0, "sentence_hessian_coeff_abs/p99": 537294.5, "sentence_hessian_coeff_abs/var": 23605231616.0, "step": 55, "token_fisher_curvature": 282386.3125, "token_fisher_curvature/max": 149946368.0, "token_fisher_curvature/median": 1.4230153513872246e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.223047896236262e-24, "token_fisher_curvature/p75": 3.161915174132446e-13, "token_fisher_curvature/p85": 2.8194335754960775e-10, "token_fisher_curvature/p90": 9.220093488693237e-08, "token_fisher_curvature/p95": 0.01226806640625, "token_fisher_curvature/p99": 427320.0, "token_fisher_curvature/var": 20115810680832.0, "token_fisher_kl_divergence": 8.508566679665819e-06, "token_fisher_kl_divergence/max": 0.0045166015625, "token_fisher_kl_divergence/median": 4.279570410823989e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.69561582228375e-35, "token_fisher_kl_divergence/p75": 9.512577044359818e-24, "token_fisher_kl_divergence/p85": 8.470329472543003e-21, "token_fisher_kl_divergence/p90": 2.778268066994105e-18, "token_fisher_kl_divergence/p95": 3.694822225952521e-13, "token_fisher_kl_divergence/p99": 1.285388134419918e-05, "token_fisher_kl_divergence/var": 1.8264417533941923e-08, "token_full_update_term": 0.0003507932706270367, "token_full_update_term/max": 0.09912109375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -8.791685104370117e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.373101715136272e-15, "token_full_update_term/p85": 5.966893645847904e-13, "token_full_update_term/p90": 1.693933882052079e-11, "token_full_update_term/p95": 1.1583324521780014e-08, "token_full_update_term/p99": 0.004386305809020996, "token_full_update_term/var": 1.7392923837178387e-05, "token_hessian_coeff": 24012.103515625, "token_hessian_coeff/max": 149946368.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -19529728.0, "token_hessian_coeff/p25": -3.557652235031128e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.006805419921875, "token_hessian_coeff/var": 13182450008064.0, "token_hessian_coeff_abs": 254341.515625, "token_hessian_coeff_abs/max": 149946368.0, "token_hessian_coeff_abs/median": 1.0402345651527867e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.5331974029541016e-06, "token_hessian_coeff_abs/p99": 5397888.0, "token_hessian_coeff_abs/var": 13118333779968.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166668951511383, "adam_stats/lm_head/lr_effective_max": 3.317523805890232e-05, "adam_stats/lm_head/lr_effective_mean": -2.636081977702709e-11, "adam_stats/lm_head/lr_effective_min": -3.470580486464314e-05, "adam_stats/lm_head/lr_effective_std": 8.381729230677593e-07, "adam_stats/lr_effective_max": 3.6052882933290675e-05, "adam_stats/lr_effective_mean": 5.821948478867967e-11, "adam_stats/lr_effective_min": -3.664729229058139e-05, "adam_stats/m_t_max": 0.0005139491404406726, "adam_stats/m_t_mean": -4.294098239422846e-14, "adam_stats/m_t_min": -0.00037932945997454226, "adam_stats/v_t_max": 2.5410816306248307e-05, "adam_stats/v_t_mean": 1.726507825594581e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166668951511383, "all_logprobs": -0.010037346743047237, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.15625, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -1.1920928955078125e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00012159347534179688, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.016242342069745064, "clip_ratio": 0.0, "completion_length": 522.9479370117188, "completion_length/correct": 458.7930908203125, "completion_length/correct/max": 831.0, "completion_length/correct/median": 454.0, "completion_length/correct/min": 258.0, "completion_length/correct/p25": 438.25, "completion_length/correct/p75": 528.25, "completion_length/correct/var": 12641.95703125, "completion_length/incorrect": 620.868408203125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 522.0, "completion_length/incorrect/min": 254.0, "completion_length/incorrect/p25": 278.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 112975.09375, "completion_length/max": 1024.0, "completion_length/median": 465.0, "completion_length/min": 254.0, "completion_length/p25": 338.0, "completion_length/p75": 562.25, "completion_length/var": 57934.19140625, "curvature_clip_ratio_token_fisher": 0.007668864447623491, "curvature_clip_ratio_token_hessian": 0.006194849032908678, "curvature_clip_ratio_total_fisher": 0.007668864447623491, "curvature_clip_ratio_total_full": 0.007668864447623491, "curvature_clip_ratio_total_hessian": 0.006194849032908678, "epoch": 0.0896, "feature_vector_variance/max_squared_error": 69260.3671875, "feature_vector_variance/metric": 31484.3828125, "generated_tokens/total": 3192777.0, "global_fisher_curvature": 130560.0, "global_fisher_curvature/max": 130560.0, "global_fisher_curvature/median": 130560.0, "global_fisher_curvature/min": 130560.0, "global_fisher_curvature/p25": 130560.0, "global_fisher_curvature/p75": 130560.0, "global_fisher_curvature/p85": 130560.0, "global_fisher_curvature/p90": 130560.0, "global_fisher_curvature/p95": 130560.0, "global_fisher_curvature/p99": 130560.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.6656856536865234e-06, "global_fisher_kl_divergence/max": 3.6656856536865234e-06, "global_fisher_kl_divergence/median": 3.6656856536865234e-06, "global_fisher_kl_divergence/min": 3.6656856536865234e-06, "global_fisher_kl_divergence/p25": 3.6656856536865234e-06, "global_fisher_kl_divergence/p75": 3.6656856536865234e-06, "global_fisher_kl_divergence/p85": 3.6656856536865234e-06, "global_fisher_kl_divergence/p90": 3.6656856536865234e-06, "global_fisher_kl_divergence/p95": 3.6656856536865234e-06, "global_fisher_kl_divergence/p99": 3.6656856536865234e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.04931640625, "global_full_update_term/max": 0.04931640625, "global_full_update_term/median": 0.04931640625, "global_full_update_term/min": 0.04931640625, "global_full_update_term/p25": 0.04931640625, "global_full_update_term/p75": 0.04931640625, "global_full_update_term/p85": 0.04931640625, "global_full_update_term/p90": 0.04931640625, "global_full_update_term/p95": 0.04931640625, "global_full_update_term/p99": 0.04931640625, "global_full_update_term/var": NaN, "global_hessian_coeff": 19968.0, "global_hessian_coeff/max": 19968.0, "global_hessian_coeff/median": 19968.0, "global_hessian_coeff/min": 19968.0, "global_hessian_coeff/p25": 19968.0, "global_hessian_coeff/p75": 19968.0, "global_hessian_coeff/p99": 19968.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 19968.0, "global_hessian_coeff_abs/max": 19968.0, "global_hessian_coeff_abs/median": 19968.0, "global_hessian_coeff_abs/min": 19968.0, "global_hessian_coeff_abs/p25": 19968.0, "global_hessian_coeff_abs/p75": 19968.0, "global_hessian_coeff_abs/p99": 19968.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05121906101703644, "learning_rate": 7.238253774731245e-06, "loss": -0.6042, "masked_global_fisher_curvature": 440.0, "masked_global_fisher_curvature/max": 440.0, "masked_global_fisher_curvature/median": 440.0, "masked_global_fisher_curvature/min": 440.0, "masked_global_fisher_curvature/p25": 440.0, "masked_global_fisher_curvature/p75": 440.0, "masked_global_fisher_curvature/p85": 440.0, "masked_global_fisher_curvature/p90": 440.0, "masked_global_fisher_curvature/p95": 440.0, "masked_global_fisher_curvature/p99": 440.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/max": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/median": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/min": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/p25": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/p75": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/p85": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/p90": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/p95": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/p99": 1.2398231774568558e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0030975341796875, "masked_global_full_update_term/max": 0.0030975341796875, "masked_global_full_update_term/median": 0.0030975341796875, "masked_global_full_update_term/min": 0.0030975341796875, "masked_global_full_update_term/p25": 0.0030975341796875, "masked_global_full_update_term/p75": 0.0030975341796875, "masked_global_full_update_term/p85": 0.0030975341796875, "masked_global_full_update_term/p90": 0.0030975341796875, "masked_global_full_update_term/p95": 0.0030975341796875, "masked_global_full_update_term/p99": 0.0030975341796875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5760.0, "masked_global_hessian_coeff/max": -5760.0, "masked_global_hessian_coeff/median": -5760.0, "masked_global_hessian_coeff/min": -5760.0, "masked_global_hessian_coeff/p25": -5760.0, "masked_global_hessian_coeff/p75": -5760.0, "masked_global_hessian_coeff/p99": -5760.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5760.0, "masked_global_hessian_coeff_abs/max": 5760.0, "masked_global_hessian_coeff_abs/median": 5760.0, "masked_global_hessian_coeff_abs/min": 5760.0, "masked_global_hessian_coeff_abs/p25": 5760.0, "masked_global_hessian_coeff_abs/p75": 5760.0, "masked_global_hessian_coeff_abs/p99": 5760.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.7017822265625, "masked_per_sentence_gradient_norm/max": 10.4375, "masked_per_sentence_gradient_norm/median": 1.4609375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.62109375, "masked_per_sentence_gradient_norm/var": 3.662893533706665, "masked_per_token_gradient_norm": 0.04159514233469963, "masked_per_token_gradient_norm/max": 15.1875, "masked_per_token_gradient_norm/median": 9.575673587391975e-16, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 7.30506144464016e-09, "masked_per_token_gradient_norm/var": 0.31394779682159424, "masked_sentence_fisher_curvature": 620.4791870117188, "masked_sentence_fisher_curvature/max": 2080.0, "masked_sentence_fisher_curvature/median": 484.0, "masked_sentence_fisher_curvature/min": 15.1875, "masked_sentence_fisher_curvature/p25": 174.75, "masked_sentence_fisher_curvature/p75": 1039.0, "masked_sentence_fisher_curvature/p85": 1160.0, "masked_sentence_fisher_curvature/p90": 1356.0, "masked_sentence_fisher_curvature/p95": 1436.0, "masked_sentence_fisher_curvature/p99": 1715.201171875, "masked_sentence_fisher_curvature/var": 238499.140625, "masked_sentence_fisher_kl_divergence": 1.7444108380004764e-08, "masked_sentence_fisher_kl_divergence/max": 5.844049155712128e-08, "masked_sentence_fisher_kl_divergence/median": 1.3620592653751373e-08, "masked_sentence_fisher_kl_divergence/min": 4.2746250983327627e-10, "masked_sentence_fisher_kl_divergence/p25": 4.9112713895738125e-09, "masked_sentence_fisher_kl_divergence/p75": 2.916203811764717e-08, "masked_sentence_fisher_kl_divergence/p85": 3.259629011154175e-08, "masked_sentence_fisher_kl_divergence/p90": 3.8067810237407684e-08, "masked_sentence_fisher_kl_divergence/p95": 4.039611667394638e-08, "masked_sentence_fisher_kl_divergence/p99": 4.826582511441302e-08, "masked_sentence_fisher_kl_divergence/var": 1.8853665651103103e-16, "masked_sentence_full_gradient_variance/max_squared_error": 6.303027629852295, "masked_sentence_full_gradient_variance/metric": 6.303027629852295, "masked_sentence_full_gradient_variance/p75": 6.303027629852295, "masked_sentence_full_gradient_variance/p90": 6.303027629852295, "masked_sentence_full_gradient_variance/p95": 6.303027629852295, "masked_sentence_full_gradient_variance/p99": 6.303027629852295, "masked_sentence_full_update_term": 0.0008536776294931769, "masked_sentence_full_update_term/max": 0.0048828125, "masked_sentence_full_update_term/median": 0.000701904296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0013332366943359375, "masked_sentence_full_update_term/p85": 0.0018367767333984375, "masked_sentence_full_update_term/p90": 0.002044677734375, "masked_sentence_full_update_term/p95": 0.002674102783203125, "masked_sentence_full_update_term/p99": 0.003433232195675373, "masked_sentence_full_update_term/var": 9.25693257158855e-07, "masked_sentence_hessian_coeff": -15882.6669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -14272.0, "masked_sentence_hessian_coeff/min": -48896.0, "masked_sentence_hessian_coeff/p25": -29056.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 219575936.0, "masked_sentence_hessian_coeff_abs": 15882.6669921875, "masked_sentence_hessian_coeff_abs/max": 48896.0, "masked_sentence_hessian_coeff_abs/median": 14080.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 29056.0, "masked_sentence_hessian_coeff_abs/p99": 40870.42578125, "masked_sentence_hessian_coeff_abs/var": 219575936.0, "masked_token_fisher_curvature": 754.3756103515625, "masked_token_fisher_curvature/max": 354304.0, "masked_token_fisher_curvature/median": 1.2536087619363645e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.799860938915471e-24, "masked_token_fisher_curvature/p75": 2.1316282072803006e-13, "masked_token_fisher_curvature/p85": 1.07775122160092e-10, "masked_token_fisher_curvature/p90": 1.6367266653105617e-08, "masked_token_fisher_curvature/p95": 0.000244140625, "masked_token_fisher_curvature/p99": 3642.5, "masked_token_fisher_curvature/var": 142309696.0, "masked_token_fisher_kl_divergence": 2.1202646749429732e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 3.530152550864028e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.0682892660272949e-34, "masked_token_fisher_kl_divergence/p75": 5.997059441009451e-24, "masked_token_fisher_kl_divergence/p85": 3.0307897643942934e-21, "masked_token_fisher_kl_divergence/p90": 4.594094947670511e-19, "masked_token_fisher_kl_divergence/p95": 6.855627177060342e-15, "masked_token_fisher_kl_divergence/p99": 1.0236362868454307e-07, "masked_token_fisher_kl_divergence/var": 1.124027086685242e-13, "masked_token_full_update_term": 1.3068147382000461e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.0728836059570312e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.544975507059235e-16, "masked_token_full_update_term/p85": 2.7711166694643907e-13, "masked_token_full_update_term/p90": 8.01492205937393e-12, "masked_token_full_update_term/p95": 6.77744083077414e-10, "masked_token_full_update_term/p99": 5.626678466796875e-05, "masked_token_full_update_term/var": 3.194429964992196e-08, "masked_token_hessian_coeff": -20082.28515625, "masked_token_hessian_coeff/max": 96.5, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -6750208.0, "masked_token_hessian_coeff/p25": -1.2293457984924316e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.00127410888671875, "masked_token_hessian_coeff/var": 75081007104.0, "masked_token_hessian_coeff_abs": 20082.318359375, "masked_token_hessian_coeff_abs/max": 6750208.0, "masked_token_hessian_coeff_abs/median": 4.5075054799781356e-14, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.3262033462524414e-06, "masked_token_hessian_coeff_abs/p99": 43776.0, "masked_token_hessian_coeff_abs/var": 75081007104.0, "mean_logprobs": -0.00946044921875, "mean_logprobs/var": 6.4849853515625e-05, "num_completions/total": 5376, "per_sentence_gradient_norm": 35.07421875, "per_sentence_gradient_norm/max": 191.0, "per_sentence_gradient_norm/median": 25.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 52.25, "per_sentence_gradient_norm/var": 1767.3197021484375, "per_token_feature_norm": 191.2540740966797, "per_token_feature_norm/max": 276.0, "per_token_feature_norm/median": 191.0, "per_token_feature_norm/min": 96.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 160.9635009765625, "per_token_gradient_norm": 0.7516607642173767, "per_token_gradient_norm/max": 280.0, "per_token_gradient_norm/median": 1.5334955527634975e-15, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.138602763414383e-09, "per_token_gradient_norm/var": 103.5406494140625, "per_token_policy_error_norm": 0.005702901165932417, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005589855369180441, "policy_entropy": 0.010377095080912113, "policy_entropy/max": 2.21875, "policy_entropy/median": 5.566107574850321e-10, "policy_entropy/min": 4.319868030996932e-20, "policy_entropy/p25": 1.3216094885137863e-12, "policy_entropy/p75": 1.0337680578231812e-07, "policy_entropy/var": 0.0065057724714279175, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166668951511383, "policy_sharpness": 9.725211143493652, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7826792001724243, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166668951511383, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166668951511383, "sentence_fisher_curvature": 233353.71875, "sentence_fisher_curvature/max": 1228800.0, "sentence_fisher_curvature/median": 166912.0, "sentence_fisher_curvature/min": 15.1875, "sentence_fisher_curvature/p25": 263.75, "sentence_fisher_curvature/p75": 393216.0, "sentence_fisher_curvature/p85": 491520.0, "sentence_fisher_curvature/p90": 530432.0, "sentence_fisher_curvature/p95": 655360.0, "sentence_fisher_curvature/p99": 991437.5625, "sentence_fisher_curvature/var": 63932551168.0, "sentence_fisher_kl_divergence": 6.560208476003027e-06, "sentence_fisher_kl_divergence/max": 3.457069396972656e-05, "sentence_fisher_kl_divergence/median": 4.708766937255859e-06, "sentence_fisher_kl_divergence/min": 4.2746250983327627e-10, "sentence_fisher_kl_divergence/p25": 7.399648893624544e-09, "sentence_fisher_kl_divergence/p75": 1.1071562767028809e-05, "sentence_fisher_kl_divergence/p85": 1.3828277587890625e-05, "sentence_fisher_kl_divergence/p90": 1.4901161193847656e-05, "sentence_fisher_kl_divergence/p95": 1.84476375579834e-05, "sentence_fisher_kl_divergence/p99": 2.7889034754480235e-05, "sentence_fisher_kl_divergence/var": 5.0532449752394015e-11, "sentence_full_gradient_variance/max_squared_error": 2936.36083984375, "sentence_full_gradient_variance/metric": 2936.36083984375, "sentence_full_gradient_variance/p75": 2936.36083984375, "sentence_full_gradient_variance/p90": 2936.36083984375, "sentence_full_gradient_variance/p95": 2936.36083984375, "sentence_full_gradient_variance/p99": 2936.36083984375, "sentence_full_update_term": 0.019201278686523438, "sentence_full_update_term/max": 0.11962890625, "sentence_full_update_term/median": 0.015625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.028900146484375, "sentence_full_update_term/p85": 0.039794921875, "sentence_full_update_term/p90": 0.04443359375, "sentence_full_update_term/p95": 0.0634765625, "sentence_full_update_term/p99": 0.08344738185405731, "sentence_full_update_term/var": 0.0004914114251732826, "sentence_hessian_coeff": 18843.333984375, "sentence_hessian_coeff/max": 622592.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -415744.0, "sentence_hessian_coeff/p25": -29824.0, "sentence_hessian_coeff/p75": 72192.0, "sentence_hessian_coeff/p99": 486400.4375, "sentence_hessian_coeff/var": 24582666240.0, "sentence_hessian_coeff_abs": 91570.0, "sentence_hessian_coeff_abs/max": 622592.0, "sentence_hessian_coeff_abs/median": 34560.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 135168.0, "sentence_hessian_coeff_abs/p99": 486400.4375, "sentence_hessian_coeff_abs/var": 16468147200.0, "step": 56, "token_fisher_curvature": 209699.25, "token_fisher_curvature/max": 150994944.0, "token_fisher_curvature/median": 1.4907779871675686e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 4.239300639334267e-24, "token_fisher_curvature/p75": 2.877698079828406e-13, "token_fisher_curvature/p85": 1.8917489796876907e-10, "token_fisher_curvature/p90": 4.349203663878143e-08, "token_fisher_curvature/p95": 0.0019508004188537598, "token_fisher_curvature/p99": 125360.0, "token_fisher_curvature/var": 15372758548480.0, "token_fisher_kl_divergence": 5.896081802347908e-06, "token_fisher_kl_divergence/max": 0.004241943359375, "token_fisher_kl_divergence/median": 4.200684320301888e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.1886598875514971e-34, "token_fisher_kl_divergence/p75": 8.116709760676584e-24, "token_fisher_kl_divergence/p85": 5.320425699941074e-21, "token_fisher_kl_divergence/p90": 1.2251272790849387e-18, "token_fisher_kl_divergence/p95": 5.477736320091964e-14, "token_fisher_kl_divergence/p99": 3.529246896505356e-06, "token_fisher_kl_divergence/var": 1.2146995409523242e-08, "token_full_update_term": 0.00025306493625976145, "token_full_update_term/max": 0.0966796875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.0728836059570312e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 7.73686670285656e-16, "token_full_update_term/p85": 4.1033842990145786e-13, "token_full_update_term/p90": 1.2391865311656147e-11, "token_full_update_term/p95": 2.153683453798294e-09, "token_full_update_term/p99": 0.002090156078338623, "token_full_update_term/var": 1.2077030078216922e-05, "token_hessian_coeff": 11023.234375, "token_hessian_coeff/max": 153092096.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -19922944.0, "token_hessian_coeff/p25": -1.5366822481155396e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.003021240234375, "token_hessian_coeff/var": 10076059336704.0, "token_hessian_coeff_abs": 202240.65625, "token_hessian_coeff_abs/max": 153092096.0, "token_hessian_coeff_abs/median": 7.638334409421077e-14, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.6540288925170898e-06, "token_hessian_coeff_abs/p99": 2588352.0, "token_hessian_coeff_abs/var": 10035279167488.0 }, { "accuracy_reward": 0.625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2368421107530594, "adam_stats/lm_head/lr_effective_max": 3.2485349947819486e-05, "adam_stats/lm_head/lr_effective_mean": -4.1225654834331493e-11, "adam_stats/lm_head/lr_effective_min": -3.311014734208584e-05, "adam_stats/lm_head/lr_effective_std": 7.973550850692845e-07, "adam_stats/lr_effective_max": 3.5914621548727155e-05, "adam_stats/lr_effective_mean": 7.221759995568178e-11, "adam_stats/lr_effective_min": -3.4438231523381546e-05, "adam_stats/m_t_max": 0.0004553062899503857, "adam_stats/m_t_mean": -1.3644448526757558e-13, "adam_stats/m_t_min": -0.0003673039027489722, "adam_stats/v_t_max": 2.5385805201949552e-05, "adam_stats/v_t_mean": 1.7249747637226864e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2368421107530594, "all_logprobs": -0.00979045033454895, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -4.5, "all_logprobs/p1": -0.2178613245487213, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0001583099365234375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.011956948786973953, "clip_ratio": 0.0, "completion_length": 514.5625, "completion_length/correct": 446.0333557128906, "completion_length/correct/max": 929.0, "completion_length/correct/median": 395.0, "completion_length/correct/min": 184.0, "completion_length/correct/p25": 343.0, "completion_length/correct/p75": 530.5, "completion_length/correct/var": 34030.203125, "completion_length/incorrect": 628.7777709960938, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 532.0, "completion_length/incorrect/min": 302.0, "completion_length/incorrect/p25": 387.0, "completion_length/incorrect/p75": 876.5, "completion_length/incorrect/var": 70365.546875, "completion_length/max": 1024.0, "completion_length/median": 415.0, "completion_length/min": 184.0, "completion_length/p25": 349.0, "completion_length/p75": 676.75, "completion_length/var": 54968.1640625, "curvature_clip_ratio_token_fisher": 0.009089436382055283, "curvature_clip_ratio_token_hessian": 0.0069233570247888565, "curvature_clip_ratio_total_fisher": 0.009089436382055283, "curvature_clip_ratio_total_full": 0.009089436382055283, "curvature_clip_ratio_total_hessian": 0.0069233570247888565, "epoch": 0.0912, "feature_vector_variance/max_squared_error": 59261.1640625, "feature_vector_variance/metric": 31269.23046875, "generated_tokens/total": 3242175.0, "global_fisher_curvature": 129536.0, "global_fisher_curvature/max": 129536.0, "global_fisher_curvature/median": 129536.0, "global_fisher_curvature/min": 129536.0, "global_fisher_curvature/p25": 129536.0, "global_fisher_curvature/p75": 129536.0, "global_fisher_curvature/p85": 129536.0, "global_fisher_curvature/p90": 129536.0, "global_fisher_curvature/p95": 129536.0, "global_fisher_curvature/p99": 129536.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.3974647521972656e-06, "global_fisher_kl_divergence/max": 3.3974647521972656e-06, "global_fisher_kl_divergence/median": 3.3974647521972656e-06, "global_fisher_kl_divergence/min": 3.3974647521972656e-06, "global_fisher_kl_divergence/p25": 3.3974647521972656e-06, "global_fisher_kl_divergence/p75": 3.3974647521972656e-06, "global_fisher_kl_divergence/p85": 3.3974647521972656e-06, "global_fisher_kl_divergence/p90": 3.3974647521972656e-06, "global_fisher_kl_divergence/p95": 3.3974647521972656e-06, "global_fisher_kl_divergence/p99": 3.3974647521972656e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.038330078125, "global_full_update_term/max": 0.038330078125, "global_full_update_term/median": 0.038330078125, "global_full_update_term/min": 0.038330078125, "global_full_update_term/p25": 0.038330078125, "global_full_update_term/p75": 0.038330078125, "global_full_update_term/p85": 0.038330078125, "global_full_update_term/p90": 0.038330078125, "global_full_update_term/p95": 0.038330078125, "global_full_update_term/p99": 0.038330078125, "global_full_update_term/var": NaN, "global_hessian_coeff": 37120.0, "global_hessian_coeff/max": 37120.0, "global_hessian_coeff/median": 37120.0, "global_hessian_coeff/min": 37120.0, "global_hessian_coeff/p25": 37120.0, "global_hessian_coeff/p75": 37120.0, "global_hessian_coeff/p99": 37120.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 37120.0, "global_hessian_coeff_abs/max": 37120.0, "global_hessian_coeff_abs/median": 37120.0, "global_hessian_coeff_abs/min": 37120.0, "global_hessian_coeff_abs/p25": 37120.0, "global_hessian_coeff_abs/p75": 37120.0, "global_hessian_coeff_abs/p99": 37120.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04906139150261879, "learning_rate": 6.976826446919061e-06, "loss": -0.625, "masked_global_fisher_curvature": 896.0, "masked_global_fisher_curvature/max": 896.0, "masked_global_fisher_curvature/median": 896.0, "masked_global_fisher_curvature/min": 896.0, "masked_global_fisher_curvature/p25": 896.0, "masked_global_fisher_curvature/p75": 896.0, "masked_global_fisher_curvature/p85": 896.0, "masked_global_fisher_curvature/p90": 896.0, "masked_global_fisher_curvature/p95": 896.0, "masked_global_fisher_curvature/p99": 896.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/max": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/median": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/min": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/p25": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/p75": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/p85": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/p90": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/p95": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/p99": 2.3515895009040833e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0023956298828125, "masked_global_full_update_term/max": 0.0023956298828125, "masked_global_full_update_term/median": 0.0023956298828125, "masked_global_full_update_term/min": 0.0023956298828125, "masked_global_full_update_term/p25": 0.0023956298828125, "masked_global_full_update_term/p75": 0.0023956298828125, "masked_global_full_update_term/p85": 0.0023956298828125, "masked_global_full_update_term/p90": 0.0023956298828125, "masked_global_full_update_term/p95": 0.0023956298828125, "masked_global_full_update_term/p99": 0.0023956298828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -4352.0, "masked_global_hessian_coeff/max": -4352.0, "masked_global_hessian_coeff/median": -4352.0, "masked_global_hessian_coeff/min": -4352.0, "masked_global_hessian_coeff/p25": -4352.0, "masked_global_hessian_coeff/p75": -4352.0, "masked_global_hessian_coeff/p99": -4352.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 4352.0, "masked_global_hessian_coeff_abs/max": 4352.0, "masked_global_hessian_coeff_abs/median": 4352.0, "masked_global_hessian_coeff_abs/min": 4352.0, "masked_global_hessian_coeff_abs/p25": 4352.0, "masked_global_hessian_coeff_abs/p75": 4352.0, "masked_global_hessian_coeff_abs/p99": 4352.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.6210989952087402, "masked_per_sentence_gradient_norm/max": 10.375, "masked_per_sentence_gradient_norm/median": 0.7421875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.3671875, "masked_per_sentence_gradient_norm/var": 11.137917518615723, "masked_per_token_gradient_norm": 0.04098312184214592, "masked_per_token_gradient_norm/max": 14.0625, "masked_per_token_gradient_norm/median": 3.7192471324942744e-15, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.868503987789154e-09, "masked_per_token_gradient_norm/var": 0.30532851815223694, "masked_sentence_fisher_curvature": 714.33984375, "masked_sentence_fisher_curvature/max": 3296.0, "masked_sentence_fisher_curvature/median": 552.0, "masked_sentence_fisher_curvature/min": 6.375, "masked_sentence_fisher_curvature/p25": 251.0, "masked_sentence_fisher_curvature/p75": 1015.0, "masked_sentence_fisher_curvature/p85": 1152.0, "masked_sentence_fisher_curvature/p90": 1328.0, "masked_sentence_fisher_curvature/p95": 2084.0, "masked_sentence_fisher_curvature/p99": 2916.001220703125, "masked_sentence_fisher_curvature/var": 419056.03125, "masked_sentence_fisher_kl_divergence": 1.87222717329405e-08, "masked_sentence_fisher_kl_divergence/max": 8.614733815193176e-08, "masked_sentence_fisher_kl_divergence/median": 1.4435499906539917e-08, "masked_sentence_fisher_kl_divergence/min": 1.673470251262188e-10, "masked_sentence_fisher_kl_divergence/p25": 6.570189725607634e-09, "masked_sentence_fisher_kl_divergence/p75": 2.6600901037454605e-08, "masked_sentence_fisher_kl_divergence/p85": 3.026798367500305e-08, "masked_sentence_fisher_kl_divergence/p90": 3.480818122625351e-08, "masked_sentence_fisher_kl_divergence/p95": 5.459878593683243e-08, "masked_sentence_fisher_kl_divergence/p99": 7.641504851108039e-08, "masked_sentence_fisher_kl_divergence/var": 2.878852700084961e-16, "masked_sentence_full_gradient_variance/max_squared_error": 17.079557418823242, "masked_sentence_full_gradient_variance/metric": 17.079557418823242, "masked_sentence_full_gradient_variance/p75": 17.079557418823242, "masked_sentence_full_gradient_variance/p90": 17.079557418823242, "masked_sentence_full_gradient_variance/p95": 17.079557418823242, "masked_sentence_full_gradient_variance/p99": 17.079557418823242, "masked_sentence_full_update_term": 0.0012308111181482673, "masked_sentence_full_update_term/max": 0.005584716796875, "masked_sentence_full_update_term/median": 0.00020694732666015625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002460479736328125, "masked_sentence_full_update_term/p85": 0.00347137451171875, "masked_sentence_full_update_term/p90": 0.003509521484375, "masked_sentence_full_update_term/p95": 0.00397491455078125, "masked_sentence_full_update_term/p99": 0.005091859493404627, "masked_sentence_full_update_term/var": 2.3906527530925814e-06, "masked_sentence_hessian_coeff": -15380.6669921875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7456.0, "masked_sentence_hessian_coeff/min": -84480.0, "masked_sentence_hessian_coeff/p25": -26368.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 347888640.0, "masked_sentence_hessian_coeff_abs": 15380.6669921875, "masked_sentence_hessian_coeff_abs/max": 84480.0, "masked_sentence_hessian_coeff_abs/median": 7456.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 26368.0, "masked_sentence_hessian_coeff_abs/p99": 72320.0390625, "masked_sentence_hessian_coeff_abs/var": 347888640.0, "masked_token_fisher_curvature": 831.9150390625, "masked_token_fisher_curvature/max": 374784.0, "masked_token_fisher_curvature/median": 6.2341624917916505e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.468617140587942e-24, "masked_token_fisher_curvature/p75": 1.1368683772161603e-13, "masked_token_fisher_curvature/p85": 8.185452315956354e-11, "masked_token_fisher_curvature/p90": 1.3969838619232178e-08, "masked_token_fisher_curvature/p95": 0.00035858154296875, "masked_token_fisher_curvature/p99": 3296.625, "masked_token_fisher_curvature/var": 170364032.0, "masked_token_fisher_kl_divergence": 2.1793869819930478e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 1.6368863783335995e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 6.46992090692587e-35, "masked_token_fisher_kl_divergence/p75": 2.9726803263624432e-24, "masked_token_fisher_kl_divergence/p85": 2.1440521477374477e-21, "masked_token_fisher_kl_divergence/p90": 3.6591823321385775e-19, "masked_token_fisher_kl_divergence/p95": 9.381384558082573e-15, "masked_token_fisher_kl_divergence/p99": 8.638926374260336e-08, "masked_token_fisher_kl_divergence/var": 1.1692228673969296e-13, "masked_token_full_update_term": 1.2630424862436485e-05, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.516674041748047e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.972516759986888e-16, "masked_token_full_update_term/p85": 2.3092638912203256e-13, "masked_token_full_update_term/p90": 5.201172825763933e-12, "masked_token_full_update_term/p95": 6.315445943982922e-10, "masked_token_full_update_term/p99": 5.7821162045001984e-05, "masked_token_full_update_term/var": 3.033891715631398e-08, "masked_token_hessian_coeff": -20182.037109375, "masked_token_hessian_coeff/max": 544.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -6946816.0, "masked_token_hessian_coeff/p25": -1.257285475730896e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0009713619947433472, "masked_token_hessian_coeff/var": 77675364352.0, "masked_token_hessian_coeff_abs": 20182.091796875, "masked_token_hessian_coeff_abs/max": 6946816.0, "masked_token_hessian_coeff_abs/median": 1.9095836023552692e-13, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.1771917343139648e-06, "masked_token_hessian_coeff_abs/p99": 53248.0, "masked_token_hessian_coeff_abs/var": 77675364352.0, "mean_logprobs": -0.00994873046875, "mean_logprobs/var": 2.5391578674316406e-05, "num_completions/total": 5472, "per_sentence_gradient_norm": 46.380210876464844, "per_sentence_gradient_norm/max": 241.0, "per_sentence_gradient_norm/median": 28.125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 70.375, "per_sentence_gradient_norm/var": 2992.710205078125, "per_token_feature_norm": 190.390380859375, "per_token_feature_norm/max": 253.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 103.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 152.80018615722656, "per_token_gradient_norm": 0.8778712153434753, "per_token_gradient_norm/max": 274.0, "per_token_gradient_norm/median": 5.88418203051333e-15, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 8.847564458847046e-09, "per_token_gradient_norm/var": 122.65068817138672, "per_token_policy_error_norm": 0.0060492390766739845, "per_token_policy_error_norm/max": 1.953125, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005546597298234701, "policy_entropy": 0.010645248927175999, "policy_entropy/max": 1.453125, "policy_entropy/median": 4.3291947804391384e-10, "policy_entropy/min": 7.940933880509066e-21, "policy_entropy/p25": 9.379164112033322e-13, "policy_entropy/p75": 9.546056389808655e-08, "policy_entropy/var": 0.005483812186866999, "policy_loss": -0.625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2368421107530594, "policy_sharpness": 9.706287384033203, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.927004098892212, "reward": 0.625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2368421107530594, "rewards/accuracy_reward": 0.625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2368421107530594, "sentence_fisher_curvature": 279210.53125, "sentence_fisher_curvature/max": 1187840.0, "sentence_fisher_curvature/median": 127488.0, "sentence_fisher_curvature/min": 14.625, "sentence_fisher_curvature/p25": 552.0, "sentence_fisher_curvature/p75": 520192.0, "sentence_fisher_curvature/p85": 690176.0, "sentence_fisher_curvature/p90": 727040.0, "sentence_fisher_curvature/p95": 788480.0, "sentence_fisher_curvature/p99": 1094451.5, "sentence_fisher_curvature/var": 96945659904.0, "sentence_fisher_kl_divergence": 7.317901236092439e-06, "sentence_fisher_kl_divergence/max": 3.123283386230469e-05, "sentence_fisher_kl_divergence/median": 3.337860107421875e-06, "sentence_fisher_kl_divergence/min": 3.838067641481757e-10, "sentence_fisher_kl_divergence/p25": 1.4435499906539917e-08, "sentence_fisher_kl_divergence/p75": 1.3634562492370605e-05, "sentence_fisher_kl_divergence/p85": 1.8090009689331055e-05, "sentence_fisher_kl_divergence/p90": 1.9073486328125e-05, "sentence_fisher_kl_divergence/p95": 2.065300941467285e-05, "sentence_fisher_kl_divergence/p99": 2.8628119252971373e-05, "sentence_fisher_kl_divergence/var": 6.662251306188693e-11, "sentence_full_gradient_variance/max_squared_error": 5036.16015625, "sentence_full_gradient_variance/metric": 5036.16015625, "sentence_full_gradient_variance/p75": 5036.16015625, "sentence_full_gradient_variance/p90": 5036.16015625, "sentence_full_gradient_variance/p95": 5036.16015625, "sentence_full_gradient_variance/p99": 5036.16015625, "sentence_full_update_term": 0.023876827210187912, "sentence_full_update_term/max": 0.11181640625, "sentence_full_update_term/median": 0.0179443359375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.03814697265625, "sentence_full_update_term/p85": 0.04840087890625, "sentence_full_update_term/p90": 0.0521240234375, "sentence_full_update_term/p95": 0.08984375, "sentence_full_update_term/p99": 0.10393068939447403, "sentence_full_update_term/var": 0.0007438780157826841, "sentence_hessian_coeff": 45203.66796875, "sentence_hessian_coeff/max": 638976.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -264192.0, "sentence_hessian_coeff/p25": -30464.0, "sentence_hessian_coeff/p75": 154368.0, "sentence_hessian_coeff/p99": 576717.0, "sentence_hessian_coeff/var": 29888946176.0, "sentence_hessian_coeff_abs": 113569.671875, "sentence_hessian_coeff_abs/max": 638976.0, "sentence_hessian_coeff_abs/median": 91648.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 189952.0, "sentence_hessian_coeff_abs/p99": 576717.0, "sentence_hessian_coeff_abs/var": 18919991296.0, "step": 57, "token_fisher_curvature": 251712.53125, "token_fisher_curvature/max": 154140672.0, "token_fisher_curvature/median": 7.826584432629735e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.908056841006738e-24, "token_fisher_curvature/p75": 1.5631940186722204e-13, "token_fisher_curvature/p85": 1.673470251262188e-10, "token_fisher_curvature/p90": 5.9138983488082886e-08, "token_fisher_curvature/p95": 0.00933837890625, "token_fisher_curvature/p99": 278912.0, "token_fisher_curvature/var": 18356151255040.0, "token_fisher_kl_divergence": 6.5956960497715045e-06, "token_fisher_kl_divergence/max": 0.0040283203125, "token_fisher_kl_divergence/median": 2.0510383535746307e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 7.63601130294158e-35, "token_fisher_kl_divergence/p75": 4.084204274480574e-24, "token_fisher_kl_divergence/p85": 4.393983413881683e-21, "token_fisher_kl_divergence/p90": 1.5517643593698782e-18, "token_fisher_kl_divergence/p95": 2.4513724383723456e-13, "token_fisher_kl_divergence/p99": 7.311813533306122e-06, "token_fisher_kl_divergence/var": 1.260200033215142e-08, "token_full_update_term": 0.00028753772494383156, "token_full_update_term/max": 0.09375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.516674041748047e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.730727086790012e-16, "token_full_update_term/p85": 3.7836400679225335e-13, "token_full_update_term/p90": 9.606537787476555e-12, "token_full_update_term/p95": 3.5070115700364113e-09, "token_full_update_term/p99": 0.0032968521118164062, "token_full_update_term/var": 1.3471580132318195e-05, "token_hessian_coeff": 21968.037109375, "token_hessian_coeff/max": 152043520.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -20185088.0, "token_hessian_coeff/p25": -1.6298145055770874e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.002441883087158203, "token_hessian_coeff/var": 11607859003392.0, "token_hessian_coeff_abs": 226755.1875, "token_hessian_coeff_abs/max": 152043520.0, "token_hessian_coeff_abs/median": 3.1796787425264483e-13, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.5497207641601562e-06, "token_hessian_coeff_abs/p99": 4259840.0, "token_hessian_coeff_abs/var": 11556923375616.0 }, { "accuracy_reward": 0.4270833432674408, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 0.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24725878238677979, "adam_stats/lm_head/lr_effective_max": 3.2447500416310504e-05, "adam_stats/lm_head/lr_effective_mean": -3.6960629001825396e-11, "adam_stats/lm_head/lr_effective_min": -3.2119089155457914e-05, "adam_stats/lm_head/lr_effective_std": 7.498014724660607e-07, "adam_stats/lr_effective_max": 3.369958358234726e-05, "adam_stats/lr_effective_mean": 2.549320650413467e-11, "adam_stats/lr_effective_min": -3.441675289650448e-05, "adam_stats/m_t_max": 0.0003867920895572752, "adam_stats/m_t_mean": 2.720239127267793e-13, "adam_stats/m_t_min": -0.0003983137139584869, "adam_stats/v_t_max": 2.5361143343616277e-05, "adam_stats/v_t_mean": 1.7235286548650253e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.4270833432674408, "advantages/max": 1.0, "advantages/median": 0.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24725878238677979, "all_logprobs": -0.011811803095042706, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.5, "all_logprobs/p1": -0.251953125, "all_logprobs/p10": -3.6954879760742188e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0004329681396484375, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.018769310787320137, "clip_ratio": 0.0, "completion_length": 490.1875, "completion_length/correct": 430.78045654296875, "completion_length/correct/max": 991.0, "completion_length/correct/median": 346.0, "completion_length/correct/min": 229.0, "completion_length/correct/p25": 302.0, "completion_length/correct/p75": 577.0, "completion_length/correct/var": 26611.125, "completion_length/incorrect": 534.4727172851562, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 457.0, "completion_length/incorrect/min": 215.0, "completion_length/incorrect/p25": 364.5, "completion_length/incorrect/p75": 722.0, "completion_length/incorrect/var": 66258.703125, "completion_length/max": 1024.0, "completion_length/median": 398.0, "completion_length/min": 215.0, "completion_length/p25": 305.75, "completion_length/p75": 580.0, "completion_length/var": 51526.0703125, "curvature_clip_ratio_token_fisher": 0.008160143159329891, "curvature_clip_ratio_token_hessian": 0.0058013517409563065, "curvature_clip_ratio_total_fisher": 0.008160143159329891, "curvature_clip_ratio_total_full": 0.008160143159329891, "curvature_clip_ratio_total_hessian": 0.0058013517409563065, "epoch": 0.0928, "feature_vector_variance/max_squared_error": 61934.30078125, "feature_vector_variance/metric": 31820.560546875, "generated_tokens/total": 3289233.0, "global_fisher_curvature": 149504.0, "global_fisher_curvature/max": 149504.0, "global_fisher_curvature/median": 149504.0, "global_fisher_curvature/min": 149504.0, "global_fisher_curvature/p25": 149504.0, "global_fisher_curvature/p75": 149504.0, "global_fisher_curvature/p85": 149504.0, "global_fisher_curvature/p90": 149504.0, "global_fisher_curvature/p95": 149504.0, "global_fisher_curvature/p99": 149504.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.635883331298828e-06, "global_fisher_kl_divergence/max": 3.635883331298828e-06, "global_fisher_kl_divergence/median": 3.635883331298828e-06, "global_fisher_kl_divergence/min": 3.635883331298828e-06, "global_fisher_kl_divergence/p25": 3.635883331298828e-06, "global_fisher_kl_divergence/p75": 3.635883331298828e-06, "global_fisher_kl_divergence/p85": 3.635883331298828e-06, "global_fisher_kl_divergence/p90": 3.635883331298828e-06, "global_fisher_kl_divergence/p95": 3.635883331298828e-06, "global_fisher_kl_divergence/p99": 3.635883331298828e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.04248046875, "global_full_update_term/max": 0.04248046875, "global_full_update_term/median": 0.04248046875, "global_full_update_term/min": 0.04248046875, "global_full_update_term/p25": 0.04248046875, "global_full_update_term/p75": 0.04248046875, "global_full_update_term/p85": 0.04248046875, "global_full_update_term/p90": 0.04248046875, "global_full_update_term/p95": 0.04248046875, "global_full_update_term/p99": 0.04248046875, "global_full_update_term/var": NaN, "global_hessian_coeff": 59648.0, "global_hessian_coeff/max": 59648.0, "global_hessian_coeff/median": 59648.0, "global_hessian_coeff/min": 59648.0, "global_hessian_coeff/p25": 59648.0, "global_hessian_coeff/p75": 59648.0, "global_hessian_coeff/p99": 59648.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 59648.0, "global_hessian_coeff_abs/max": 59648.0, "global_hessian_coeff_abs/median": 59648.0, "global_hessian_coeff_abs/min": 59648.0, "global_hessian_coeff_abs/p25": 59648.0, "global_hessian_coeff_abs/p75": 59648.0, "global_hessian_coeff_abs/p99": 59648.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.039835959672927856, "learning_rate": 6.7160365254926005e-06, "loss": -0.4271, "masked_global_fisher_curvature": 1472.0, "masked_global_fisher_curvature/max": 1472.0, "masked_global_fisher_curvature/median": 1472.0, "masked_global_fisher_curvature/min": 1472.0, "masked_global_fisher_curvature/p25": 1472.0, "masked_global_fisher_curvature/p75": 1472.0, "masked_global_fisher_curvature/p85": 1472.0, "masked_global_fisher_curvature/p90": 1472.0, "masked_global_fisher_curvature/p95": 1472.0, "masked_global_fisher_curvature/p99": 1472.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/max": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/median": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/min": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/p25": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/p75": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/p85": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/p90": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/p95": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/p99": 3.585591912269592e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0018157958984375, "masked_global_full_update_term/max": 0.0018157958984375, "masked_global_full_update_term/median": 0.0018157958984375, "masked_global_full_update_term/min": 0.0018157958984375, "masked_global_full_update_term/p25": 0.0018157958984375, "masked_global_full_update_term/p75": 0.0018157958984375, "masked_global_full_update_term/p85": 0.0018157958984375, "masked_global_full_update_term/p90": 0.0018157958984375, "masked_global_full_update_term/p95": 0.0018157958984375, "masked_global_full_update_term/p99": 0.0018157958984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -4608.0, "masked_global_hessian_coeff/max": -4608.0, "masked_global_hessian_coeff/median": -4608.0, "masked_global_hessian_coeff/min": -4608.0, "masked_global_hessian_coeff/p25": -4608.0, "masked_global_hessian_coeff/p75": -4608.0, "masked_global_hessian_coeff/p99": -4608.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 4608.0, "masked_global_hessian_coeff_abs/max": 4608.0, "masked_global_hessian_coeff_abs/median": 4608.0, "masked_global_hessian_coeff_abs/min": 4608.0, "masked_global_hessian_coeff_abs/p25": 4608.0, "masked_global_hessian_coeff_abs/p75": 4608.0, "masked_global_hessian_coeff_abs/p99": 4608.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.1716818809509277, "masked_per_sentence_gradient_norm/max": 13.3125, "masked_per_sentence_gradient_norm/median": 0.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.65625, "masked_per_sentence_gradient_norm/var": 11.39355754852295, "masked_per_token_gradient_norm": 0.032664768397808075, "masked_per_token_gradient_norm/max": 13.625, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.800799509510398e-10, "masked_per_token_gradient_norm/var": 0.2153366506099701, "masked_sentence_fisher_curvature": 876.37548828125, "masked_sentence_fisher_curvature/max": 3232.0, "masked_sentence_fisher_curvature/median": 704.0, "masked_sentence_fisher_curvature/min": 1.7734375, "masked_sentence_fisher_curvature/p25": 467.5, "masked_sentence_fisher_curvature/p75": 1250.0, "masked_sentence_fisher_curvature/p85": 1648.0, "masked_sentence_fisher_curvature/p90": 1720.0, "masked_sentence_fisher_curvature/p95": 2050.0, "masked_sentence_fisher_curvature/p99": 2715.20166015625, "masked_sentence_fisher_curvature/var": 438239.0625, "masked_sentence_fisher_kl_divergence": 2.1325291754692444e-08, "masked_sentence_fisher_kl_divergence/max": 7.869675755500793e-08, "masked_sentence_fisher_kl_divergence/median": 1.7113052308559418e-08, "masked_sentence_fisher_kl_divergence/min": 4.320099833421409e-11, "masked_sentence_fisher_kl_divergence/p25": 1.1365045793354511e-08, "masked_sentence_fisher_kl_divergence/p75": 3.0442606657743454e-08, "masked_sentence_fisher_kl_divergence/p85": 4.0046870708465576e-08, "masked_sentence_fisher_kl_divergence/p90": 4.190951585769653e-08, "masked_sentence_fisher_kl_divergence/p95": 4.988396540284157e-08, "masked_sentence_fisher_kl_divergence/p99": 6.586783030115839e-08, "masked_sentence_fisher_kl_divergence/var": 2.5953753895339765e-16, "masked_sentence_full_gradient_variance/max_squared_error": 15.393420219421387, "masked_sentence_full_gradient_variance/metric": 15.393420219421387, "masked_sentence_full_gradient_variance/p75": 15.393420219421387, "masked_sentence_full_gradient_variance/p90": 15.393420219421387, "masked_sentence_full_gradient_variance/p95": 15.393420219421387, "masked_sentence_full_gradient_variance/p99": 15.393420219421387, "masked_sentence_full_update_term": 0.00110505276825279, "masked_sentence_full_update_term/max": 0.006683349609375, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0019435882568359375, "masked_sentence_full_update_term/p85": 0.002925872802734375, "masked_sentence_full_update_term/p90": 0.0040740966796875, "masked_sentence_full_update_term/p95": 0.005035400390625, "masked_sentence_full_update_term/p99": 0.006364441942423582, "masked_sentence_full_update_term/var": 3.1014992600830738e-06, "masked_sentence_hessian_coeff": -14976.333984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -109056.0, "masked_sentence_hessian_coeff/p25": -27264.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 515610336.0, "masked_sentence_hessian_coeff_abs": 14976.333984375, "masked_sentence_hessian_coeff_abs/max": 109056.0, "masked_sentence_hessian_coeff_abs/median": 0.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 27264.0, "masked_sentence_hessian_coeff_abs/p99": 74035.3125, "masked_sentence_hessian_coeff_abs/var": 515610336.0, "masked_token_fisher_curvature": 886.878173828125, "masked_token_fisher_curvature/max": 399360.0, "masked_token_fisher_curvature/median": 2.417770844642675e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.963083675318166e-23, "masked_token_fisher_curvature/p75": 2.0463630789890885e-12, "masked_token_fisher_curvature/p85": 1.418811734765768e-09, "masked_token_fisher_curvature/p90": 2.0302832126617432e-07, "masked_token_fisher_curvature/p95": 0.00180816650390625, "masked_token_fisher_curvature/p99": 5632.0, "masked_token_fisher_curvature/var": 167582320.0, "masked_token_fisher_kl_divergence": 2.1591201715409625e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 5.869125134844328e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.2097247463182325e-33, "masked_token_fisher_kl_divergence/p75": 4.983763190631992e-23, "masked_token_fisher_kl_divergence/p85": 3.451659260061274e-20, "masked_token_fisher_kl_divergence/p90": 4.933119884809045e-18, "masked_token_fisher_kl_divergence/p95": 4.39648317751562e-14, "masked_token_fisher_kl_divergence/p99": 1.3690441846847534e-07, "masked_token_fisher_kl_divergence/var": 9.932095564827575e-14, "masked_token_full_update_term": 9.920032425725367e-06, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.6566128730773926e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.8396496823179335e-20, "masked_token_full_update_term/p85": 2.9531932455029164e-14, "masked_token_full_update_term/p90": 1.4210854715202004e-12, "masked_token_full_update_term/p95": 2.8194335754960775e-10, "masked_token_full_update_term/p99": 2.0265579223632812e-05, "masked_token_full_update_term/var": 2.1003156547294566e-08, "masked_token_hessian_coeff": -16759.671875, "masked_token_hessian_coeff/max": 108.5, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -7045120.0, "masked_token_hessian_coeff/p25": -6.889422365929931e-11, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.002155601978302002, "masked_token_hessian_coeff/var": 59789144064.0, "masked_token_hessian_coeff_abs": 16759.6875, "masked_token_hessian_coeff_abs/max": 7045120.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.9453076422214508e-08, "masked_token_hessian_coeff_abs/p99": 15217.5, "masked_token_hessian_coeff_abs/var": 59789144064.0, "mean_logprobs": -0.01214599609375, "mean_logprobs/var": 6.151199340820312e-05, "num_completions/total": 5568, "per_sentence_gradient_norm": 26.46484375, "per_sentence_gradient_norm/max": 181.0, "per_sentence_gradient_norm/median": 0.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 51.6875, "per_sentence_gradient_norm/var": 1409.0528564453125, "per_token_feature_norm": 191.04287719726562, "per_token_feature_norm/max": 256.0, "per_token_feature_norm/median": 191.0, "per_token_feature_norm/min": 110.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 160.57826232910156, "per_token_gradient_norm": 0.703943133354187, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.8558133635669947e-10, "per_token_gradient_norm/var": 98.16681671142578, "per_token_policy_error_norm": 0.006889768876135349, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006849245633929968, "policy_entropy": 0.012058934196829796, "policy_entropy/max": 1.9765625, "policy_entropy/median": 2.1391315385699272e-09, "policy_entropy/min": 5.770411953169921e-21, "policy_entropy/p25": 3.510081114654895e-12, "policy_entropy/p75": 3.2223761081695557e-07, "policy_entropy/var": 0.006473719608038664, "policy_loss": -0.4270833432674408, "policy_loss/max": 0.0, "policy_loss/median": 0.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24725878238677979, "policy_sharpness": 9.666668891906738, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 2.154916524887085, "reward": 0.4270833432674408, "reward/max": 1.0, "reward/median": 0.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24725878238677979, "rewards/accuracy_reward": 0.4270833432674408, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 0.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24725878238677979, "sentence_fisher_curvature": 248423.25, "sentence_fisher_curvature/max": 1916928.0, "sentence_fisher_curvature/median": 4192.0, "sentence_fisher_curvature/min": 23.375, "sentence_fisher_curvature/p25": 687.0, "sentence_fisher_curvature/p75": 471552.0, "sentence_fisher_curvature/p85": 641024.0, "sentence_fisher_curvature/p90": 708608.0, "sentence_fisher_curvature/p95": 873472.0, "sentence_fisher_curvature/p99": 1138690.5, "sentence_fisher_curvature/var": 127476531200.0, "sentence_fisher_kl_divergence": 6.046768248779699e-06, "sentence_fisher_kl_divergence/max": 4.673004150390625e-05, "sentence_fisher_kl_divergence/median": 1.019798219203949e-07, "sentence_fisher_kl_divergence/min": 5.675246939063072e-10, "sentence_fisher_kl_divergence/p25": 1.673470251262188e-08, "sentence_fisher_kl_divergence/p75": 1.1473894119262695e-05, "sentence_fisher_kl_divergence/p85": 1.558661460876465e-05, "sentence_fisher_kl_divergence/p90": 1.722574234008789e-05, "sentence_fisher_kl_divergence/p95": 2.1278858184814453e-05, "sentence_fisher_kl_divergence/p99": 2.770430000964552e-05, "sentence_fisher_kl_divergence/var": 7.55744633540445e-11, "sentence_full_gradient_variance/max_squared_error": 2051.26318359375, "sentence_full_gradient_variance/metric": 2051.26318359375, "sentence_full_gradient_variance/p75": 2051.26318359375, "sentence_full_gradient_variance/p90": 2051.26318359375, "sentence_full_gradient_variance/p95": 2051.26318359375, "sentence_full_gradient_variance/p99": 2051.26318359375, "sentence_full_update_term": 0.015860876068472862, "sentence_full_update_term/max": 0.09619140625, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0328369140625, "sentence_full_update_term/p85": 0.04327392578125, "sentence_full_update_term/p90": 0.045654296875, "sentence_full_update_term/p95": 0.0494384765625, "sentence_full_update_term/p99": 0.06511240452528, "sentence_full_update_term/var": 0.00044010888086631894, "sentence_hessian_coeff": 45426.0, "sentence_hessian_coeff/max": 1171456.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -327680.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 1040.0, "sentence_hessian_coeff/p99": 770663.6875, "sentence_hessian_coeff/var": 40806539264.0, "sentence_hessian_coeff_abs": 95471.3359375, "sentence_hessian_coeff_abs/max": 1171456.0, "sentence_hessian_coeff_abs/median": 0.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 110976.0, "sentence_hessian_coeff_abs/p99": 770663.6875, "sentence_hessian_coeff_abs/var": 33681063936.0, "step": 58, "token_fisher_curvature": 211080.6875, "token_fisher_curvature/max": 160432128.0, "token_fisher_curvature/median": 2.7972416050126014e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.707546226615891e-23, "token_fisher_curvature/p75": 2.7426949600339867e-12, "token_fisher_curvature/p85": 2.4883775040507317e-09, "token_fisher_curvature/p90": 5.662441253662109e-07, "token_fisher_curvature/p95": 0.021484375, "token_fisher_curvature/p99": 150528.0, "token_fisher_curvature/var": 17646238040064.0, "token_fisher_kl_divergence": 5.138050255482085e-06, "token_fisher_kl_divergence/max": 0.00390625, "token_fisher_kl_divergence/median": 6.815758221109542e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.3902806786045359e-33, "token_fisher_kl_divergence/p75": 6.658803931051873e-23, "token_fisher_kl_divergence/p85": 6.056285572868247e-20, "token_fisher_kl_divergence/p90": 1.3769367590565906e-17, "token_fisher_kl_divergence/p95": 5.222489107836736e-13, "token_fisher_kl_divergence/p99": 3.6656856536865234e-06, "token_fisher_kl_divergence/var": 1.045532282972772e-08, "token_full_update_term": 0.0002266923402203247, "token_full_update_term/max": 0.09228515625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.6566128730773926e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.16328002916332e-20, "token_full_update_term/p85": 5.0725916522775805e-14, "token_full_update_term/p90": 2.4726887204451486e-12, "token_full_update_term/p95": 8.36735125631094e-10, "token_full_update_term/p99": 0.00183868408203125, "token_full_update_term/var": 1.0521753210923634e-05, "token_hessian_coeff": 22130.763671875, "token_hessian_coeff/max": 158334976.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -20185088.0, "token_hessian_coeff/p25": -1.5211298887152225e-10, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.004438161849975586, "token_hessian_coeff/var": 12278911991808.0, "token_hessian_coeff_abs": 212600.046875, "token_hessian_coeff_abs/max": 158334976.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 4.1211023926734924e-08, "token_hessian_coeff_abs/p99": 2488064.0, "token_hessian_coeff_abs/var": 12234202808320.0 }, { "accuracy_reward": 0.7083333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20877192914485931, "adam_stats/lm_head/lr_effective_max": 2.8273809220991097e-05, "adam_stats/lm_head/lr_effective_mean": -2.0210545043086725e-11, "adam_stats/lm_head/lr_effective_min": -3.2615382224321365e-05, "adam_stats/lm_head/lr_effective_std": 7.312588081731519e-07, "adam_stats/lr_effective_max": 3.028174614883028e-05, "adam_stats/lr_effective_mean": -4.0801338002660614e-12, "adam_stats/lr_effective_min": -3.2615382224321365e-05, "adam_stats/m_t_max": 0.000401137163862586, "adam_stats/m_t_mean": -2.4205084551282008e-12, "adam_stats/m_t_min": -0.0002968676562886685, "adam_stats/v_t_max": 2.5335893951705657e-05, "adam_stats/v_t_mean": 1.7221216857057908e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7083333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20877192914485931, "all_logprobs": -0.008697565644979477, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00012302398681640625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.011738383211195469, "clip_ratio": 0.0, "completion_length": 609.5833740234375, "completion_length/correct": 519.308837890625, "completion_length/correct/max": 948.0, "completion_length/correct/median": 530.0, "completion_length/correct/min": 221.0, "completion_length/correct/p25": 391.5, "completion_length/correct/p75": 646.5, "completion_length/correct/var": 34507.5, "completion_length/incorrect": 828.8214721679688, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 783.0, "completion_length/incorrect/min": 475.0, "completion_length/incorrect/p25": 673.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 37661.04296875, "completion_length/max": 1024.0, "completion_length/median": 595.0, "completion_length/min": 221.0, "completion_length/p25": 396.75, "completion_length/p75": 757.5, "completion_length/var": 55040.484375, "curvature_clip_ratio_token_fisher": 0.008578264154493809, "curvature_clip_ratio_token_hessian": 0.005690362304449081, "curvature_clip_ratio_total_fisher": 0.008578264154493809, "curvature_clip_ratio_total_full": 0.008578264154493809, "curvature_clip_ratio_total_hessian": 0.005690362304449081, "epoch": 0.0944, "feature_vector_variance/max_squared_error": 71148.65625, "feature_vector_variance/metric": 31247.640625, "generated_tokens/total": 3347753.0, "global_fisher_curvature": 116736.0, "global_fisher_curvature/max": 116736.0, "global_fisher_curvature/median": 116736.0, "global_fisher_curvature/min": 116736.0, "global_fisher_curvature/p25": 116736.0, "global_fisher_curvature/p75": 116736.0, "global_fisher_curvature/p85": 116736.0, "global_fisher_curvature/p90": 116736.0, "global_fisher_curvature/p95": 116736.0, "global_fisher_curvature/p99": 116736.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.637505531311035e-06, "global_fisher_kl_divergence/max": 2.637505531311035e-06, "global_fisher_kl_divergence/median": 2.637505531311035e-06, "global_fisher_kl_divergence/min": 2.637505531311035e-06, "global_fisher_kl_divergence/p25": 2.637505531311035e-06, "global_fisher_kl_divergence/p75": 2.637505531311035e-06, "global_fisher_kl_divergence/p85": 2.637505531311035e-06, "global_fisher_kl_divergence/p90": 2.637505531311035e-06, "global_fisher_kl_divergence/p95": 2.637505531311035e-06, "global_fisher_kl_divergence/p99": 2.637505531311035e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.058837890625, "global_full_update_term/max": 0.058837890625, "global_full_update_term/median": 0.058837890625, "global_full_update_term/min": 0.058837890625, "global_full_update_term/p25": 0.058837890625, "global_full_update_term/p75": 0.058837890625, "global_full_update_term/p85": 0.058837890625, "global_full_update_term/p90": 0.058837890625, "global_full_update_term/p95": 0.058837890625, "global_full_update_term/p99": 0.058837890625, "global_full_update_term/var": NaN, "global_hessian_coeff": 6880.0, "global_hessian_coeff/max": 6880.0, "global_hessian_coeff/median": 6880.0, "global_hessian_coeff/min": 6880.0, "global_hessian_coeff/p25": 6880.0, "global_hessian_coeff/p75": 6880.0, "global_hessian_coeff/p99": 6880.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 6880.0, "global_hessian_coeff_abs/max": 6880.0, "global_hessian_coeff_abs/median": 6880.0, "global_hessian_coeff_abs/min": 6880.0, "global_hessian_coeff_abs/p25": 6880.0, "global_hessian_coeff_abs/p75": 6880.0, "global_hessian_coeff_abs/p99": 6880.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05150701478123665, "learning_rate": 6.456201742799511e-06, "loss": -0.7083, "masked_global_fisher_curvature": 432.0, "masked_global_fisher_curvature/max": 432.0, "masked_global_fisher_curvature/median": 432.0, "masked_global_fisher_curvature/min": 432.0, "masked_global_fisher_curvature/p25": 432.0, "masked_global_fisher_curvature/p75": 432.0, "masked_global_fisher_curvature/p85": 432.0, "masked_global_fisher_curvature/p90": 432.0, "masked_global_fisher_curvature/p95": 432.0, "masked_global_fisher_curvature/p99": 432.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/max": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/median": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/min": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/p25": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/p75": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/p85": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/p90": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/p95": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/p99": 9.720679372549057e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003204345703125, "masked_global_full_update_term/max": 0.003204345703125, "masked_global_full_update_term/median": 0.003204345703125, "masked_global_full_update_term/min": 0.003204345703125, "masked_global_full_update_term/p25": 0.003204345703125, "masked_global_full_update_term/p75": 0.003204345703125, "masked_global_full_update_term/p85": 0.003204345703125, "masked_global_full_update_term/p90": 0.003204345703125, "masked_global_full_update_term/p95": 0.003204345703125, "masked_global_full_update_term/p99": 0.003204345703125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5632.0, "masked_global_hessian_coeff/max": -5632.0, "masked_global_hessian_coeff/median": -5632.0, "masked_global_hessian_coeff/min": -5632.0, "masked_global_hessian_coeff/p25": -5632.0, "masked_global_hessian_coeff/p75": -5632.0, "masked_global_hessian_coeff/p99": -5632.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5632.0, "masked_global_hessian_coeff_abs/max": 5632.0, "masked_global_hessian_coeff_abs/median": 5632.0, "masked_global_hessian_coeff_abs/min": 5632.0, "masked_global_hessian_coeff_abs/p25": 5632.0, "masked_global_hessian_coeff_abs/p75": 5632.0, "masked_global_hessian_coeff_abs/p99": 5632.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.2627463340759277, "masked_per_sentence_gradient_norm/max": 10.5, "masked_per_sentence_gradient_norm/median": 1.34375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.1875, "masked_per_sentence_gradient_norm/var": 8.17296314239502, "masked_per_token_gradient_norm": 0.04353244975209236, "masked_per_token_gradient_norm/max": 14.375, "masked_per_token_gradient_norm/median": 1.0169642905566434e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.174339558929205e-08, "masked_per_token_gradient_norm/var": 0.3082648515701294, "masked_sentence_fisher_curvature": 735.3710327148438, "masked_sentence_fisher_curvature/max": 4480.0, "masked_sentence_fisher_curvature/median": 430.0, "masked_sentence_fisher_curvature/min": 0.3359375, "masked_sentence_fisher_curvature/p25": 283.25, "masked_sentence_fisher_curvature/p75": 911.0, "masked_sentence_fisher_curvature/p85": 1280.0, "masked_sentence_fisher_curvature/p90": 1568.0, "masked_sentence_fisher_curvature/p95": 2312.0, "masked_sentence_fisher_curvature/p99": 4480.0, "masked_sentence_fisher_curvature/var": 737612.4375, "masked_sentence_fisher_kl_divergence": 1.6587161866254974e-08, "masked_sentence_fisher_kl_divergence/max": 1.0104849934577942e-07, "masked_sentence_fisher_kl_divergence/median": 9.720679372549057e-09, "masked_sentence_fisher_kl_divergence/min": 7.560174708487466e-12, "masked_sentence_fisher_kl_divergence/p25": 6.395566742867231e-09, "masked_sentence_fisher_kl_divergence/p75": 2.0547304302453995e-08, "masked_sentence_fisher_kl_divergence/p85": 2.88418959826231e-08, "masked_sentence_fisher_kl_divergence/p90": 3.5390257835388184e-08, "masked_sentence_fisher_kl_divergence/p95": 5.209585651755333e-08, "masked_sentence_fisher_kl_divergence/p99": 1.0104849934577942e-07, "masked_sentence_fisher_kl_divergence/var": 3.7560511375689474e-16, "masked_sentence_full_gradient_variance/max_squared_error": 12.785974502563477, "masked_sentence_full_gradient_variance/metric": 12.785974502563477, "masked_sentence_full_gradient_variance/p75": 12.785974502563477, "masked_sentence_full_gradient_variance/p90": 12.785974502563477, "masked_sentence_full_gradient_variance/p95": 12.785974502563477, "masked_sentence_full_gradient_variance/p99": 12.785974502563477, "masked_sentence_full_update_term": 0.0011406861012801528, "masked_sentence_full_update_term/max": 0.0067138671875, "masked_sentence_full_update_term/median": 0.000701904296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0017833709716796875, "masked_sentence_full_update_term/p85": 0.002849578857421875, "masked_sentence_full_update_term/p90": 0.0034942626953125, "masked_sentence_full_update_term/p95": 0.004150390625, "masked_sentence_full_update_term/p99": 0.005960085429251194, "masked_sentence_full_update_term/var": 2.2998488020675723e-06, "masked_sentence_hessian_coeff": -17204.96484375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -14208.0, "masked_sentence_hessian_coeff/min": -103424.0, "masked_sentence_hessian_coeff/p25": -21696.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 483166752.0, "masked_sentence_hessian_coeff_abs": 17204.96484375, "masked_sentence_hessian_coeff_abs/max": 103424.0, "masked_sentence_hessian_coeff_abs/median": 14080.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 21696.0, "masked_sentence_hessian_coeff_abs/p99": 103424.0, "masked_sentence_hessian_coeff_abs/var": 483166752.0, "masked_token_fisher_curvature": 842.6317749023438, "masked_token_fisher_curvature/max": 409600.0, "masked_token_fisher_curvature/median": 1.1926223897340549e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.055026834311428e-24, "masked_token_fisher_curvature/p75": 1.1857181902996672e-13, "masked_token_fisher_curvature/p85": 1.1505107977427542e-10, "masked_token_fisher_curvature/p90": 3.67872416973114e-08, "masked_token_fisher_curvature/p95": 0.00057220458984375, "masked_token_fisher_curvature/p99": 5327.875, "masked_token_fisher_curvature/var": 174894256.0, "masked_token_fisher_kl_divergence": 1.9006597185011742e-08, "masked_token_fisher_kl_divergence/max": 9.238719940185547e-06, "masked_token_fisher_kl_divergence/median": 2.68212707775144e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.6267457648365236e-35, "masked_token_fisher_kl_divergence/p75": 2.678643467993984e-24, "masked_token_fisher_kl_divergence/p85": 2.5940384009662948e-21, "masked_token_fisher_kl_divergence/p90": 8.300922883092143e-19, "masked_token_fisher_kl_divergence/p95": 1.2878587085651816e-14, "masked_token_fisher_kl_divergence/p99": 1.1982774594798684e-07, "masked_token_fisher_kl_divergence/var": 8.90037487612759e-14, "masked_token_full_update_term": 1.2758486263919622e-05, "masked_token_full_update_term/max": 0.004058837890625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.6093254089355469e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.3530843112619095e-15, "masked_token_full_update_term/p85": 4.741207426661731e-13, "masked_token_full_update_term/p90": 1.1198153515579179e-11, "masked_token_full_update_term/p95": 5.2677933126688e-09, "masked_token_full_update_term/p99": 0.0002231597900390625, "masked_token_full_update_term/var": 2.743538018989966e-08, "masked_token_hessian_coeff": -22260.845703125, "masked_token_hessian_coeff/max": 1768.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -7045120.0, "masked_token_hessian_coeff/p25": -2.6635825634002686e-07, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.0017572641372680664, "masked_token_hessian_coeff/var": 84163133440.0, "masked_token_hessian_coeff_abs": 22260.9296875, "masked_token_hessian_coeff_abs/max": 7045120.0, "masked_token_hessian_coeff_abs/median": 8.412825991399586e-12, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.0265579223632812e-06, "masked_token_hessian_coeff_abs/p99": 309248.0, "masked_token_hessian_coeff_abs/var": 84163133440.0, "mean_logprobs": -0.00872802734375, "mean_logprobs/var": 2.384185791015625e-05, "num_completions/total": 5664, "per_sentence_gradient_norm": 41.0625, "per_sentence_gradient_norm/max": 213.0, "per_sentence_gradient_norm/median": 29.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 59.875, "per_sentence_gradient_norm/var": 2150.322265625, "per_token_feature_norm": 190.64927673339844, "per_token_feature_norm/max": 282.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 85.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 158.61151123046875, "per_token_gradient_norm": 0.752314031124115, "per_token_gradient_norm/max": 294.0, "per_token_gradient_norm/median": 1.234568003383174e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.5366822481155396e-08, "per_token_gradient_norm/var": 98.42711639404297, "per_token_policy_error_norm": 0.00516286538913846, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004901238717138767, "policy_entropy": 0.00965848658233881, "policy_entropy/max": 1.484375, "policy_entropy/median": 4.43833414465189e-10, "policy_entropy/min": 1.1580528575742387e-21, "policy_entropy/p25": 8.668621376273222e-13, "policy_entropy/p75": 1.0756775736808777e-07, "policy_entropy/var": 0.0051330942660570145, "policy_loss": -0.7083333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20877192914485931, "policy_sharpness": 9.729613304138184, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7285412549972534, "reward": 0.7083333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20877192914485931, "rewards/accuracy_reward": 0.7083333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20877192914485931, "sentence_fisher_curvature": 249751.53125, "sentence_fisher_curvature/max": 782336.0, "sentence_fisher_curvature/median": 254976.0, "sentence_fisher_curvature/min": 88.5, "sentence_fisher_curvature/p25": 3772.0, "sentence_fisher_curvature/p75": 392704.0, "sentence_fisher_curvature/p85": 522240.0, "sentence_fisher_curvature/p90": 600064.0, "sentence_fisher_curvature/p95": 660480.0, "sentence_fisher_curvature/p99": 747315.3125, "sentence_fisher_curvature/var": 51920879616.0, "sentence_fisher_kl_divergence": 5.632143711409299e-06, "sentence_fisher_kl_divergence/max": 1.7642974853515625e-05, "sentence_fisher_kl_divergence/median": 5.751848220825195e-06, "sentence_fisher_kl_divergence/min": 1.9936123862862587e-09, "sentence_fisher_kl_divergence/p25": 8.509960025548935e-08, "sentence_fisher_kl_divergence/p75": 8.851289749145508e-06, "sentence_fisher_kl_divergence/p85": 1.17570161819458e-05, "sentence_fisher_kl_divergence/p90": 1.3530254364013672e-05, "sentence_fisher_kl_divergence/p95": 1.4916062355041504e-05, "sentence_fisher_kl_divergence/p99": 1.6850235624588095e-05, "sentence_fisher_kl_divergence/var": 2.6400624741906853e-11, "sentence_full_gradient_variance/max_squared_error": 3747.55224609375, "sentence_full_gradient_variance/metric": 3747.55224609375, "sentence_full_gradient_variance/p75": 3747.55224609375, "sentence_full_gradient_variance/p90": 3747.55224609375, "sentence_full_gradient_variance/p95": 3747.55224609375, "sentence_full_gradient_variance/p99": 3747.55224609375, "sentence_full_update_term": 0.021457355469465256, "sentence_full_update_term/max": 0.09521484375, "sentence_full_update_term/median": 0.01483154296875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.037353515625, "sentence_full_update_term/p85": 0.0426025390625, "sentence_full_update_term/p90": 0.050048828125, "sentence_full_update_term/p95": 0.06256103515625, "sentence_full_update_term/p99": 0.09104005247354507, "sentence_full_update_term/var": 0.0005065567092970014, "sentence_hessian_coeff": 4287.0, "sentence_hessian_coeff/max": 450560.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -360448.0, "sentence_hessian_coeff/p25": -61440.0, "sentence_hessian_coeff/p75": 74240.0, "sentence_hessian_coeff/p99": 376627.4375, "sentence_hessian_coeff/var": 21777661952.0, "sentence_hessian_coeff_abs": 96956.3359375, "sentence_hessian_coeff_abs/max": 450560.0, "sentence_hessian_coeff_abs/median": 61440.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 151040.0, "sentence_hessian_coeff_abs/p99": 376627.4375, "sentence_hessian_coeff_abs/var": 12296751104.0, "step": 59, "token_fisher_curvature": 215969.515625, "token_fisher_curvature/max": 165675008.0, "token_fisher_curvature/median": 1.4433441421213278e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.4039936552322367e-24, "token_fisher_curvature/p75": 1.652011860642233e-13, "token_fisher_curvature/p85": 2.546585164964199e-10, "token_fisher_curvature/p90": 1.257285475730896e-07, "token_fisher_curvature/p95": 0.00757145881652832, "token_fisher_curvature/p99": 200128.0, "token_fisher_curvature/var": 16179863224320.0, "token_fisher_kl_divergence": 4.8699885155656375e-06, "token_fisher_kl_divergence/max": 0.0037384033203125, "token_fisher_kl_divergence/median": 3.2540512340366737e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.416677968589101e-35, "token_fisher_kl_divergence/p75": 3.7223127564886245e-24, "token_fisher_kl_divergence/p85": 5.743942173568224e-21, "token_fisher_kl_divergence/p90": 2.8324781756183803e-18, "token_fisher_kl_divergence/p95": 1.7062046220317484e-13, "token_fisher_kl_divergence/p99": 4.518777132034302e-06, "token_fisher_kl_divergence/var": 8.228194126047583e-09, "token_full_update_term": 0.0002360135258641094, "token_full_update_term/max": 0.09033203125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.6093254089355469e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.0816681711721685e-15, "token_full_update_term/p85": 6.927791673660977e-13, "token_full_update_term/p90": 2.2964741219766438e-11, "token_full_update_term/p95": 1.828311724239029e-08, "token_full_update_term/p99": 0.0024261474609375, "token_full_update_term/var": 9.911073902912904e-06, "token_hessian_coeff": -1564.1282958984375, "token_hessian_coeff/max": 166723584.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -20709376.0, "token_hessian_coeff/p25": -3.1851232051849365e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0035314559936523438, "token_hessian_coeff/var": 10778634616832.0, "token_hessian_coeff_abs": 219029.0625, "token_hessian_coeff_abs/max": 166723584.0, "token_hessian_coeff_abs/median": 1.1254996934439987e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.592802047729492e-06, "token_hessian_coeff_abs/p99": 4112384.0, "token_hessian_coeff_abs/var": 10730662264832.0 }, { "accuracy_reward": 0.6041666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24166668951511383, "adam_stats/lm_head/lr_effective_max": 2.7053887606598437e-05, "adam_stats/lm_head/lr_effective_mean": -3.7227314980681214e-11, "adam_stats/lm_head/lr_effective_min": -2.827372372848913e-05, "adam_stats/lm_head/lr_effective_std": 6.883873311380739e-07, "adam_stats/lr_effective_max": 2.9269573133205995e-05, "adam_stats/lr_effective_mean": -1.7201122470833496e-11, "adam_stats/lr_effective_min": -2.9968898161314428e-05, "adam_stats/m_t_max": 0.00035048535210080445, "adam_stats/m_t_mean": -5.329312945806519e-12, "adam_stats/m_t_min": -0.0002362330415053293, "adam_stats/v_t_max": 2.5312048819614574e-05, "adam_stats/v_t_mean": 1.7208408092592165e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6041666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24166668951511383, "all_logprobs": -0.008656332269310951, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.625, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -9.584426879882812e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013569426722824574, "clip_ratio": 0.0, "completion_length": 677.3333740234375, "completion_length/correct": 559.0689697265625, "completion_length/correct/max": 933.0, "completion_length/correct/median": 577.0, "completion_length/correct/min": 300.0, "completion_length/correct/p25": 329.0, "completion_length/correct/p75": 686.75, "completion_length/correct/var": 33392.37890625, "completion_length/incorrect": 857.8421020507812, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 407.0, "completion_length/incorrect/p25": 687.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 50097.1640625, "completion_length/max": 1024.0, "completion_length/median": 674.0, "completion_length/min": 300.0, "completion_length/p25": 497.0, "completion_length/p75": 905.25, "completion_length/var": 61119.42578125, "curvature_clip_ratio_token_fisher": 0.006935900542885065, "curvature_clip_ratio_token_hessian": 0.004552165511995554, "curvature_clip_ratio_total_fisher": 0.006935900542885065, "curvature_clip_ratio_total_full": 0.006935900542885065, "curvature_clip_ratio_total_hessian": 0.004552165511995554, "epoch": 0.096, "feature_vector_variance/max_squared_error": 67512.984375, "feature_vector_variance/metric": 31090.408203125, "generated_tokens/total": 3412777.0, "global_fisher_curvature": 94208.0, "global_fisher_curvature/max": 94208.0, "global_fisher_curvature/median": 94208.0, "global_fisher_curvature/min": 94208.0, "global_fisher_curvature/p25": 94208.0, "global_fisher_curvature/p75": 94208.0, "global_fisher_curvature/p85": 94208.0, "global_fisher_curvature/p90": 94208.0, "global_fisher_curvature/p95": 94208.0, "global_fisher_curvature/p99": 94208.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.9669532775878906e-06, "global_fisher_kl_divergence/max": 1.9669532775878906e-06, "global_fisher_kl_divergence/median": 1.9669532775878906e-06, "global_fisher_kl_divergence/min": 1.9669532775878906e-06, "global_fisher_kl_divergence/p25": 1.9669532775878906e-06, "global_fisher_kl_divergence/p75": 1.9669532775878906e-06, "global_fisher_kl_divergence/p85": 1.9669532775878906e-06, "global_fisher_kl_divergence/p90": 1.9669532775878906e-06, "global_fisher_kl_divergence/p95": 1.9669532775878906e-06, "global_fisher_kl_divergence/p99": 1.9669532775878906e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.043701171875, "global_full_update_term/max": 0.043701171875, "global_full_update_term/median": 0.043701171875, "global_full_update_term/min": 0.043701171875, "global_full_update_term/p25": 0.043701171875, "global_full_update_term/p75": 0.043701171875, "global_full_update_term/p85": 0.043701171875, "global_full_update_term/p90": 0.043701171875, "global_full_update_term/p95": 0.043701171875, "global_full_update_term/p99": 0.043701171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 13312.0, "global_hessian_coeff/max": 13312.0, "global_hessian_coeff/median": 13312.0, "global_hessian_coeff/min": 13312.0, "global_hessian_coeff/p25": 13312.0, "global_hessian_coeff/p75": 13312.0, "global_hessian_coeff/p99": 13312.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 13312.0, "global_hessian_coeff_abs/max": 13312.0, "global_hessian_coeff_abs/median": 13312.0, "global_hessian_coeff_abs/min": 13312.0, "global_hessian_coeff_abs/p25": 13312.0, "global_hessian_coeff_abs/p75": 13312.0, "global_hessian_coeff_abs/p99": 13312.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.04861941933631897, "learning_rate": 6.197638667498023e-06, "loss": -0.6042, "masked_global_fisher_curvature": 2944.0, "masked_global_fisher_curvature/max": 2944.0, "masked_global_fisher_curvature/median": 2944.0, "masked_global_fisher_curvature/min": 2944.0, "masked_global_fisher_curvature/p25": 2944.0, "masked_global_fisher_curvature/p75": 2944.0, "masked_global_fisher_curvature/p85": 2944.0, "masked_global_fisher_curvature/p90": 2944.0, "masked_global_fisher_curvature/p95": 2944.0, "masked_global_fisher_curvature/p99": 2944.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/max": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/median": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/min": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/p25": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/p75": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/p85": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/p90": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/p95": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/p99": 6.146728992462158e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002197265625, "masked_global_full_update_term/max": 0.002197265625, "masked_global_full_update_term/median": 0.002197265625, "masked_global_full_update_term/min": 0.002197265625, "masked_global_full_update_term/p25": 0.002197265625, "masked_global_full_update_term/p75": 0.002197265625, "masked_global_full_update_term/p85": 0.002197265625, "masked_global_full_update_term/p90": 0.002197265625, "masked_global_full_update_term/p95": 0.002197265625, "masked_global_full_update_term/p99": 0.002197265625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -4928.0, "masked_global_hessian_coeff/max": -4928.0, "masked_global_hessian_coeff/median": -4928.0, "masked_global_hessian_coeff/min": -4928.0, "masked_global_hessian_coeff/p25": -4928.0, "masked_global_hessian_coeff/p75": -4928.0, "masked_global_hessian_coeff/p99": -4928.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 4928.0, "masked_global_hessian_coeff_abs/max": 4928.0, "masked_global_hessian_coeff_abs/median": 4928.0, "masked_global_hessian_coeff_abs/min": 4928.0, "masked_global_hessian_coeff_abs/p25": 4928.0, "masked_global_hessian_coeff_abs/p75": 4928.0, "masked_global_hessian_coeff_abs/p99": 4928.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.1441855430603027, "masked_per_sentence_gradient_norm/max": 7.8125, "masked_per_sentence_gradient_norm/median": 1.4765625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.34375, "masked_per_sentence_gradient_norm/var": 5.883887767791748, "masked_per_token_gradient_norm": 0.03536247834563255, "masked_per_token_gradient_norm/max": 16.25, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.1059455573558807e-09, "masked_per_token_gradient_norm/var": 0.25586822628974915, "masked_sentence_fisher_curvature": 717.3046875, "masked_sentence_fisher_curvature/max": 1528.0, "masked_sentence_fisher_curvature/median": 748.0, "masked_sentence_fisher_curvature/min": 17.125, "masked_sentence_fisher_curvature/p25": 390.0, "masked_sentence_fisher_curvature/p75": 1052.0, "masked_sentence_fisher_curvature/p85": 1152.0, "masked_sentence_fisher_curvature/p90": 1204.0, "masked_sentence_fisher_curvature/p95": 1432.0, "masked_sentence_fisher_curvature/p99": 1474.8001708984375, "masked_sentence_fisher_curvature/var": 174012.875, "masked_sentence_fisher_kl_divergence": 1.4944060211519172e-08, "masked_sentence_fisher_kl_divergence/max": 3.189779818058014e-08, "masked_sentence_fisher_kl_divergence/median": 1.5599653124809265e-08, "masked_sentence_fisher_kl_divergence/min": 3.5652192309498787e-10, "masked_sentence_fisher_kl_divergence/p25": 8.105416782200336e-09, "masked_sentence_fisher_kl_divergence/p75": 2.1944288164377213e-08, "masked_sentence_fisher_kl_divergence/p85": 2.3981556296348572e-08, "masked_sentence_fisher_kl_divergence/p90": 2.5087501853704453e-08, "masked_sentence_fisher_kl_divergence/p95": 2.9802322387695312e-08, "masked_sentence_fisher_kl_divergence/p99": 3.079185617593794e-08, "masked_sentence_fisher_kl_divergence/var": 7.55413760863437e-17, "masked_sentence_full_gradient_variance/max_squared_error": 9.857627868652344, "masked_sentence_full_gradient_variance/metric": 9.857627868652344, "masked_sentence_full_gradient_variance/p75": 9.857627868652344, "masked_sentence_full_gradient_variance/p90": 9.857627868652344, "masked_sentence_full_gradient_variance/p95": 9.857627868652344, "masked_sentence_full_gradient_variance/p99": 9.857627868652344, "masked_sentence_full_update_term": 0.001040130853652954, "masked_sentence_full_update_term/max": 0.00439453125, "masked_sentence_full_update_term/median": 0.000583648681640625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.002124786376953125, "masked_sentence_full_update_term/p85": 0.002227783203125, "masked_sentence_full_update_term/p90": 0.00264739990234375, "masked_sentence_full_update_term/p95": 0.00402069091796875, "masked_sentence_full_update_term/p99": 0.004191590007394552, "masked_sentence_full_update_term/var": 1.5391310626000632e-06, "masked_sentence_hessian_coeff": -15159.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -13952.0, "masked_sentence_hessian_coeff/min": -40448.0, "masked_sentence_hessian_coeff/p25": -28416.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 235270352.0, "masked_sentence_hessian_coeff_abs": 15159.0, "masked_sentence_hessian_coeff_abs/max": 40448.0, "masked_sentence_hessian_coeff_abs/median": 13056.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 28416.0, "masked_sentence_hessian_coeff_abs/p99": 40448.0, "masked_sentence_hessian_coeff_abs/var": 235270352.0, "masked_token_fisher_curvature": 817.4590454101562, "masked_token_fisher_curvature/max": 473088.0, "masked_token_fisher_curvature/median": 9.351243737687476e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.274746684520826e-24, "masked_token_fisher_curvature/p75": 1.1235457009206584e-13, "masked_token_fisher_curvature/p85": 7.594280759803951e-11, "masked_token_fisher_curvature/p90": 1.2747477740049362e-08, "masked_token_fisher_curvature/p95": 9.918212890625e-05, "masked_token_fisher_curvature/p99": 2532.5, "masked_token_fisher_curvature/var": 182477104.0, "masked_token_fisher_kl_divergence": 1.703464747038197e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 1.9524307404220042e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.739593222515463e-35, "masked_token_fisher_kl_divergence/p75": 2.3393701698765314e-24, "masked_token_fisher_kl_divergence/p85": 1.581569331201389e-21, "masked_token_fisher_kl_divergence/p90": 2.659683454378503e-19, "masked_token_fisher_kl_divergence/p95": 2.067790383364354e-15, "masked_token_fisher_kl_divergence/p99": 5.275069270282984e-08, "masked_token_fisher_kl_divergence/var": 7.921993546182102e-14, "masked_token_full_update_term": 1.0003939678426832e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.5821070671081543e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 9.053088140253962e-18, "masked_token_full_update_term/p85": 3.730349362740526e-14, "masked_token_full_update_term/p90": 1.1795009413617663e-12, "masked_token_full_update_term/p95": 1.4733814168721437e-10, "masked_token_full_update_term/p99": 3.0517578125e-05, "masked_token_full_update_term/var": 2.1422676965698884e-08, "masked_token_hessian_coeff": -18340.748046875, "masked_token_hessian_coeff/max": 412.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -7241728.0, "masked_token_hessian_coeff/p25": -9.778887033462524e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0028076171875, "masked_token_hessian_coeff/var": 72652668928.0, "masked_token_hessian_coeff_abs": 18340.80078125, "masked_token_hessian_coeff_abs/max": 7241728.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.2165477275848389e-07, "masked_token_hessian_coeff_abs/p99": 25944.0, "masked_token_hessian_coeff_abs/var": 72652668928.0, "mean_logprobs": -0.00823974609375, "mean_logprobs/var": 4.076957702636719e-05, "num_completions/total": 5760, "per_sentence_gradient_norm": 32.723960876464844, "per_sentence_gradient_norm/max": 254.0, "per_sentence_gradient_norm/median": 11.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 48.25, "per_sentence_gradient_norm/var": 2260.02099609375, "per_token_feature_norm": 190.87457275390625, "per_token_feature_norm/max": 272.0, "per_token_feature_norm/median": 191.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 140.26617431640625, "per_token_gradient_norm": 0.5942626595497131, "per_token_gradient_norm/max": 282.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.367880031466484e-09, "per_token_gradient_norm/var": 79.80464935302734, "per_token_policy_error_norm": 0.004948053043335676, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0046431757509708405, "policy_entropy": 0.009556532837450504, "policy_entropy/max": 3.234375, "policy_entropy/median": 4.729372449219227e-10, "policy_entropy/min": 7.040961374051372e-21, "policy_entropy/p25": 8.384404281969182e-13, "policy_entropy/p75": 1.0663643479347229e-07, "policy_entropy/var": 0.005495736841112375, "policy_loss": -0.6041666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24166668951511383, "policy_sharpness": 9.732065200805664, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7238961458206177, "reward": 0.6041666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24166668951511383, "rewards/accuracy_reward": 0.6041666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24166668951511383, "sentence_fisher_curvature": 183132.3125, "sentence_fisher_curvature/max": 884736.0, "sentence_fisher_curvature/median": 43776.0, "sentence_fisher_curvature/min": 17.125, "sentence_fisher_curvature/p25": 1184.0, "sentence_fisher_curvature/p75": 289280.0, "sentence_fisher_curvature/p85": 483840.0, "sentence_fisher_curvature/p90": 567296.0, "sentence_fisher_curvature/p95": 655360.0, "sentence_fisher_curvature/p99": 826368.1875, "sentence_fisher_curvature/var": 57541971968.0, "sentence_fisher_kl_divergence": 3.8182843127287924e-06, "sentence_fisher_kl_divergence/max": 1.8477439880371094e-05, "sentence_fisher_kl_divergence/median": 9.126961231231689e-07, "sentence_fisher_kl_divergence/min": 3.5652192309498787e-10, "sentence_fisher_kl_divergence/p25": 2.4650944396853447e-08, "sentence_fisher_kl_divergence/p75": 6.027519702911377e-06, "sentence_fisher_kl_divergence/p85": 1.0102987289428711e-05, "sentence_fisher_kl_divergence/p90": 1.1831521987915039e-05, "sentence_fisher_kl_divergence/p95": 1.3649463653564453e-05, "sentence_fisher_kl_divergence/p99": 1.7231706806342117e-05, "sentence_fisher_kl_divergence/var": 2.5012247481526195e-11, "sentence_full_gradient_variance/max_squared_error": 3248.33642578125, "sentence_full_gradient_variance/metric": 3248.33642578125, "sentence_full_gradient_variance/p75": 3248.33642578125, "sentence_full_gradient_variance/p90": 3248.33642578125, "sentence_full_gradient_variance/p95": 3248.33642578125, "sentence_full_gradient_variance/p99": 3248.33642578125, "sentence_full_update_term": 0.016843479126691818, "sentence_full_update_term/max": 0.08740234375, "sentence_full_update_term/median": 0.003936767578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.028076171875, "sentence_full_update_term/p85": 0.035400390625, "sentence_full_update_term/p90": 0.0545654296875, "sentence_full_update_term/p95": 0.0694580078125, "sentence_full_update_term/p99": 0.08229982107877731, "sentence_full_update_term/var": 0.0005232291296124458, "sentence_hessian_coeff": 12228.083984375, "sentence_hessian_coeff/max": 466944.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -284672.0, "sentence_hessian_coeff/p25": -72512.0, "sentence_hessian_coeff/p75": 80128.0, "sentence_hessian_coeff/p99": 352153.96875, "sentence_hessian_coeff/var": 20972201984.0, "sentence_hessian_coeff_abs": 95035.921875, "sentence_hessian_coeff_abs/max": 466944.0, "sentence_hessian_coeff_abs/median": 77312.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 126848.0, "sentence_hessian_coeff_abs/p99": 352153.96875, "sentence_hessian_coeff_abs/var": 11996400640.0, "step": 60, "token_fisher_curvature": 181073.234375, "token_fisher_curvature/max": 163577856.0, "token_fisher_curvature/median": 1.1316360175317453e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.5332406259436473e-24, "token_fisher_curvature/p75": 1.4477308241112041e-13, "token_fisher_curvature/p85": 1.1641532182693481e-10, "token_fisher_curvature/p90": 2.782326191663742e-08, "token_fisher_curvature/p95": 0.0008800327777862549, "token_fisher_curvature/p99": 79124.0, "token_fisher_curvature/var": 14867077529600.0, "token_fisher_kl_divergence": 3.7743959637737134e-06, "token_fisher_kl_divergence/max": 0.0034027099609375, "token_fisher_kl_divergence/median": 2.3567219543477728e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.266214691683848e-35, "token_fisher_kl_divergence/p75": 3.0114544175758664e-24, "token_fisher_kl_divergence/p85": 2.421984833555265e-21, "token_fisher_kl_divergence/p90": 5.793705359219414e-19, "token_fisher_kl_divergence/p95": 1.830176635242431e-14, "token_fisher_kl_divergence/p99": 1.648877514526248e-06, "token_fisher_kl_divergence/var": 6.460969981247899e-09, "token_full_update_term": 0.00018238111806567758, "token_full_update_term/max": 0.08544921875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.5821070671081543e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.5395670849294163e-17, "token_full_update_term/p85": 5.518936002646413e-14, "token_full_update_term/p90": 1.7273127372874342e-12, "token_full_update_term/p95": 4.396412123242044e-10, "token_full_update_term/p99": 0.0010206103324890137, "token_full_update_term/var": 7.706305950705428e-06, "token_hessian_coeff": 8512.759765625, "token_hessian_coeff/max": 158334976.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -20971520.0, "token_hessian_coeff/p25": -1.2179953046143055e-08, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00665283203125, "token_hessian_coeff/var": 10144291225600.0, "token_hessian_coeff_abs": 184684.65625, "token_hessian_coeff_abs/max": 158334976.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.8870999813079834e-07, "token_hessian_coeff_abs/p99": 1753088.0, "token_hessian_coeff_abs/var": 10110253400064.0 }, { "accuracy_reward": 0.7083333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20877192914485931, "adam_stats/lm_head/lr_effective_max": 2.5551522412570193e-05, "adam_stats/lm_head/lr_effective_mean": -3.889480057472916e-11, "adam_stats/lm_head/lr_effective_min": -2.6956560759572312e-05, "adam_stats/lm_head/lr_effective_std": 6.850196996310842e-07, "adam_stats/lr_effective_max": 2.8027769076288678e-05, "adam_stats/lr_effective_mean": -3.594479531729422e-12, "adam_stats/lr_effective_min": -2.8423217372619547e-05, "adam_stats/m_t_max": 0.0003213495947420597, "adam_stats/m_t_mean": -4.553992599687362e-12, "adam_stats/m_t_min": -0.0002496505912858993, "adam_stats/v_t_max": 2.5287878088420257e-05, "adam_stats/v_t_mean": 1.7195044216614108e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7083333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20877192914485931, "all_logprobs": -0.009094306267797947, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.75, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -1.1920928955078125e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00020313262939453125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.014538594521582127, "clip_ratio": 0.0, "completion_length": 547.1146240234375, "completion_length/correct": 472.6911926269531, "completion_length/correct/max": 992.0, "completion_length/correct/median": 472.0, "completion_length/correct/min": 194.0, "completion_length/correct/p25": 357.25, "completion_length/correct/p75": 518.5, "completion_length/correct/var": 35973.53125, "completion_length/incorrect": 727.857177734375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 224.0, "completion_length/incorrect/p25": 354.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 129295.7578125, "completion_length/max": 1024.0, "completion_length/median": 473.0, "completion_length/min": 194.0, "completion_length/p25": 357.25, "completion_length/p75": 668.25, "completion_length/var": 75711.1015625, "curvature_clip_ratio_token_fisher": 0.009633874520659447, "curvature_clip_ratio_token_hessian": 0.006149686872959137, "curvature_clip_ratio_total_fisher": 0.009633874520659447, "curvature_clip_ratio_total_full": 0.009633874520659447, "curvature_clip_ratio_total_hessian": 0.006149686872959137, "epoch": 0.0976, "feature_vector_variance/max_squared_error": 70232.734375, "feature_vector_variance/metric": 31090.478515625, "generated_tokens/total": 3465300.0, "global_fisher_curvature": 138240.0, "global_fisher_curvature/max": 138240.0, "global_fisher_curvature/median": 138240.0, "global_fisher_curvature/min": 138240.0, "global_fisher_curvature/p25": 138240.0, "global_fisher_curvature/p75": 138240.0, "global_fisher_curvature/p85": 138240.0, "global_fisher_curvature/p90": 138240.0, "global_fisher_curvature/p95": 138240.0, "global_fisher_curvature/p99": 138240.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.652406692504883e-06, "global_fisher_kl_divergence/max": 2.652406692504883e-06, "global_fisher_kl_divergence/median": 2.652406692504883e-06, "global_fisher_kl_divergence/min": 2.652406692504883e-06, "global_fisher_kl_divergence/p25": 2.652406692504883e-06, "global_fisher_kl_divergence/p75": 2.652406692504883e-06, "global_fisher_kl_divergence/p85": 2.652406692504883e-06, "global_fisher_kl_divergence/p90": 2.652406692504883e-06, "global_fisher_kl_divergence/p95": 2.652406692504883e-06, "global_fisher_kl_divergence/p99": 2.652406692504883e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.03955078125, "global_full_update_term/max": 0.03955078125, "global_full_update_term/median": 0.03955078125, "global_full_update_term/min": 0.03955078125, "global_full_update_term/p25": 0.03955078125, "global_full_update_term/p75": 0.03955078125, "global_full_update_term/p85": 0.03955078125, "global_full_update_term/p90": 0.03955078125, "global_full_update_term/p95": 0.03955078125, "global_full_update_term/p99": 0.03955078125, "global_full_update_term/var": NaN, "global_hessian_coeff": 30208.0, "global_hessian_coeff/max": 30208.0, "global_hessian_coeff/median": 30208.0, "global_hessian_coeff/min": 30208.0, "global_hessian_coeff/p25": 30208.0, "global_hessian_coeff/p75": 30208.0, "global_hessian_coeff/p99": 30208.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 30208.0, "global_hessian_coeff_abs/max": 30208.0, "global_hessian_coeff_abs/median": 30208.0, "global_hessian_coeff_abs/min": 30208.0, "global_hessian_coeff_abs/p25": 30208.0, "global_hessian_coeff_abs/p75": 30208.0, "global_hessian_coeff_abs/p99": 30208.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.05678848549723625, "learning_rate": 5.9406623188668065e-06, "loss": -0.7083, "masked_global_fisher_curvature": 868.0, "masked_global_fisher_curvature/max": 868.0, "masked_global_fisher_curvature/median": 868.0, "masked_global_fisher_curvature/min": 868.0, "masked_global_fisher_curvature/p25": 868.0, "masked_global_fisher_curvature/p75": 868.0, "masked_global_fisher_curvature/p85": 868.0, "masked_global_fisher_curvature/p90": 868.0, "masked_global_fisher_curvature/p95": 868.0, "masked_global_fisher_curvature/p99": 868.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/max": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/median": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/min": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/p25": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/p75": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/p85": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/p90": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/p95": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/p99": 1.664739102125168e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002532958984375, "masked_global_full_update_term/max": 0.002532958984375, "masked_global_full_update_term/median": 0.002532958984375, "masked_global_full_update_term/min": 0.002532958984375, "masked_global_full_update_term/p25": 0.002532958984375, "masked_global_full_update_term/p75": 0.002532958984375, "masked_global_full_update_term/p85": 0.002532958984375, "masked_global_full_update_term/p90": 0.002532958984375, "masked_global_full_update_term/p95": 0.002532958984375, "masked_global_full_update_term/p99": 0.002532958984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -6272.0, "masked_global_hessian_coeff/max": -6272.0, "masked_global_hessian_coeff/median": -6272.0, "masked_global_hessian_coeff/min": -6272.0, "masked_global_hessian_coeff/p25": -6272.0, "masked_global_hessian_coeff/p75": -6272.0, "masked_global_hessian_coeff/p99": -6272.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 6272.0, "masked_global_hessian_coeff_abs/max": 6272.0, "masked_global_hessian_coeff_abs/median": 6272.0, "masked_global_hessian_coeff_abs/min": 6272.0, "masked_global_hessian_coeff_abs/p25": 6272.0, "masked_global_hessian_coeff_abs/p75": 6272.0, "masked_global_hessian_coeff_abs/p99": 6272.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.2133994102478027, "masked_per_sentence_gradient_norm/max": 10.25, "masked_per_sentence_gradient_norm/median": 1.5859375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.48828125, "masked_per_sentence_gradient_norm/var": 5.5623016357421875, "masked_per_token_gradient_norm": 0.04799151420593262, "masked_per_token_gradient_norm/max": 17.125, "masked_per_token_gradient_norm/median": 1.2434497875801753e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.3760443329811096e-08, "masked_per_token_gradient_norm/var": 0.34445813298225403, "masked_sentence_fisher_curvature": 845.390625, "masked_sentence_fisher_curvature/max": 2864.0, "masked_sentence_fisher_curvature/median": 772.0, "masked_sentence_fisher_curvature/min": 13.625, "masked_sentence_fisher_curvature/p25": 436.0, "masked_sentence_fisher_curvature/p75": 1074.0, "masked_sentence_fisher_curvature/p85": 1358.0, "masked_sentence_fisher_curvature/p90": 1740.0, "masked_sentence_fisher_curvature/p95": 1912.0, "masked_sentence_fisher_curvature/p99": 2757.600341796875, "masked_sentence_fisher_curvature/var": 335266.5, "masked_sentence_fisher_kl_divergence": 1.6242097444774117e-08, "masked_sentence_fisher_kl_divergence/max": 5.494803190231323e-08, "masked_sentence_fisher_kl_divergence/median": 1.4842953532934189e-08, "masked_sentence_fisher_kl_divergence/min": 2.6193447411060333e-10, "masked_sentence_fisher_kl_divergence/p25": 8.396455086767673e-09, "masked_sentence_fisher_kl_divergence/p75": 2.066371962428093e-08, "masked_sentence_fisher_kl_divergence/p85": 2.6047928258776665e-08, "masked_sentence_fisher_kl_divergence/p90": 3.3527612686157227e-08, "masked_sentence_fisher_kl_divergence/p95": 3.67872416973114e-08, "masked_sentence_fisher_kl_divergence/p99": 5.295733629395727e-08, "masked_sentence_fisher_kl_divergence/var": 1.238391020945553e-16, "masked_sentence_full_gradient_variance/max_squared_error": 9.977715492248535, "masked_sentence_full_gradient_variance/metric": 9.977715492248535, "masked_sentence_full_gradient_variance/p75": 9.977715492248535, "masked_sentence_full_gradient_variance/p90": 9.977715492248535, "masked_sentence_full_gradient_variance/p95": 9.977715492248535, "masked_sentence_full_gradient_variance/p99": 9.977715492248535, "masked_sentence_full_update_term": 0.0009242644300684333, "masked_sentence_full_update_term/max": 0.00372314453125, "masked_sentence_full_update_term/median": 0.000701904296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0015869140625, "masked_sentence_full_update_term/p85": 0.00168609619140625, "masked_sentence_full_update_term/p90": 0.00240325927734375, "masked_sentence_full_update_term/p95": 0.002838134765625, "masked_sentence_full_update_term/p99": 0.00372314453125, "masked_sentence_full_update_term/var": 9.32876275783201e-07, "masked_sentence_hessian_coeff": -20894.833984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -22272.0, "masked_sentence_hessian_coeff/min": -64256.0, "masked_sentence_hessian_coeff/p25": -30208.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 303754112.0, "masked_sentence_hessian_coeff_abs": 20894.833984375, "masked_sentence_hessian_coeff_abs/max": 64256.0, "masked_sentence_hessian_coeff_abs/median": 21632.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 30208.0, "masked_sentence_hessian_coeff_abs/p99": 64256.0, "masked_sentence_hessian_coeff_abs/var": 303754112.0, "masked_token_fisher_curvature": 945.3746948242188, "masked_token_fisher_curvature/max": 518144.0, "masked_token_fisher_curvature/median": 4.2012834183813297e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.1842738050228387e-24, "masked_token_fisher_curvature/p75": 1.376676550535194e-13, "masked_token_fisher_curvature/p85": 1.2460077414289117e-10, "masked_token_fisher_curvature/p90": 2.3283064365386963e-08, "masked_token_fisher_curvature/p95": 0.0005988925695419312, "masked_token_fisher_curvature/p99": 4896.0, "masked_token_fisher_curvature/var": 205229744.0, "masked_token_fisher_kl_divergence": 1.8156276482272915e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 8.085824278515371e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.194163843733922e-35, "masked_token_fisher_kl_divergence/p75": 2.6495628995839168e-24, "masked_token_fisher_kl_divergence/p85": 2.395515053953568e-21, "masked_token_fisher_kl_divergence/p90": 4.472333961502706e-19, "masked_token_fisher_kl_divergence/p95": 1.1501650326595225e-14, "masked_token_fisher_kl_divergence/p99": 9.406358003616333e-08, "masked_token_fisher_kl_divergence/var": 7.570489103225081e-14, "masked_token_full_update_term": 1.3085203136142809e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.421438694000244e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.385380947269368e-15, "masked_token_full_update_term/p85": 1.0871303857129533e-12, "masked_token_full_update_term/p90": 1.994049370068751e-11, "masked_token_full_update_term/p95": 2.441765900584869e-09, "masked_token_full_update_term/p99": 0.000225067138671875, "masked_token_full_update_term/var": 2.6806542763324614e-08, "masked_token_hessian_coeff": -25232.921875, "masked_token_hessian_coeff/max": 3504.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -7766016.0, "masked_token_hessian_coeff/p25": -7.934868335723877e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.005427241325378418, "masked_token_hessian_coeff/var": 99950600192.0, "masked_token_hessian_coeff_abs": 25233.310546875, "masked_token_hessian_coeff_abs/max": 7766016.0, "masked_token_hessian_coeff_abs/median": 1.5188561519607902e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 5.930662155151367e-06, "masked_token_hessian_coeff_abs/p99": 391576.0, "masked_token_hessian_coeff_abs/var": 99950600192.0, "mean_logprobs": -0.009765625, "mean_logprobs/var": 7.724761962890625e-05, "num_completions/total": 5856, "per_sentence_gradient_norm": 28.89453125, "per_sentence_gradient_norm/max": 210.0, "per_sentence_gradient_norm/median": 20.125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 38.875, "per_sentence_gradient_norm/var": 1436.6898193359375, "per_token_feature_norm": 190.60784912109375, "per_token_feature_norm/max": 276.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 112.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 135.1695556640625, "per_token_gradient_norm": 0.8406364917755127, "per_token_gradient_norm/max": 308.0, "per_token_gradient_norm/median": 1.5702994460298214e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.307366907596588e-08, "per_token_gradient_norm/var": 115.75949096679688, "per_token_policy_error_norm": 0.005218733102083206, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005296752322465181, "policy_entropy": 0.009501680731773376, "policy_entropy/max": 2.03125, "policy_entropy/median": 3.838067641481757e-10, "policy_entropy/min": 7.326834993749698e-20, "policy_entropy/p25": 1.0658141036401503e-12, "policy_entropy/p75": 1.0151416063308716e-07, "policy_entropy/var": 0.004907927941530943, "policy_loss": -0.7083333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20877192914485931, "policy_sharpness": 9.718510627746582, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7619186639785767, "reward": 0.7083333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20877192914485931, "rewards/accuracy_reward": 0.7083333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20877192914485931, "sentence_fisher_curvature": 275747.21875, "sentence_fisher_curvature/max": 1499136.0, "sentence_fisher_curvature/median": 115200.0, "sentence_fisher_curvature/min": 16.125, "sentence_fisher_curvature/p25": 2588.0, "sentence_fisher_curvature/p75": 452608.0, "sentence_fisher_curvature/p85": 587776.0, "sentence_fisher_curvature/p90": 731136.0, "sentence_fisher_curvature/p95": 950272.0, "sentence_fisher_curvature/p99": 1468006.5, "sentence_fisher_curvature/var": 113803157504.0, "sentence_fisher_kl_divergence": 5.295763457979774e-06, "sentence_fisher_kl_divergence/max": 2.8848648071289062e-05, "sentence_fisher_kl_divergence/median": 2.205371856689453e-06, "sentence_fisher_kl_divergence/min": 3.092281986027956e-10, "sentence_fisher_kl_divergence/p25": 4.9709342420101166e-08, "sentence_fisher_kl_divergence/p75": 8.702278137207031e-06, "sentence_fisher_kl_divergence/p85": 1.1280179023742676e-05, "sentence_fisher_kl_divergence/p90": 1.4036893844604492e-05, "sentence_fisher_kl_divergence/p95": 1.823902130126953e-05, "sentence_fisher_kl_divergence/p99": 2.8169157303636894e-05, "sentence_fisher_kl_divergence/var": 4.200216910388299e-11, "sentence_full_gradient_variance/max_squared_error": 2225.6181640625, "sentence_full_gradient_variance/metric": 2225.6181640625, "sentence_full_gradient_variance/p75": 2225.6181640625, "sentence_full_gradient_variance/p90": 2225.6181640625, "sentence_full_gradient_variance/p95": 2225.6181640625, "sentence_full_gradient_variance/p99": 2225.6181640625, "sentence_full_update_term": 0.014816125854849815, "sentence_full_update_term/max": 0.12353515625, "sentence_full_update_term/median": 0.011474609375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.020263671875, "sentence_full_update_term/p85": 0.0247802734375, "sentence_full_update_term/p90": 0.02862548828125, "sentence_full_update_term/p95": 0.04718017578125, "sentence_full_update_term/p99": 0.08132337778806686, "sentence_full_update_term/var": 0.00037572882138192654, "sentence_hessian_coeff": 34361.3984375, "sentence_hessian_coeff/max": 1015808.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -360448.0, "sentence_hessian_coeff/p25": -103424.0, "sentence_hessian_coeff/p75": 57344.0, "sentence_hessian_coeff/p99": 980787.3125, "sentence_hessian_coeff/var": 52502405120.0, "sentence_hessian_coeff_abs": 134964.234375, "sentence_hessian_coeff_abs/max": 1015808.0, "sentence_hessian_coeff_abs/median": 65280.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 181248.0, "sentence_hessian_coeff_abs/p99": 980787.3125, "sentence_hessian_coeff_abs/var": 35288453120.0, "step": 61, "token_fisher_curvature": 261442.34375, "token_fisher_curvature/max": 162529280.0, "token_fisher_curvature/median": 5.251604272976662e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.44276774644566e-24, "token_fisher_curvature/p75": 2.0961010704922955e-13, "token_fisher_curvature/p85": 2.582964953035116e-10, "token_fisher_curvature/p90": 7.12461769580841e-08, "token_fisher_curvature/p95": 0.00914907455444336, "token_fisher_curvature/p99": 364544.0, "token_fisher_curvature/var": 23605775695872.0, "token_fisher_kl_divergence": 5.022861842007842e-06, "token_fisher_kl_divergence/max": 0.0031280517578125, "token_fisher_kl_divergence/median": 1.0107280348144214e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.6831694936759934e-35, "token_fisher_kl_divergence/p75": 4.03250548619601e-24, "token_fisher_kl_divergence/p85": 4.9498487855173176e-21, "token_fisher_kl_divergence/p90": 1.3688052427629493e-18, "token_fisher_kl_divergence/p95": 1.75769121479874e-13, "token_fisher_kl_divergence/p99": 7.0035457611083984e-06, "token_fisher_kl_divergence/var": 8.716052768420468e-09, "token_full_update_term": 0.00024601948098279536, "token_full_update_term/max": 0.08203125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.421438694000244e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.5503158452884236e-15, "token_full_update_term/p85": 1.7763568394002505e-12, "token_full_update_term/p90": 3.9744207924741204e-11, "token_full_update_term/p95": 1.257285475730896e-08, "token_full_update_term/p99": 0.0033316612243652344, "token_full_update_term/var": 1.0286101314704865e-05, "token_hessian_coeff": 22038.17578125, "token_hessian_coeff/max": 161480704.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -21233664.0, "token_hessian_coeff/p25": -9.611248970031738e-07, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.011962890625, "token_hessian_coeff/var": 17122239447040.0, "token_hessian_coeff_abs": 278547.15625, "token_hessian_coeff_abs/max": 161480704.0, "token_hessian_coeff_abs/median": 1.8826540326699615e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 7.927417755126953e-06, "token_hessian_coeff_abs/p99": 5734400.0, "token_hessian_coeff_abs/var": 17045133459456.0 }, { "accuracy_reward": 0.8333333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.14035087823867798, "adam_stats/lm_head/lr_effective_max": 2.5402554456377402e-05, "adam_stats/lm_head/lr_effective_mean": -3.874492740529867e-11, "adam_stats/lm_head/lr_effective_min": -2.828724063874688e-05, "adam_stats/lm_head/lr_effective_std": 6.641632239734463e-07, "adam_stats/lr_effective_max": 2.8665532227023505e-05, "adam_stats/lr_effective_mean": -1.4461377408070408e-11, "adam_stats/lr_effective_min": -2.9542034098994918e-05, "adam_stats/m_t_max": 0.00037963592330925167, "adam_stats/m_t_mean": -2.191910064910929e-12, "adam_stats/m_t_min": -0.00032798657775856555, "adam_stats/v_t_max": 2.527256765461061e-05, "adam_stats/v_t_mean": 1.7196613057157695e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8333333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.14035087823867798, "all_logprobs": -0.008050285279750824, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -0.1378418207168579, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.486343383789062e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012551856227219105, "clip_ratio": 0.0, "completion_length": 486.85418701171875, "completion_length/correct": 508.1875, "completion_length/correct/max": 859.0, "completion_length/correct/median": 514.0, "completion_length/correct/min": 227.0, "completion_length/correct/p25": 355.0, "completion_length/correct/p75": 671.25, "completion_length/correct/var": 33146.73828125, "completion_length/incorrect": 380.1875, "completion_length/incorrect/max": 580.0, "completion_length/incorrect/median": 375.0, "completion_length/incorrect/min": 237.0, "completion_length/incorrect/p25": 267.75, "completion_length/incorrect/p75": 406.0, "completion_length/incorrect/var": 11708.8291015625, "completion_length/max": 859.0, "completion_length/median": 487.0, "completion_length/min": 227.0, "completion_length/p25": 354.5, "completion_length/p75": 646.5, "completion_length/var": 31712.396484375, "curvature_clip_ratio_token_fisher": 0.012794728390872478, "curvature_clip_ratio_token_hessian": 0.008922075852751732, "curvature_clip_ratio_total_fisher": 0.012794728390872478, "curvature_clip_ratio_total_full": 0.012794728390872478, "curvature_clip_ratio_total_hessian": 0.008922075852751732, "epoch": 0.0992, "feature_vector_variance/max_squared_error": 62477.921875, "feature_vector_variance/metric": 31220.623046875, "generated_tokens/total": 3512038.0, "global_fisher_curvature": 135168.0, "global_fisher_curvature/max": 135168.0, "global_fisher_curvature/median": 135168.0, "global_fisher_curvature/min": 135168.0, "global_fisher_curvature/p25": 135168.0, "global_fisher_curvature/p75": 135168.0, "global_fisher_curvature/p85": 135168.0, "global_fisher_curvature/p90": 135168.0, "global_fisher_curvature/p95": 135168.0, "global_fisher_curvature/p99": 135168.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.384185791015625e-06, "global_fisher_kl_divergence/max": 2.384185791015625e-06, "global_fisher_kl_divergence/median": 2.384185791015625e-06, "global_fisher_kl_divergence/min": 2.384185791015625e-06, "global_fisher_kl_divergence/p25": 2.384185791015625e-06, "global_fisher_kl_divergence/p75": 2.384185791015625e-06, "global_fisher_kl_divergence/p85": 2.384185791015625e-06, "global_fisher_kl_divergence/p90": 2.384185791015625e-06, "global_fisher_kl_divergence/p95": 2.384185791015625e-06, "global_fisher_kl_divergence/p99": 2.384185791015625e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.05126953125, "global_full_update_term/max": 0.05126953125, "global_full_update_term/median": 0.05126953125, "global_full_update_term/min": 0.05126953125, "global_full_update_term/p25": 0.05126953125, "global_full_update_term/p75": 0.05126953125, "global_full_update_term/p85": 0.05126953125, "global_full_update_term/p90": 0.05126953125, "global_full_update_term/p95": 0.05126953125, "global_full_update_term/p99": 0.05126953125, "global_full_update_term/var": NaN, "global_hessian_coeff": 11136.0, "global_hessian_coeff/max": 11136.0, "global_hessian_coeff/median": 11136.0, "global_hessian_coeff/min": 11136.0, "global_hessian_coeff/p25": 11136.0, "global_hessian_coeff/p75": 11136.0, "global_hessian_coeff/p99": 11136.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 11136.0, "global_hessian_coeff_abs/max": 11136.0, "global_hessian_coeff_abs/median": 11136.0, "global_hessian_coeff_abs/min": 11136.0, "global_hessian_coeff_abs/p25": 11136.0, "global_hessian_coeff_abs/p75": 11136.0, "global_hessian_coeff_abs/p99": 11136.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.08162228763103485, "learning_rate": 5.685585783002493e-06, "loss": -0.8333, "masked_global_fisher_curvature": 932.0, "masked_global_fisher_curvature/max": 932.0, "masked_global_fisher_curvature/median": 932.0, "masked_global_fisher_curvature/min": 932.0, "masked_global_fisher_curvature/p25": 932.0, "masked_global_fisher_curvature/p75": 932.0, "masked_global_fisher_curvature/p85": 932.0, "masked_global_fisher_curvature/p90": 932.0, "masked_global_fisher_curvature/p95": 932.0, "masked_global_fisher_curvature/p99": 932.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/max": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/median": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/min": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/p25": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/p75": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/p85": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/p90": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/p95": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/p99": 1.641456037759781e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.006378173828125, "masked_global_full_update_term/max": 0.006378173828125, "masked_global_full_update_term/median": 0.006378173828125, "masked_global_full_update_term/min": 0.006378173828125, "masked_global_full_update_term/p25": 0.006378173828125, "masked_global_full_update_term/p75": 0.006378173828125, "masked_global_full_update_term/p85": 0.006378173828125, "masked_global_full_update_term/p90": 0.006378173828125, "masked_global_full_update_term/p95": 0.006378173828125, "masked_global_full_update_term/p99": 0.006378173828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -8192.0, "masked_global_hessian_coeff/max": -8192.0, "masked_global_hessian_coeff/median": -8192.0, "masked_global_hessian_coeff/min": -8192.0, "masked_global_hessian_coeff/p25": -8192.0, "masked_global_hessian_coeff/p75": -8192.0, "masked_global_hessian_coeff/p99": -8192.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 8192.0, "masked_global_hessian_coeff_abs/max": 8192.0, "masked_global_hessian_coeff_abs/median": 8192.0, "masked_global_hessian_coeff_abs/min": 8192.0, "masked_global_hessian_coeff_abs/p25": 8192.0, "masked_global_hessian_coeff_abs/p75": 8192.0, "masked_global_hessian_coeff_abs/p99": 8192.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.4075522422790527, "masked_per_sentence_gradient_norm/max": 11.75, "masked_per_sentence_gradient_norm/median": 3.546875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.341796875, "masked_per_sentence_gradient_norm/p75": 5.3828125, "masked_per_sentence_gradient_norm/var": 8.385656356811523, "masked_per_token_gradient_norm": 0.07050962746143341, "masked_per_token_gradient_norm/max": 18.75, "masked_per_token_gradient_norm/median": 2.1464074961841106e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 7.37188088351104e-14, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.8719583749771118e-07, "masked_per_token_gradient_norm/var": 0.5598970651626587, "masked_sentence_fisher_curvature": 1011.44140625, "masked_sentence_fisher_curvature/max": 4048.0, "masked_sentence_fisher_curvature/median": 916.0, "masked_sentence_fisher_curvature/min": 25.625, "masked_sentence_fisher_curvature/p25": 164.0, "masked_sentence_fisher_curvature/p75": 1460.0, "masked_sentence_fisher_curvature/p85": 1800.0, "masked_sentence_fisher_curvature/p90": 1912.0, "masked_sentence_fisher_curvature/p95": 2848.0, "masked_sentence_fisher_curvature/p99": 3516.001708984375, "masked_sentence_fisher_curvature/var": 771408.75, "masked_sentence_fisher_kl_divergence": 1.7845783517600466e-08, "masked_sentence_fisher_kl_divergence/max": 7.12461769580841e-08, "masked_sentence_fisher_kl_divergence/median": 1.618172973394394e-08, "masked_sentence_fisher_kl_divergence/min": 4.5292836148291826e-10, "masked_sentence_fisher_kl_divergence/p25": 2.8958311304450035e-09, "masked_sentence_fisher_kl_divergence/p75": 2.578599378466606e-08, "masked_sentence_fisher_kl_divergence/p85": 3.1781382858753204e-08, "masked_sentence_fisher_kl_divergence/p90": 3.3760443329811096e-08, "masked_sentence_fisher_kl_divergence/p95": 5.029141902923584e-08, "masked_sentence_fisher_kl_divergence/p99": 6.195626411908961e-08, "masked_sentence_fisher_kl_divergence/var": 2.399056975162071e-16, "masked_sentence_full_gradient_variance/max_squared_error": 19.194873809814453, "masked_sentence_full_gradient_variance/metric": 19.194873809814453, "masked_sentence_full_gradient_variance/p75": 19.194873809814453, "masked_sentence_full_gradient_variance/p90": 19.194873809814453, "masked_sentence_full_gradient_variance/p95": 19.194873809814453, "masked_sentence_full_gradient_variance/p99": 19.194873809814453, "masked_sentence_full_update_term": 0.001477191923186183, "masked_sentence_full_update_term/max": 0.004669189453125, "masked_sentence_full_update_term/median": 0.00141143798828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 9.870529174804688e-05, "masked_sentence_full_update_term/p75": 0.0022430419921875, "masked_sentence_full_update_term/p85": 0.002716064453125, "masked_sentence_full_update_term/p90": 0.003265380859375, "masked_sentence_full_update_term/p95": 0.00412750244140625, "masked_sentence_full_update_term/p99": 0.004524231422692537, "masked_sentence_full_update_term/var": 1.6786367496024468e-06, "masked_sentence_hessian_coeff": -27418.66796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -29184.0, "masked_sentence_hessian_coeff/min": -95232.0, "masked_sentence_hessian_coeff/p25": -38912.0, "masked_sentence_hessian_coeff/p75": -7464.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 494175680.0, "masked_sentence_hessian_coeff_abs": 27418.66796875, "masked_sentence_hessian_coeff_abs/max": 95232.0, "masked_sentence_hessian_coeff_abs/median": 28544.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 7464.0, "masked_sentence_hessian_coeff_abs/p75": 38912.0, "masked_sentence_hessian_coeff_abs/p99": 80640.046875, "masked_sentence_hessian_coeff_abs/var": 494175680.0, "masked_token_fisher_curvature": 1276.6434326171875, "masked_token_fisher_curvature/max": 552960.0, "masked_token_fisher_curvature/median": 2.0328790734103208e-19, "masked_token_fisher_curvature/min": 9.183549615799121e-41, "masked_token_fisher_curvature/p25": 1.34416849539867e-24, "masked_token_fisher_curvature/p75": 9.547918011776346e-14, "masked_token_fisher_curvature/p85": 5.213784959323675e-11, "masked_token_fisher_curvature/p90": 8.207280188798904e-09, "masked_token_fisher_curvature/p95": 0.00010788626968860626, "masked_token_fisher_curvature/p99": 8334.0, "masked_token_fisher_curvature/var": 329000160.0, "masked_token_fisher_kl_divergence": 2.253499964410821e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 3.5991778800708664e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.3697966112577316e-35, "masked_token_fisher_kl_divergence/p75": 1.6802106192483375e-24, "masked_token_fisher_kl_divergence/p85": 9.208329675305158e-22, "masked_token_fisher_kl_divergence/p90": 1.4484263398048536e-19, "masked_token_fisher_kl_divergence/p95": 1.9033711239069273e-15, "masked_token_fisher_kl_divergence/p99": 1.4698889572173357e-07, "masked_token_fisher_kl_divergence/var": 1.0254563718352719e-13, "masked_token_full_update_term": 1.8712600649450906e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 8.165397611531455e-19, "masked_token_full_update_term/min": -5.0067901611328125e-06, "masked_token_full_update_term/p25": -9.595295105615121e-22, "masked_token_full_update_term/p75": 4.884981308350689e-14, "masked_token_full_update_term/p85": 4.263256414560601e-12, "masked_token_full_update_term/p90": 6.639311322942376e-11, "masked_token_full_update_term/p95": 2.3515895009040833e-08, "masked_token_full_update_term/p99": 0.000492095947265625, "masked_token_full_update_term/var": 4.0570462545019836e-08, "masked_token_hessian_coeff": -38101.95703125, "masked_token_hessian_coeff/max": 40.25, "masked_token_hessian_coeff/median": -1.0622898116707802e-09, "masked_token_hessian_coeff/min": -8290304.0, "masked_token_hessian_coeff/p25": -9.655952453613281e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.018337249755859375, "masked_token_hessian_coeff/var": 167836237824.0, "masked_token_hessian_coeff_abs": 38101.99609375, "masked_token_hessian_coeff_abs/max": 8290304.0, "masked_token_hessian_coeff_abs/median": 2.3748725652694702e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 9.606537787476555e-12, "masked_token_hessian_coeff_abs/p75": 4.220008850097656e-05, "masked_token_hessian_coeff_abs/p99": 963648.0, "masked_token_hessian_coeff_abs/var": 167836237824.0, "mean_logprobs": -0.00799560546875, "mean_logprobs/var": 3.647804260253906e-05, "num_completions/total": 5952, "per_sentence_gradient_norm": 47.43424606323242, "per_sentence_gradient_norm/max": 204.0, "per_sentence_gradient_norm/median": 35.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 10.5625, "per_sentence_gradient_norm/p75": 64.125, "per_sentence_gradient_norm/var": 2287.289794921875, "per_token_feature_norm": 190.87998962402344, "per_token_feature_norm/max": 251.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 100.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 142.5214385986328, "per_token_gradient_norm": 1.1723664999008179, "per_token_gradient_norm/max": 300.0, "per_token_gradient_norm/median": 2.5283952709287405e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 8.08242361927114e-14, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.4028122425079346e-07, "per_token_gradient_norm/var": 151.57962036132812, "per_token_policy_error_norm": 0.004613024182617664, "per_token_policy_error_norm/max": 1.9921875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004372408613562584, "policy_entropy": 0.008680546656250954, "policy_entropy/max": 1.296875, "policy_entropy/median": 2.9467628337442875e-10, "policy_entropy/min": 2.9434394917086937e-20, "policy_entropy/p25": 7.744915819785092e-13, "policy_entropy/p75": 8.475035429000854e-08, "policy_entropy/var": 0.004459678195416927, "policy_loss": -0.8333333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.14035087823867798, "policy_sharpness": 9.74732780456543, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6311293840408325, "reward": 0.8333333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.14035087823867798, "rewards/accuracy_reward": 0.8333333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.14035087823867798, "sentence_fisher_curvature": 318173.78125, "sentence_fisher_curvature/max": 1236992.0, "sentence_fisher_curvature/median": 284672.0, "sentence_fisher_curvature/min": 25.625, "sentence_fisher_curvature/p25": 33792.0, "sentence_fisher_curvature/p75": 483328.0, "sentence_fisher_curvature/p85": 636928.0, "sentence_fisher_curvature/p90": 790528.0, "sentence_fisher_curvature/p95": 878592.0, "sentence_fisher_curvature/p99": 1054106.25, "sentence_fisher_curvature/var": 88213577728.0, "sentence_fisher_kl_divergence": 5.616411272058031e-06, "sentence_fisher_kl_divergence/max": 2.181529998779297e-05, "sentence_fisher_kl_divergence/median": 5.036592483520508e-06, "sentence_fisher_kl_divergence/min": 4.5292836148291826e-10, "sentence_fisher_kl_divergence/p25": 5.960464477539062e-07, "sentence_fisher_kl_divergence/p75": 8.52346420288086e-06, "sentence_fisher_kl_divergence/p85": 1.1220574378967285e-05, "sentence_fisher_kl_divergence/p90": 1.3947486877441406e-05, "sentence_fisher_kl_divergence/p95": 1.5527009963989258e-05, "sentence_fisher_kl_divergence/p99": 1.8644343072082847e-05, "sentence_fisher_kl_divergence/var": 2.7508979469348382e-11, "sentence_full_gradient_variance/max_squared_error": 4427.9716796875, "sentence_full_gradient_variance/metric": 4427.9716796875, "sentence_full_gradient_variance/p75": 4427.9716796875, "sentence_full_gradient_variance/p90": 4427.9716796875, "sentence_full_gradient_variance/p95": 4427.9716796875, "sentence_full_gradient_variance/p99": 4427.9716796875, "sentence_full_update_term": 0.023213624954223633, "sentence_full_update_term/max": 0.099609375, "sentence_full_update_term/median": 0.020751953125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0047607421875, "sentence_full_update_term/p75": 0.0361328125, "sentence_full_update_term/p85": 0.04193115234375, "sentence_full_update_term/p90": 0.04736328125, "sentence_full_update_term/p95": 0.05694580078125, "sentence_full_update_term/p99": 0.09450685232877731, "sentence_full_update_term/var": 0.00045010834583081305, "sentence_hessian_coeff": 17120.66796875, "sentence_hessian_coeff/max": 798720.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -382976.0, "sentence_hessian_coeff/p25": -85504.0, "sentence_hessian_coeff/p75": 81792.0, "sentence_hessian_coeff/p99": 666419.625, "sentence_hessian_coeff/var": 34732826624.0, "sentence_hessian_coeff_abs": 124657.3359375, "sentence_hessian_coeff_abs/max": 798720.0, "sentence_hessian_coeff_abs/median": 84992.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 36736.0, "sentence_hessian_coeff_abs/p75": 187136.0, "sentence_hessian_coeff_abs/p99": 666419.625, "sentence_hessian_coeff_abs/var": 19326009344.0, "step": 62, "token_fisher_curvature": 345774.3125, "token_fisher_curvature/max": 170917888.0, "token_fisher_curvature/median": 2.930733997499879e-19, "token_fisher_curvature/min": 9.183549615799121e-41, "token_fisher_curvature/p25": 1.5315766029302153e-24, "token_fisher_curvature/p75": 1.603162047558726e-13, "token_fisher_curvature/p85": 1.4097167877480388e-10, "token_fisher_curvature/p90": 4.1211023926734924e-08, "token_fisher_curvature/p95": 0.0067138671875, "token_fisher_curvature/p99": 1581056.0, "token_fisher_curvature/var": 27604933410816.0, "token_fisher_kl_divergence": 6.10179176874226e-06, "token_fisher_kl_divergence/max": 0.003021240234375, "token_fisher_kl_divergence/median": 5.17689969051289e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.7083389842945504e-35, "token_fisher_kl_divergence/p75": 2.824046310044321e-24, "token_fisher_kl_divergence/p85": 2.4881592825595072e-21, "token_fisher_kl_divergence/p90": 7.284483346386983e-19, "token_fisher_kl_divergence/p95": 1.1812772982011666e-13, "token_fisher_kl_divergence/p99": 2.7894973754882812e-05, "token_fisher_kl_divergence/var": 8.595524292331902e-09, "token_full_update_term": 0.000329910108121112, "token_full_update_term/max": 0.08056640625, "token_full_update_term/median": 1.0977546996415732e-18, "token_full_update_term/min": -5.0067901611328125e-06, "token_full_update_term/p25": -1.2924697071141057e-22, "token_full_update_term/p75": 7.09432512735475e-14, "token_full_update_term/p85": 7.247535904753022e-12, "token_full_update_term/p90": 1.7462298274040222e-10, "token_full_update_term/p95": 1.7429556464776397e-07, "token_full_update_term/p99": 0.007221341133117676, "token_full_update_term/var": 1.2372146557027008e-05, "token_hessian_coeff": -7061.22607421875, "token_hessian_coeff/max": 170917888.0, "token_hessian_coeff/median": -1.229636836796999e-09, "token_hessian_coeff/min": -21495808.0, "token_hessian_coeff/p25": -1.233816146850586e-05, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.04544544219970703, "token_hessian_coeff/var": 18942846304256.0, "token_hessian_coeff_abs": 345524.1875, "token_hessian_coeff_abs/max": 170917888.0, "token_hessian_coeff_abs/median": 2.8638169169425964e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.0686562745831907e-11, "token_hessian_coeff_abs/p75": 5.793571472167969e-05, "token_hessian_coeff_abs/p99": 9716224.0, "token_hessian_coeff_abs/var": 18823512064000.0 }, { "accuracy_reward": 0.5520833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24989034235477448, "adam_stats/lm_head/lr_effective_max": 2.375495751039125e-05, "adam_stats/lm_head/lr_effective_mean": -4.0809012419318336e-11, "adam_stats/lm_head/lr_effective_min": -2.5084731532842852e-05, "adam_stats/lm_head/lr_effective_std": 6.355933237500722e-07, "adam_stats/lr_effective_max": 2.8207492505316623e-05, "adam_stats/lr_effective_mean": -1.3198045087370325e-11, "adam_stats/lr_effective_min": -2.8502550776465796e-05, "adam_stats/m_t_max": 0.0006376929231919348, "adam_stats/m_t_mean": -8.606467860432232e-13, "adam_stats/m_t_min": -0.000600363768171519, "adam_stats/v_t_max": 2.5249935788451694e-05, "adam_stats/v_t_mean": 1.7194937964801205e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5520833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24989034235477448, "all_logprobs": -0.01019959431141615, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.6875, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00012302398681640625, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01590001955628395, "clip_ratio": 0.0, "completion_length": 563.9375, "completion_length/correct": 464.0377502441406, "completion_length/correct/max": 937.0, "completion_length/correct/median": 389.0, "completion_length/correct/min": 272.0, "completion_length/correct/p25": 352.0, "completion_length/correct/p75": 513.0, "completion_length/correct/var": 32032.845703125, "completion_length/incorrect": 687.0697631835938, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 735.0, "completion_length/incorrect/min": 291.0, "completion_length/incorrect/p25": 337.0, "completion_length/incorrect/p75": 981.0, "completion_length/incorrect/var": 82105.4140625, "completion_length/max": 1024.0, "completion_length/median": 475.0, "completion_length/min": 272.0, "completion_length/p25": 346.5, "completion_length/p75": 791.0, "completion_length/var": 66263.359375, "curvature_clip_ratio_token_fisher": 0.007757951971143484, "curvature_clip_ratio_token_hessian": 0.0055413939990103245, "curvature_clip_ratio_total_fisher": 0.007757951971143484, "curvature_clip_ratio_total_full": 0.007757951971143484, "curvature_clip_ratio_total_hessian": 0.0055413939990103245, "epoch": 0.1008, "feature_vector_variance/max_squared_error": 69270.6796875, "feature_vector_variance/metric": 31329.08984375, "generated_tokens/total": 3566176.0, "global_fisher_curvature": 137216.0, "global_fisher_curvature/max": 137216.0, "global_fisher_curvature/median": 137216.0, "global_fisher_curvature/min": 137216.0, "global_fisher_curvature/p25": 137216.0, "global_fisher_curvature/p75": 137216.0, "global_fisher_curvature/p85": 137216.0, "global_fisher_curvature/p90": 137216.0, "global_fisher_curvature/p95": 137216.0, "global_fisher_curvature/p99": 137216.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.2202730178833008e-06, "global_fisher_kl_divergence/max": 2.2202730178833008e-06, "global_fisher_kl_divergence/median": 2.2202730178833008e-06, "global_fisher_kl_divergence/min": 2.2202730178833008e-06, "global_fisher_kl_divergence/p25": 2.2202730178833008e-06, "global_fisher_kl_divergence/p75": 2.2202730178833008e-06, "global_fisher_kl_divergence/p85": 2.2202730178833008e-06, "global_fisher_kl_divergence/p90": 2.2202730178833008e-06, "global_fisher_kl_divergence/p95": 2.2202730178833008e-06, "global_fisher_kl_divergence/p99": 2.2202730178833008e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.046630859375, "global_full_update_term/max": 0.046630859375, "global_full_update_term/median": 0.046630859375, "global_full_update_term/min": 0.046630859375, "global_full_update_term/p25": 0.046630859375, "global_full_update_term/p75": 0.046630859375, "global_full_update_term/p85": 0.046630859375, "global_full_update_term/p90": 0.046630859375, "global_full_update_term/p95": 0.046630859375, "global_full_update_term/p99": 0.046630859375, "global_full_update_term/var": NaN, "global_hessian_coeff": 42496.0, "global_hessian_coeff/max": 42496.0, "global_hessian_coeff/median": 42496.0, "global_hessian_coeff/min": 42496.0, "global_hessian_coeff/p25": 42496.0, "global_hessian_coeff/p75": 42496.0, "global_hessian_coeff/p99": 42496.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 42496.0, "global_hessian_coeff_abs/max": 42496.0, "global_hessian_coeff_abs/median": 42496.0, "global_hessian_coeff_abs/min": 42496.0, "global_hessian_coeff_abs/p25": 42496.0, "global_hessian_coeff_abs/p75": 42496.0, "global_hessian_coeff_abs/p99": 42496.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.08478718250989914, "learning_rate": 5.432719831372507e-06, "loss": -0.5521, "masked_global_fisher_curvature": 828.0, "masked_global_fisher_curvature/max": 828.0, "masked_global_fisher_curvature/median": 828.0, "masked_global_fisher_curvature/min": 828.0, "masked_global_fisher_curvature/p25": 828.0, "masked_global_fisher_curvature/p75": 828.0, "masked_global_fisher_curvature/p85": 828.0, "masked_global_fisher_curvature/p90": 828.0, "masked_global_fisher_curvature/p95": 828.0, "masked_global_fisher_curvature/p99": 828.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/max": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/median": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/min": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/p25": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/p75": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/p85": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/p90": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/p95": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/p99": 1.3387762010097504e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003387451171875, "masked_global_full_update_term/max": 0.003387451171875, "masked_global_full_update_term/median": 0.003387451171875, "masked_global_full_update_term/min": 0.003387451171875, "masked_global_full_update_term/p25": 0.003387451171875, "masked_global_full_update_term/p75": 0.003387451171875, "masked_global_full_update_term/p85": 0.003387451171875, "masked_global_full_update_term/p90": 0.003387451171875, "masked_global_full_update_term/p95": 0.003387451171875, "masked_global_full_update_term/p99": 0.003387451171875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -6368.0, "masked_global_hessian_coeff/max": -6368.0, "masked_global_hessian_coeff/median": -6368.0, "masked_global_hessian_coeff/min": -6368.0, "masked_global_hessian_coeff/p25": -6368.0, "masked_global_hessian_coeff/p75": -6368.0, "masked_global_hessian_coeff/p99": -6368.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 6368.0, "masked_global_hessian_coeff_abs/max": 6368.0, "masked_global_hessian_coeff_abs/median": 6368.0, "masked_global_hessian_coeff_abs/min": 6368.0, "masked_global_hessian_coeff_abs/p25": 6368.0, "masked_global_hessian_coeff_abs/p75": 6368.0, "masked_global_hessian_coeff_abs/p99": 6368.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.82452392578125, "masked_per_sentence_gradient_norm/max": 7.09375, "masked_per_sentence_gradient_norm/median": 0.7109375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 2.66015625, "masked_per_sentence_gradient_norm/var": 5.991543292999268, "masked_per_token_gradient_norm": 0.04197048395872116, "masked_per_token_gradient_norm/max": 18.5, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.724401954561472e-09, "masked_per_token_gradient_norm/var": 0.37957340478897095, "masked_sentence_fisher_curvature": 1131.0689697265625, "masked_sentence_fisher_curvature/max": 3264.0, "masked_sentence_fisher_curvature/median": 1012.0, "masked_sentence_fisher_curvature/min": 0.1181640625, "masked_sentence_fisher_curvature/p25": 300.5, "masked_sentence_fisher_curvature/p75": 1674.0, "masked_sentence_fisher_curvature/p85": 2172.0, "masked_sentence_fisher_curvature/p90": 2720.0, "masked_sentence_fisher_curvature/p95": 3188.0, "masked_sentence_fisher_curvature/p99": 3203.2001953125, "masked_sentence_fisher_curvature/var": 885971.4375, "masked_sentence_fisher_kl_divergence": 1.8276772095759952e-08, "masked_sentence_fisher_kl_divergence/max": 5.2852556109428406e-08, "masked_sentence_fisher_kl_divergence/median": 1.641456037759781e-08, "masked_sentence_fisher_kl_divergence/min": 1.9042545318370685e-12, "masked_sentence_fisher_kl_divergence/p25": 4.8603396862745285e-09, "masked_sentence_fisher_kl_divergence/p75": 2.703745849430561e-08, "masked_sentence_fisher_kl_divergence/p85": 3.504101186990738e-08, "masked_sentence_fisher_kl_divergence/p90": 4.3888576328754425e-08, "masked_sentence_fisher_kl_divergence/p95": 5.1513779908418655e-08, "masked_sentence_fisher_kl_divergence/p99": 5.1746614104786204e-08, "masked_sentence_fisher_kl_divergence/var": 2.312960840634e-16, "masked_sentence_full_gradient_variance/max_squared_error": 8.873252868652344, "masked_sentence_full_gradient_variance/metric": 8.873252868652344, "masked_sentence_full_gradient_variance/p75": 8.873252868652344, "masked_sentence_full_gradient_variance/p90": 8.873252868652344, "masked_sentence_full_gradient_variance/p95": 8.873252868652344, "masked_sentence_full_gradient_variance/p99": 8.873252868652344, "masked_sentence_full_update_term": 0.0007318159332498908, "masked_sentence_full_update_term/max": 0.004425048828125, "masked_sentence_full_update_term/median": 0.0001506805419921875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0011081695556640625, "masked_sentence_full_update_term/p85": 0.0019741058349609375, "masked_sentence_full_update_term/p90": 0.0024566650390625, "masked_sentence_full_update_term/p95": 0.002655029296875, "masked_sentence_full_update_term/p99": 0.0036567712668329477, "masked_sentence_full_update_term/var": 1.0937761771856458e-06, "masked_sentence_hessian_coeff": -19748.66796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -4352.0, "masked_sentence_hessian_coeff/min": -94208.0, "masked_sentence_hessian_coeff/p25": -40768.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 583663296.0, "masked_sentence_hessian_coeff_abs": 19748.66796875, "masked_sentence_hessian_coeff_abs/max": 94208.0, "masked_sentence_hessian_coeff_abs/median": 4352.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 40768.0, "masked_sentence_hessian_coeff_abs/p99": 70860.875, "masked_sentence_hessian_coeff_abs/var": 583663296.0, "masked_token_fisher_curvature": 1206.5357666015625, "masked_token_fisher_curvature/max": 614400.0, "masked_token_fisher_curvature/median": 2.9002408113987244e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.4889251025954498e-23, "masked_token_fisher_curvature/p75": 4.547473508864641e-13, "masked_token_fisher_curvature/p85": 2.8194335754960775e-10, "masked_token_fisher_curvature/p90": 3.073364496231079e-08, "masked_token_fisher_curvature/p95": 0.00016473978757858276, "masked_token_fisher_curvature/p99": 4394.5, "masked_token_fisher_curvature/var": 380876928.0, "masked_token_fisher_kl_divergence": 1.9492116365427137e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 4.69372238606502e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.407412430484045e-34, "masked_token_fisher_kl_divergence/p75": 7.34122793640812e-24, "masked_token_fisher_kl_divergence/p85": 4.552802091491864e-21, "masked_token_fisher_kl_divergence/p90": 4.980553729855286e-19, "masked_token_fisher_kl_divergence/p95": 2.663017376058896e-15, "masked_token_fisher_kl_divergence/p99": 7.093331078067422e-08, "masked_token_fisher_kl_divergence/var": 9.939389534942972e-14, "masked_token_full_update_term": 1.0758561984403059e-05, "masked_token_full_update_term/max": 0.00433349609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.1441450119018555e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.463672932855431e-17, "masked_token_full_update_term/p85": 4.574118861455645e-14, "masked_token_full_update_term/p90": 1.5297763056310032e-12, "masked_token_full_update_term/p95": 2.346496330574155e-10, "masked_token_full_update_term/p99": 2.3126602172851562e-05, "masked_token_full_update_term/var": 2.56419312449907e-08, "masked_token_hessian_coeff": -22185.96875, "masked_token_hessian_coeff/max": 780.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -9175040.0, "masked_token_hessian_coeff/p25": -4.940375220030546e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.002187013626098633, "masked_token_hessian_coeff/var": 108251160576.0, "masked_token_hessian_coeff_abs": 22186.064453125, "masked_token_hessian_coeff_abs/max": 9175040.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.1047890186309814e-07, "masked_token_hessian_coeff_abs/p99": 22272.0, "masked_token_hessian_coeff_abs/var": 108251152384.0, "mean_logprobs": -0.009765625, "mean_logprobs/var": 5.316734313964844e-05, "num_completions/total": 6048, "per_sentence_gradient_norm": 38.916015625, "per_sentence_gradient_norm/max": 237.0, "per_sentence_gradient_norm/median": 14.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 55.6875, "per_sentence_gradient_norm/var": 2876.170166015625, "per_token_feature_norm": 190.290283203125, "per_token_feature_norm/max": 282.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 149.97470092773438, "per_token_gradient_norm": 0.7504942417144775, "per_token_gradient_norm/max": 280.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.2264430299401283e-09, "per_token_gradient_norm/var": 104.25823974609375, "per_token_policy_error_norm": 0.0060340226627886295, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0058682202361524105, "policy_entropy": 0.010478752665221691, "policy_entropy/max": 2.390625, "policy_entropy/median": 9.276845958083868e-10, "policy_entropy/min": 6.054962083888163e-22, "policy_entropy/p25": 3.069544618483633e-12, "policy_entropy/p75": 1.6111880540847778e-07, "policy_entropy/var": 0.005875566974282265, "policy_loss": -0.5520833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24989034235477448, "policy_sharpness": 9.718016624450684, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8607704639434814, "reward": 0.5520833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24989034235477448, "rewards/accuracy_reward": 0.5520833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24989034235477448, "sentence_fisher_curvature": 267682.71875, "sentence_fisher_curvature/max": 1540096.0, "sentence_fisher_curvature/median": 19200.0, "sentence_fisher_curvature/min": 177.0, "sentence_fisher_curvature/p25": 1466.0, "sentence_fisher_curvature/p75": 503808.0, "sentence_fisher_curvature/p85": 684032.0, "sentence_fisher_curvature/p90": 747520.0, "sentence_fisher_curvature/p95": 866304.0, "sentence_fisher_curvature/p99": 1228801.0, "sentence_fisher_curvature/var": 115518251008.0, "sentence_fisher_kl_divergence": 4.325912868807791e-06, "sentence_fisher_kl_divergence/max": 2.491474151611328e-05, "sentence_fisher_kl_divergence/median": 3.110617399215698e-07, "sentence_fisher_kl_divergence/min": 2.86672729998827e-09, "sentence_fisher_kl_divergence/p25": 2.36614141613245e-08, "sentence_fisher_kl_divergence/p75": 8.165836334228516e-06, "sentence_fisher_kl_divergence/p85": 1.1026859283447266e-05, "sentence_fisher_kl_divergence/p90": 1.2069940567016602e-05, "sentence_fisher_kl_divergence/p95": 1.399219036102295e-05, "sentence_fisher_kl_divergence/p99": 1.9818560758722015e-05, "sentence_fisher_kl_divergence/var": 3.016748656436974e-11, "sentence_full_gradient_variance/max_squared_error": 4252.666015625, "sentence_full_gradient_variance/metric": 4252.666015625, "sentence_full_gradient_variance/p75": 4252.666015625, "sentence_full_gradient_variance/p90": 4252.666015625, "sentence_full_gradient_variance/p95": 4252.666015625, "sentence_full_gradient_variance/p99": 4252.666015625, "sentence_full_update_term": 0.017236709594726562, "sentence_full_update_term/max": 0.09521484375, "sentence_full_update_term/median": 0.00433349609375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.032470703125, "sentence_full_update_term/p85": 0.0374755859375, "sentence_full_update_term/p90": 0.046142578125, "sentence_full_update_term/p95": 0.0753173828125, "sentence_full_update_term/p99": 0.09104005247354507, "sentence_full_update_term/var": 0.0005838855868205428, "sentence_hessian_coeff": 40585.16796875, "sentence_hessian_coeff/max": 1204224.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -276480.0, "sentence_hessian_coeff/p25": -3484.0, "sentence_hessian_coeff/p75": 51456.0, "sentence_hessian_coeff/p99": 640001.8125, "sentence_hessian_coeff/var": 36761014272.0, "sentence_hessian_coeff_abs": 97994.8359375, "sentence_hessian_coeff_abs/max": 1204224.0, "sentence_hessian_coeff_abs/median": 28032.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 172544.0, "sentence_hessian_coeff_abs/p99": 640001.8125, "sentence_hessian_coeff_abs/var": 28721436672.0, "step": 63, "token_fisher_curvature": 248763.0, "token_fisher_curvature/max": 177209344.0, "token_fisher_curvature/median": 3.63207727782644e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.6543612251060553e-23, "token_fisher_curvature/p75": 6.288303211476887e-13, "token_fisher_curvature/p85": 4.725961844087578e-10, "token_fisher_curvature/p90": 6.845220923423767e-08, "token_fisher_curvature/p95": 0.0014911293983459473, "token_fisher_curvature/p99": 175516.0, "token_fisher_curvature/var": 20624990797824.0, "token_fisher_kl_divergence": 4.020297637907788e-06, "token_fisher_kl_divergence/max": 0.00286865234375, "token_fisher_kl_divergence/median": 5.877013743896538e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.6782463289135e-34, "token_fisher_kl_divergence/p75": 1.0184661292059153e-23, "token_fisher_kl_divergence/p85": 7.642321679377423e-21, "token_fisher_kl_divergence/p90": 1.1045309632196076e-18, "token_fisher_kl_divergence/p95": 2.4141279253431236e-14, "token_fisher_kl_divergence/p99": 2.8316862881183624e-06, "token_fisher_kl_divergence/var": 5.3885025330657754e-09, "token_full_update_term": 0.00020802243670914322, "token_full_update_term/max": 0.07861328125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.1441450119018555e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.5614276324970042e-17, "token_full_update_term/p85": 6.838973831690964e-14, "token_full_update_term/p90": 2.7569058147491887e-12, "token_full_update_term/p95": 6.548361852765083e-10, "token_full_update_term/p99": 0.0015385448932647705, "token_full_update_term/var": 8.166619409166742e-06, "token_hessian_coeff": 31476.107421875, "token_hessian_coeff/max": 177209344.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -22151168.0, "token_hessian_coeff/p25": -5.995389074087143e-09, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.00750732421875, "token_hessian_coeff/var": 13182044209152.0, "token_hessian_coeff_abs": 224271.5, "token_hessian_coeff_abs/max": 177209344.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.812594175338745e-07, "token_hessian_coeff_abs/p99": 2719744.0, "token_hessian_coeff_abs/var": 13132737019904.0 }, { "accuracy_reward": 0.65625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2279605269432068, "adam_stats/lm_head/lr_effective_max": 2.5485645892331377e-05, "adam_stats/lm_head/lr_effective_mean": -4.7771349376279915e-11, "adam_stats/lm_head/lr_effective_min": -2.405462510068901e-05, "adam_stats/lm_head/lr_effective_std": 6.33925139936764e-07, "adam_stats/lr_effective_max": 2.7934838726650923e-05, "adam_stats/lr_effective_mean": -7.372501914515439e-12, "adam_stats/lr_effective_min": -2.8736380045302212e-05, "adam_stats/m_t_max": 0.0005515887751244009, "adam_stats/m_t_mean": -1.2335175198982529e-12, "adam_stats/m_t_min": -0.00039843725971877575, "adam_stats/v_t_max": 2.522644899727311e-05, "adam_stats/v_t_mean": 1.7208867794313298e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.65625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2279605269432068, "all_logprobs": -0.012177936732769012, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.96875, "all_logprobs/p1": -0.251953125, "all_logprobs/p10": -1.0728836059570312e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0001074313186109066, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.021289445459842682, "clip_ratio": 0.0, "completion_length": 541.3333740234375, "completion_length/correct": 474.3492431640625, "completion_length/correct/max": 1022.0, "completion_length/correct/median": 420.0, "completion_length/correct/min": 212.0, "completion_length/correct/p25": 340.0, "completion_length/correct/p75": 602.0, "completion_length/correct/var": 40068.29296875, "completion_length/incorrect": 669.212158203125, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 722.0, "completion_length/incorrect/min": 306.0, "completion_length/incorrect/p25": 515.0, "completion_length/incorrect/p75": 784.0, "completion_length/incorrect/var": 53823.85546875, "completion_length/max": 1024.0, "completion_length/median": 445.0, "completion_length/min": 212.0, "completion_length/p25": 348.0, "completion_length/p75": 722.0, "completion_length/var": 52935.99609375, "curvature_clip_ratio_token_fisher": 0.00902478490024805, "curvature_clip_ratio_token_hessian": 0.006619458086788654, "curvature_clip_ratio_total_fisher": 0.00902478490024805, "curvature_clip_ratio_total_full": 0.00902478490024805, "curvature_clip_ratio_total_hessian": 0.006619458086788654, "epoch": 0.1024, "feature_vector_variance/max_squared_error": 60491.55859375, "feature_vector_variance/metric": 31154.0, "generated_tokens/total": 3618144.0, "global_fisher_curvature": 148480.0, "global_fisher_curvature/max": 148480.0, "global_fisher_curvature/median": 148480.0, "global_fisher_curvature/min": 148480.0, "global_fisher_curvature/p25": 148480.0, "global_fisher_curvature/p75": 148480.0, "global_fisher_curvature/p85": 148480.0, "global_fisher_curvature/p90": 148480.0, "global_fisher_curvature/p95": 148480.0, "global_fisher_curvature/p99": 148480.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.1904706954956055e-06, "global_fisher_kl_divergence/max": 2.1904706954956055e-06, "global_fisher_kl_divergence/median": 2.1904706954956055e-06, "global_fisher_kl_divergence/min": 2.1904706954956055e-06, "global_fisher_kl_divergence/p25": 2.1904706954956055e-06, "global_fisher_kl_divergence/p75": 2.1904706954956055e-06, "global_fisher_kl_divergence/p85": 2.1904706954956055e-06, "global_fisher_kl_divergence/p90": 2.1904706954956055e-06, "global_fisher_kl_divergence/p95": 2.1904706954956055e-06, "global_fisher_kl_divergence/p99": 2.1904706954956055e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.05419921875, "global_full_update_term/max": 0.05419921875, "global_full_update_term/median": 0.05419921875, "global_full_update_term/min": 0.05419921875, "global_full_update_term/p25": 0.05419921875, "global_full_update_term/p75": 0.05419921875, "global_full_update_term/p85": 0.05419921875, "global_full_update_term/p90": 0.05419921875, "global_full_update_term/p95": 0.05419921875, "global_full_update_term/p99": 0.05419921875, "global_full_update_term/var": NaN, "global_hessian_coeff": 55552.0, "global_hessian_coeff/max": 55552.0, "global_hessian_coeff/median": 55552.0, "global_hessian_coeff/min": 55552.0, "global_hessian_coeff/p25": 55552.0, "global_hessian_coeff/p75": 55552.0, "global_hessian_coeff/p99": 55552.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 55552.0, "global_hessian_coeff_abs/max": 55552.0, "global_hessian_coeff_abs/median": 55552.0, "global_hessian_coeff_abs/min": 55552.0, "global_hessian_coeff_abs/p25": 55552.0, "global_hessian_coeff_abs/p75": 55552.0, "global_hessian_coeff_abs/p99": 55552.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.10659752041101456, "learning_rate": 5.182372542187895e-06, "loss": -0.6562, "masked_global_fisher_curvature": 498.0, "masked_global_fisher_curvature/max": 498.0, "masked_global_fisher_curvature/median": 498.0, "masked_global_fisher_curvature/min": 498.0, "masked_global_fisher_curvature/p25": 498.0, "masked_global_fisher_curvature/p75": 498.0, "masked_global_fisher_curvature/p85": 498.0, "masked_global_fisher_curvature/p90": 498.0, "masked_global_fisher_curvature/p95": 498.0, "masked_global_fisher_curvature/p99": 498.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/max": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/median": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/min": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/p25": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/p75": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/p85": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/p90": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/p95": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/p99": 7.363269105553627e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00189208984375, "masked_global_full_update_term/max": 0.00189208984375, "masked_global_full_update_term/median": 0.00189208984375, "masked_global_full_update_term/min": 0.00189208984375, "masked_global_full_update_term/p25": 0.00189208984375, "masked_global_full_update_term/p75": 0.00189208984375, "masked_global_full_update_term/p85": 0.00189208984375, "masked_global_full_update_term/p90": 0.00189208984375, "masked_global_full_update_term/p95": 0.00189208984375, "masked_global_full_update_term/p99": 0.00189208984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5920.0, "masked_global_hessian_coeff/max": -5920.0, "masked_global_hessian_coeff/median": -5920.0, "masked_global_hessian_coeff/min": -5920.0, "masked_global_hessian_coeff/p25": -5920.0, "masked_global_hessian_coeff/p75": -5920.0, "masked_global_hessian_coeff/p99": -5920.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5920.0, "masked_global_hessian_coeff_abs/max": 5920.0, "masked_global_hessian_coeff_abs/median": 5920.0, "masked_global_hessian_coeff_abs/min": 5920.0, "masked_global_hessian_coeff_abs/p25": 5920.0, "masked_global_hessian_coeff_abs/p75": 5920.0, "masked_global_hessian_coeff_abs/p99": 5920.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.5440673828125, "masked_per_sentence_gradient_norm/max": 9.625, "masked_per_sentence_gradient_norm/median": 1.484375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.84765625, "masked_per_sentence_gradient_norm/var": 8.315162658691406, "masked_per_token_gradient_norm": 0.051407162100076675, "masked_per_token_gradient_norm/max": 17.0, "masked_per_token_gradient_norm/median": 2.7200464103316335e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.577465683221817e-09, "masked_per_token_gradient_norm/var": 0.46327370405197144, "masked_sentence_fisher_curvature": 1149.9427490234375, "masked_sentence_fisher_curvature/max": 5056.0, "masked_sentence_fisher_curvature/median": 980.0, "masked_sentence_fisher_curvature/min": 30.5, "masked_sentence_fisher_curvature/p25": 347.0, "masked_sentence_fisher_curvature/p75": 1764.0, "masked_sentence_fisher_curvature/p85": 1904.0, "masked_sentence_fisher_curvature/p90": 2320.0, "masked_sentence_fisher_curvature/p95": 2636.0, "masked_sentence_fisher_curvature/p99": 5056.0, "masked_sentence_fisher_curvature/var": 1069621.875, "masked_sentence_fisher_kl_divergence": 1.6957661941319202e-08, "masked_sentence_fisher_kl_divergence/max": 7.450580596923828e-08, "masked_sentence_fisher_kl_divergence/median": 1.4435499906539917e-08, "masked_sentence_fisher_kl_divergence/min": 4.4929038267582655e-10, "masked_sentence_fisher_kl_divergence/p25": 5.107722245156765e-09, "masked_sentence_fisher_kl_divergence/p75": 2.601882442831993e-08, "masked_sentence_fisher_kl_divergence/p85": 2.805609256029129e-08, "masked_sentence_fisher_kl_divergence/p90": 3.4226104617118835e-08, "masked_sentence_fisher_kl_divergence/p95": 3.882450982928276e-08, "masked_sentence_fisher_kl_divergence/p99": 7.450580596923828e-08, "masked_sentence_fisher_kl_divergence/var": 2.3235950745889816e-16, "masked_sentence_full_gradient_variance/max_squared_error": 14.044576644897461, "masked_sentence_full_gradient_variance/metric": 14.044576644897461, "masked_sentence_full_gradient_variance/p75": 14.044576644897461, "masked_sentence_full_gradient_variance/p90": 14.044576644897461, "masked_sentence_full_gradient_variance/p95": 14.044576644897461, "masked_sentence_full_gradient_variance/p99": 14.044576644897461, "masked_sentence_full_update_term": 0.0008904337882995605, "masked_sentence_full_update_term/max": 0.0032806396484375, "masked_sentence_full_update_term/median": 0.000499725341796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0013103485107421875, "masked_sentence_full_update_term/p85": 0.002155303955078125, "masked_sentence_full_update_term/p90": 0.00261688232421875, "masked_sentence_full_update_term/p95": 0.0028533935546875, "masked_sentence_full_update_term/p99": 0.0032806396484375, "masked_sentence_full_update_term/var": 9.745747320266673e-07, "masked_sentence_hessian_coeff": -24220.5, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -24320.0, "masked_sentence_hessian_coeff/min": -102912.0, "masked_sentence_hessian_coeff/p25": -40448.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 613504320.0, "masked_sentence_hessian_coeff_abs": 24220.5, "masked_sentence_hessian_coeff_abs/max": 102912.0, "masked_sentence_hessian_coeff_abs/median": 23552.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 40448.0, "masked_sentence_hessian_coeff_abs/p99": 102912.0, "masked_sentence_hessian_coeff_abs/var": 613504320.0, "masked_token_fisher_curvature": 1271.22314453125, "masked_token_fisher_curvature/max": 671744.0, "masked_token_fisher_curvature/median": 2.591920818598159e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.719178453979918e-25, "masked_token_fisher_curvature/p75": 1.5187850976872141e-13, "masked_token_fisher_curvature/p85": 1.2187229003757238e-10, "masked_token_fisher_curvature/p90": 1.4668330550193787e-08, "masked_token_fisher_curvature/p95": 0.0001850128173828125, "masked_token_fisher_curvature/p99": 6880.0, "masked_token_fisher_curvature/var": 415213216.0, "masked_token_fisher_kl_divergence": 1.875375588156203e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 3.821045009664276e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 8.416539551887579e-36, "masked_token_fisher_kl_divergence/p75": 2.235972593307403e-24, "masked_token_fisher_kl_divergence/p85": 1.7999450129153882e-21, "masked_token_fisher_kl_divergence/p90": 2.168404344971009e-19, "masked_token_fisher_kl_divergence/p95": 2.733924198139448e-15, "masked_token_fisher_kl_divergence/p99": 1.0151416063308716e-07, "masked_token_fisher_kl_divergence/var": 9.03563722866145e-14, "masked_token_full_update_term": 1.275637714570621e-05, "masked_token_full_update_term/max": 0.00433349609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.3676624298095703e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.0878077872387166e-16, "masked_token_full_update_term/p85": 1.376676550535194e-13, "masked_token_full_update_term/p90": 4.575895218295045e-12, "masked_token_full_update_term/p95": 5.20230969414115e-10, "masked_token_full_update_term/p99": 5.030632019042969e-05, "masked_token_full_update_term/var": 2.9505958565323454e-08, "masked_token_hessian_coeff": -28390.154296875, "masked_token_hessian_coeff/max": 556.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -9437184.0, "masked_token_hessian_coeff/p25": -5.3783878684043884e-08, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.0052490234375, "masked_token_hessian_coeff/var": 145869914112.0, "masked_token_hessian_coeff_abs": 28390.234375, "masked_token_hessian_coeff_abs/max": 9437184.0, "masked_token_hessian_coeff_abs/median": 4.803268893738277e-12, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 7.748603820800781e-07, "masked_token_hessian_coeff_abs/p99": 65280.0, "masked_token_hessian_coeff_abs/var": 145869914112.0, "mean_logprobs": -0.012451171875, "mean_logprobs/var": 0.00016880035400390625, "num_completions/total": 6144, "per_sentence_gradient_norm": 40.31022262573242, "per_sentence_gradient_norm/max": 209.0, "per_sentence_gradient_norm/median": 21.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 58.5625, "per_sentence_gradient_norm/var": 2696.8505859375, "per_token_feature_norm": 190.7696075439453, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 69.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 128.67935180664062, "per_token_gradient_norm": 0.9177326560020447, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 3.930189507173054e-14, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 8.32369551062584e-09, "per_token_gradient_norm/var": 131.20785522460938, "per_token_policy_error_norm": 0.006897310726344585, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006809816230088472, "policy_entropy": 0.011889584362506866, "policy_entropy/max": 2.390625, "policy_entropy/median": 3.54702933691442e-10, "policy_entropy/min": 1.343341314786117e-21, "policy_entropy/p25": 5.222489107836736e-13, "policy_entropy/p75": 1.0803341865539551e-07, "policy_entropy/var": 0.008839093148708344, "policy_loss": -0.65625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2279605269432068, "policy_sharpness": 9.719078063964844, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8746354579925537, "reward": 0.65625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2279605269432068, "rewards/accuracy_reward": 0.65625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2279605269432068, "sentence_fisher_curvature": 356459.0625, "sentence_fisher_curvature/max": 2654208.0, "sentence_fisher_curvature/median": 114688.0, "sentence_fisher_curvature/min": 78.0, "sentence_fisher_curvature/p25": 1998.0, "sentence_fisher_curvature/p75": 574464.0, "sentence_fisher_curvature/p85": 817152.0, "sentence_fisher_curvature/p90": 991232.0, "sentence_fisher_curvature/p95": 1220608.0, "sentence_fisher_curvature/p99": 2171700.75, "sentence_fisher_curvature/var": 262811762688.0, "sentence_fisher_kl_divergence": 5.258816145214951e-06, "sentence_fisher_kl_divergence/max": 3.910064697265625e-05, "sentence_fisher_kl_divergence/median": 1.691281795501709e-06, "sentence_fisher_kl_divergence/min": 1.1496013030409813e-09, "sentence_fisher_kl_divergence/p25": 2.9423972591757774e-08, "sentence_fisher_kl_divergence/p75": 8.478760719299316e-06, "sentence_fisher_kl_divergence/p85": 1.2069940567016602e-05, "sentence_fisher_kl_divergence/p90": 1.4603137969970703e-05, "sentence_fisher_kl_divergence/p95": 1.800060272216797e-05, "sentence_fisher_kl_divergence/p99": 3.207924237358384e-05, "sentence_fisher_kl_divergence/var": 5.717505982549831e-11, "sentence_full_gradient_variance/max_squared_error": 4232.921875, "sentence_full_gradient_variance/metric": 4232.921875, "sentence_full_gradient_variance/p75": 4232.921875, "sentence_full_gradient_variance/p90": 4232.921875, "sentence_full_gradient_variance/p95": 4232.921875, "sentence_full_gradient_variance/p99": 4232.921875, "sentence_full_update_term": 0.017320554703474045, "sentence_full_update_term/max": 0.08154296875, "sentence_full_update_term/median": 0.009033203125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.02703857421875, "sentence_full_update_term/p85": 0.0400390625, "sentence_full_update_term/p90": 0.049072265625, "sentence_full_update_term/p95": 0.0675048828125, "sentence_full_update_term/p99": 0.08015137165784836, "sentence_full_update_term/var": 0.0004600630491040647, "sentence_hessian_coeff": 78013.3359375, "sentence_hessian_coeff/max": 1556480.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -317440.0, "sentence_hessian_coeff/p25": -54912.0, "sentence_hessian_coeff/p75": 97280.0, "sentence_hessian_coeff/p99": 1198490.75, "sentence_hessian_coeff/var": 90247061504.0, "sentence_hessian_coeff_abs": 158421.34375, "sentence_hessian_coeff_abs/max": 1556480.0, "sentence_hessian_coeff_abs/median": 55296.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 183296.0, "sentence_hessian_coeff_abs/p99": 1198490.75, "sentence_hessian_coeff_abs/var": 71035707392.0, "step": 64, "token_fisher_curvature": 302382.71875, "token_fisher_curvature/max": 170917888.0, "token_fisher_curvature/median": 3.3881317890172014e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.914712933060466e-25, "token_fisher_curvature/p75": 2.184918912462308e-13, "token_fisher_curvature/p85": 2.546585164964199e-10, "token_fisher_curvature/p90": 4.265166353434324e-08, "token_fisher_curvature/p95": 0.0032851696014404297, "token_fisher_curvature/p99": 359064.0, "token_fisher_curvature/var": 27318540042240.0, "token_fisher_kl_divergence": 4.46197282144567e-06, "token_fisher_kl_divergence/max": 0.0025177001953125, "token_fisher_kl_divergence/median": 5.0043363674957936e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.0203290965137456e-35, "token_fisher_kl_divergence/p75": 3.2182495707141233e-24, "token_fisher_kl_divergence/p85": 3.758708703440958e-21, "token_fisher_kl_divergence/p90": 6.308277874676402e-19, "token_fisher_kl_divergence/p95": 4.8471643365743944e-14, "token_fisher_kl_divergence/p99": 5.294568836688995e-06, "token_fisher_kl_divergence/var": 5.947373704628944e-09, "token_full_update_term": 0.00023926456924527884, "token_full_update_term/max": 0.0732421875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.3676624298095703e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.475586568020162e-16, "token_full_update_term/p85": 2.2115642650533118e-13, "token_full_update_term/p90": 8.01492205937393e-12, "token_full_update_term/p95": 2.2598669602302834e-09, "token_full_update_term/p99": 0.0031076669692993164, "token_full_update_term/var": 9.106393008551095e-06, "token_hessian_coeff": 45568.83984375, "token_hessian_coeff/max": 167772160.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -22413312.0, "token_hessian_coeff/p25": -6.85686245560646e-08, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.012918233871459961, "token_hessian_coeff/var": 19970704539648.0, "token_hessian_coeff_abs": 297753.71875, "token_hessian_coeff_abs/max": 167772160.0, "token_hessian_coeff_abs/median": 6.366462912410498e-12, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.0058283805847168e-06, "token_hessian_coeff_abs/p99": 5810816.0, "token_hessian_coeff_abs/var": 19884119425024.0 }, { "accuracy_reward": 0.78125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.17269739508628845, "adam_stats/lm_head/lr_effective_max": 2.2811995222582482e-05, "adam_stats/lm_head/lr_effective_mean": -2.2558078668910575e-11, "adam_stats/lm_head/lr_effective_min": -2.2591271772398613e-05, "adam_stats/lm_head/lr_effective_std": 6.101137728364847e-07, "adam_stats/lr_effective_max": 2.7013997168978676e-05, "adam_stats/lr_effective_mean": 2.286460777656263e-11, "adam_stats/lr_effective_min": -2.7813151973532513e-05, "adam_stats/m_t_max": 0.0004202969721518457, "adam_stats/m_t_mean": 1.8639538697240443e-12, "adam_stats/m_t_min": -0.0004011717683169991, "adam_stats/v_t_max": 2.5203109544236213e-05, "adam_stats/v_t_mean": 1.7207218722808948e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.78125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.17269739508628845, "all_logprobs": -0.006991600152105093, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.75, "all_logprobs/p1": -0.10009765625, "all_logprobs/p10": -3.5762786865234375e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -1.7881393432617188e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.010310840792953968, "clip_ratio": 0.0, "completion_length": 475.60418701171875, "completion_length/correct": 396.66668701171875, "completion_length/correct/max": 1015.0, "completion_length/correct/median": 328.0, "completion_length/correct/min": 174.0, "completion_length/correct/p25": 315.0, "completion_length/correct/p75": 503.0, "completion_length/correct/var": 37974.3359375, "completion_length/incorrect": 757.5238037109375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 322.0, "completion_length/incorrect/p25": 400.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 102720.6640625, "completion_length/max": 1024.0, "completion_length/median": 368.0, "completion_length/min": 174.0, "completion_length/p25": 317.0, "completion_length/p75": 513.75, "completion_length/var": 73693.703125, "curvature_clip_ratio_token_fisher": 0.006899119354784489, "curvature_clip_ratio_token_hessian": 0.00521266832947731, "curvature_clip_ratio_total_fisher": 0.006899119354784489, "curvature_clip_ratio_total_full": 0.006899119354784489, "curvature_clip_ratio_total_hessian": 0.00521266832947731, "epoch": 0.104, "feature_vector_variance/max_squared_error": 54624.609375, "feature_vector_variance/metric": 30818.681640625, "generated_tokens/total": 3663802.0, "global_fisher_curvature": 167936.0, "global_fisher_curvature/max": 167936.0, "global_fisher_curvature/median": 167936.0, "global_fisher_curvature/min": 167936.0, "global_fisher_curvature/p25": 167936.0, "global_fisher_curvature/p75": 167936.0, "global_fisher_curvature/p85": 167936.0, "global_fisher_curvature/p90": 167936.0, "global_fisher_curvature/p95": 167936.0, "global_fisher_curvature/p99": 167936.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.250075340270996e-06, "global_fisher_kl_divergence/max": 2.250075340270996e-06, "global_fisher_kl_divergence/median": 2.250075340270996e-06, "global_fisher_kl_divergence/min": 2.250075340270996e-06, "global_fisher_kl_divergence/p25": 2.250075340270996e-06, "global_fisher_kl_divergence/p75": 2.250075340270996e-06, "global_fisher_kl_divergence/p85": 2.250075340270996e-06, "global_fisher_kl_divergence/p90": 2.250075340270996e-06, "global_fisher_kl_divergence/p95": 2.250075340270996e-06, "global_fisher_kl_divergence/p99": 2.250075340270996e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0322265625, "global_full_update_term/max": 0.0322265625, "global_full_update_term/median": 0.0322265625, "global_full_update_term/min": 0.0322265625, "global_full_update_term/p25": 0.0322265625, "global_full_update_term/p75": 0.0322265625, "global_full_update_term/p85": 0.0322265625, "global_full_update_term/p90": 0.0322265625, "global_full_update_term/p95": 0.0322265625, "global_full_update_term/p99": 0.0322265625, "global_full_update_term/var": NaN, "global_hessian_coeff": 40448.0, "global_hessian_coeff/max": 40448.0, "global_hessian_coeff/median": 40448.0, "global_hessian_coeff/min": 40448.0, "global_hessian_coeff/p25": 40448.0, "global_hessian_coeff/p75": 40448.0, "global_hessian_coeff/p99": 40448.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 40448.0, "global_hessian_coeff_abs/max": 40448.0, "global_hessian_coeff_abs/median": 40448.0, "global_hessian_coeff_abs/min": 40448.0, "global_hessian_coeff_abs/p25": 40448.0, "global_hessian_coeff_abs/p75": 40448.0, "global_hessian_coeff_abs/p99": 40448.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.0886741504073143, "learning_rate": 4.934848925057485e-06, "loss": -0.7812, "masked_global_fisher_curvature": 988.0, "masked_global_fisher_curvature/max": 988.0, "masked_global_fisher_curvature/median": 988.0, "masked_global_fisher_curvature/min": 988.0, "masked_global_fisher_curvature/p25": 988.0, "masked_global_fisher_curvature/p75": 988.0, "masked_global_fisher_curvature/p85": 988.0, "masked_global_fisher_curvature/p90": 988.0, "masked_global_fisher_curvature/p95": 988.0, "masked_global_fisher_curvature/p99": 988.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/max": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/median": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/min": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p25": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p75": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p85": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p90": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p95": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/p99": 1.3271346688270569e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0021514892578125, "masked_global_full_update_term/max": 0.0021514892578125, "masked_global_full_update_term/median": 0.0021514892578125, "masked_global_full_update_term/min": 0.0021514892578125, "masked_global_full_update_term/p25": 0.0021514892578125, "masked_global_full_update_term/p75": 0.0021514892578125, "masked_global_full_update_term/p85": 0.0021514892578125, "masked_global_full_update_term/p90": 0.0021514892578125, "masked_global_full_update_term/p95": 0.0021514892578125, "masked_global_full_update_term/p99": 0.0021514892578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -10944.0, "masked_global_hessian_coeff/max": -10944.0, "masked_global_hessian_coeff/median": -10944.0, "masked_global_hessian_coeff/min": -10944.0, "masked_global_hessian_coeff/p25": -10944.0, "masked_global_hessian_coeff/p75": -10944.0, "masked_global_hessian_coeff/p99": -10944.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 10944.0, "masked_global_hessian_coeff_abs/max": 10944.0, "masked_global_hessian_coeff_abs/median": 10944.0, "masked_global_hessian_coeff_abs/min": 10944.0, "masked_global_hessian_coeff_abs/p25": 10944.0, "masked_global_hessian_coeff_abs/p75": 10944.0, "masked_global_hessian_coeff_abs/p99": 10944.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.2446494102478027, "masked_per_sentence_gradient_norm/max": 14.5, "masked_per_sentence_gradient_norm/median": 2.25, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.2734375, "masked_per_sentence_gradient_norm/p75": 3.1875, "masked_per_sentence_gradient_norm/var": 5.801946640014648, "masked_per_token_gradient_norm": 0.045969121158123016, "masked_per_token_gradient_norm/max": 17.5, "masked_per_token_gradient_norm/median": 1.4566126083082054e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.632158041000366e-08, "masked_per_token_gradient_norm/var": 0.40267327427864075, "masked_sentence_fisher_curvature": 1159.4111328125, "masked_sentence_fisher_curvature/max": 5632.0, "masked_sentence_fisher_curvature/median": 964.0, "masked_sentence_fisher_curvature/min": 6.40625, "masked_sentence_fisher_curvature/p25": 278.0, "masked_sentence_fisher_curvature/p75": 1672.0, "masked_sentence_fisher_curvature/p85": 2384.0, "masked_sentence_fisher_curvature/p90": 2488.0, "masked_sentence_fisher_curvature/p95": 3008.0, "masked_sentence_fisher_curvature/p99": 3458.406982421875, "masked_sentence_fisher_curvature/var": 1066829.75, "masked_sentence_fisher_kl_divergence": 1.555535789066198e-08, "masked_sentence_fisher_kl_divergence/max": 7.543712854385376e-08, "masked_sentence_fisher_kl_divergence/median": 1.2922100722789764e-08, "masked_sentence_fisher_kl_divergence/min": 8.594724931754172e-11, "masked_sentence_fisher_kl_divergence/p25": 3.725290298461914e-09, "masked_sentence_fisher_kl_divergence/p75": 2.246815711259842e-08, "masked_sentence_fisher_kl_divergence/p85": 3.189779818058014e-08, "masked_sentence_fisher_kl_divergence/p90": 3.341119736433029e-08, "masked_sentence_fisher_kl_divergence/p95": 4.0279701352119446e-08, "masked_sentence_fisher_kl_divergence/p99": 4.646144802222807e-08, "masked_sentence_fisher_kl_divergence/var": 1.916636374544979e-16, "masked_sentence_full_gradient_variance/max_squared_error": 10.309257507324219, "masked_sentence_full_gradient_variance/metric": 10.309257507324219, "masked_sentence_full_gradient_variance/p75": 10.309257507324219, "masked_sentence_full_gradient_variance/p90": 10.309257507324219, "masked_sentence_full_gradient_variance/p95": 10.309257507324219, "masked_sentence_full_gradient_variance/p99": 10.309257507324219, "masked_sentence_full_update_term": 0.0007678444380871952, "masked_sentence_full_update_term/max": 0.004241943359375, "masked_sentence_full_update_term/median": 0.00072479248046875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 9.572505950927734e-05, "masked_sentence_full_update_term/p75": 0.001064300537109375, "masked_sentence_full_update_term/p85": 0.001617431640625, "masked_sentence_full_update_term/p90": 0.001995086669921875, "masked_sentence_full_update_term/p95": 0.002044677734375, "masked_sentence_full_update_term/p99": 0.0031982455402612686, "masked_sentence_full_update_term/var": 6.564997647728887e-07, "masked_sentence_hessian_coeff": -24728.66796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -27008.0, "masked_sentence_hessian_coeff/min": -130560.0, "masked_sentence_hessian_coeff/p25": -32128.0, "masked_sentence_hessian_coeff/p75": -6600.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 544255296.0, "masked_sentence_hessian_coeff_abs": 24728.66796875, "masked_sentence_hessian_coeff_abs/max": 130560.0, "masked_sentence_hessian_coeff_abs/median": 24704.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 6600.0, "masked_sentence_hessian_coeff_abs/p75": 32128.0, "masked_sentence_hessian_coeff_abs/p99": 79488.1640625, "masked_sentence_hessian_coeff_abs/var": 544255296.0, "masked_token_fisher_curvature": 1169.2188720703125, "masked_token_fisher_curvature/max": 733184.0, "masked_token_fisher_curvature/median": 1.2451384324638215e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.3264454728053903e-25, "masked_token_fisher_curvature/p75": 5.084821452783217e-14, "masked_token_fisher_curvature/p85": 2.751221472863108e-11, "masked_token_fisher_curvature/p90": 2.136232524208026e-09, "masked_token_fisher_curvature/p95": 8.511357009410858e-06, "masked_token_fisher_curvature/p99": 2256.0, "masked_token_fisher_curvature/var": 408760000.0, "masked_token_fisher_kl_divergence": 1.569255481115306e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 1.67632942359465e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.126814973187285e-36, "masked_token_fisher_kl_divergence/p75": 6.817777705026908e-25, "masked_token_fisher_kl_divergence/p85": 3.6892255319865034e-22, "masked_token_fisher_kl_divergence/p90": 2.875693399540601e-20, "masked_token_fisher_kl_divergence/p95": 1.1431556656144037e-16, "masked_token_fisher_kl_divergence/p99": 3.026798367500305e-08, "masked_token_fisher_kl_divergence/var": 7.360891816866541e-14, "masked_token_full_update_term": 1.0916526662185788e-05, "masked_token_full_update_term/max": 0.004119873046875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -6.407499313354492e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.1371793224034263e-15, "masked_token_full_update_term/p85": 5.115907697472721e-13, "masked_token_full_update_term/p90": 1.1823431123048067e-11, "masked_token_full_update_term/p95": 1.1925180842808913e-09, "masked_token_full_update_term/p99": 0.0001506805419921875, "masked_token_full_update_term/var": 2.3592411579897998e-08, "masked_token_hessian_coeff": -25423.646484375, "masked_token_hessian_coeff/max": 6336.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -9568256.0, "masked_token_hessian_coeff/p25": -5.252659320831299e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0068509578704833984, "masked_token_hessian_coeff/var": 126936252416.0, "masked_token_hessian_coeff_abs": 25424.046875, "masked_token_hessian_coeff_abs/max": 9568256.0, "masked_token_hessian_coeff_abs/median": 1.9736035028472543e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.947185516357422e-06, "masked_token_hessian_coeff_abs/p99": 233700.0, "masked_token_hessian_coeff_abs/var": 126936227840.0, "mean_logprobs": -0.00775146484375, "mean_logprobs/var": 6.031990051269531e-05, "num_completions/total": 6240, "per_sentence_gradient_norm": 31.0107421875, "per_sentence_gradient_norm/max": 225.0, "per_sentence_gradient_norm/median": 17.375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 4.34375, "per_sentence_gradient_norm/p75": 41.75, "per_sentence_gradient_norm/var": 1678.1898193359375, "per_token_feature_norm": 190.36134338378906, "per_token_feature_norm/max": 241.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 110.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 121.70597076416016, "per_token_gradient_norm": 0.7215110659599304, "per_token_gradient_norm/max": 268.0, "per_token_gradient_norm/median": 1.6200374375330284e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.353933036327362e-08, "per_token_gradient_norm/var": 98.62488555908203, "per_token_policy_error_norm": 0.004146744031459093, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.003784613450989127, "policy_entropy": 0.007442391011863947, "policy_entropy/max": 1.265625, "policy_entropy/median": 2.15550244320184e-10, "policy_entropy/min": 2.6999175193730823e-20, "policy_entropy/p25": 3.4638958368304884e-13, "policy_entropy/p75": 6.705522537231445e-08, "policy_entropy/var": 0.004019090905785561, "policy_loss": -0.78125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.17269739508628845, "policy_sharpness": 9.800298690795898, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.3132892847061157, "reward": 0.78125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.17269739508628845, "rewards/accuracy_reward": 0.78125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.17269739508628845, "sentence_fisher_curvature": 304033.34375, "sentence_fisher_curvature/max": 2457600.0, "sentence_fisher_curvature/median": 88064.0, "sentence_fisher_curvature/min": 35.25, "sentence_fisher_curvature/p25": 1580.0, "sentence_fisher_curvature/p75": 419840.0, "sentence_fisher_curvature/p85": 664576.0, "sentence_fisher_curvature/p90": 745472.0, "sentence_fisher_curvature/p95": 1065984.0, "sentence_fisher_curvature/p99": 1741621.5, "sentence_fisher_curvature/var": 178431016960.0, "sentence_fisher_kl_divergence": 4.0834484025253914e-06, "sentence_fisher_kl_divergence/max": 3.2901763916015625e-05, "sentence_fisher_kl_divergence/median": 1.1846423149108887e-06, "sentence_fisher_kl_divergence/min": 4.729372449219227e-10, "sentence_fisher_kl_divergence/p25": 2.1245796233415604e-08, "sentence_fisher_kl_divergence/p75": 5.62518835067749e-06, "sentence_fisher_kl_divergence/p85": 8.910894393920898e-06, "sentence_fisher_kl_divergence/p90": 1.0013580322265625e-05, "sentence_fisher_kl_divergence/p95": 1.4334917068481445e-05, "sentence_fisher_kl_divergence/p99": 2.338889316888526e-05, "sentence_fisher_kl_divergence/var": 3.215229818498422e-11, "sentence_full_gradient_variance/max_squared_error": 2575.874755859375, "sentence_full_gradient_variance/metric": 2575.874755859375, "sentence_full_gradient_variance/p75": 2575.874755859375, "sentence_full_gradient_variance/p90": 2575.874755859375, "sentence_full_gradient_variance/p95": 2575.874755859375, "sentence_full_gradient_variance/p99": 2575.874755859375, "sentence_full_update_term": 0.012625059112906456, "sentence_full_update_term/max": 0.10888671875, "sentence_full_update_term/median": 0.005889892578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.002044677734375, "sentence_full_update_term/p75": 0.016204833984375, "sentence_full_update_term/p85": 0.02734375, "sentence_full_update_term/p90": 0.0350341796875, "sentence_full_update_term/p95": 0.04400634765625, "sentence_full_update_term/p99": 0.07595225423574448, "sentence_full_update_term/var": 0.0003247527638450265, "sentence_hessian_coeff": 49469.3359375, "sentence_hessian_coeff/max": 1261568.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -415744.0, "sentence_hessian_coeff/p25": -82944.0, "sentence_hessian_coeff/p75": 52672.0, "sentence_hessian_coeff/p99": 926925.875, "sentence_hessian_coeff/var": 70680305664.0, "sentence_hessian_coeff_abs": 155122.671875, "sentence_hessian_coeff_abs/max": 1261568.0, "sentence_hessian_coeff_abs/median": 82944.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 27328.0, "sentence_hessian_coeff_abs/p75": 178432.0, "sentence_hessian_coeff_abs/p99": 926925.875, "sentence_hessian_coeff_abs/var": 48836939776.0, "step": 65, "token_fisher_curvature": 238707.796875, "token_fisher_curvature/max": 174063616.0, "token_fisher_curvature/median": 1.4060746924421386e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.50416005753358e-25, "token_fisher_curvature/p75": 7.260858581048524e-14, "token_fisher_curvature/p85": 4.547473508864641e-11, "token_fisher_curvature/p90": 5.180481821298599e-09, "token_fisher_curvature/p95": 4.839897155761719e-05, "token_fisher_curvature/p99": 94720.0, "token_fisher_curvature/var": 20330485645312.0, "token_fisher_kl_divergence": 3.2047148579295026e-06, "token_fisher_kl_divergence/max": 0.0023345947265625, "token_fisher_kl_divergence/median": 1.8858706015439813e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.361913843351742e-36, "token_fisher_kl_divergence/p75": 9.725834546033646e-25, "token_fisher_kl_divergence/p85": 6.121136532892405e-22, "token_fisher_kl_divergence/p90": 6.945670167485263e-20, "token_fisher_kl_divergence/p95": 6.487865800153259e-16, "token_fisher_kl_divergence/p99": 1.2740492820739746e-06, "token_fisher_kl_divergence/var": 3.6632235023148496e-09, "token_full_update_term": 0.00018302777607459575, "token_full_update_term/max": 0.07080078125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -6.407499313354492e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.8449465006019636e-15, "token_full_update_term/p85": 6.998845947236987e-13, "token_full_update_term/p90": 1.7769341553730555e-11, "token_full_update_term/p95": 3.0850060284137726e-09, "token_full_update_term/p99": 0.0009918212890625, "token_full_update_term/var": 6.512173513328889e-06, "token_hessian_coeff": 20317.48828125, "token_hessian_coeff/max": 173015040.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -23330816.0, "token_hessian_coeff/p25": -6.249174475669861e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0164794921875, "token_hessian_coeff/var": 13147959197696.0, "token_hessian_coeff_abs": 227817.734375, "token_hessian_coeff_abs/max": 173015040.0, "token_hessian_coeff_abs/median": 2.319211489520967e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 5.893409252166748e-06, "token_hessian_coeff_abs/p99": 2277376.0, "token_hessian_coeff_abs/var": 13096470970368.0 }, { "accuracy_reward": 0.6666666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.224561408162117, "adam_stats/lm_head/lr_effective_max": 2.2415311832446605e-05, "adam_stats/lm_head/lr_effective_mean": -3.350112895428303e-11, "adam_stats/lm_head/lr_effective_min": -2.2926209567231126e-05, "adam_stats/lm_head/lr_effective_std": 5.706812089556479e-07, "adam_stats/lr_effective_max": 2.6624296879163012e-05, "adam_stats/lr_effective_mean": -1.6406660101678239e-12, "adam_stats/lr_effective_min": -2.566126931924373e-05, "adam_stats/m_t_max": 0.0005030918400734663, "adam_stats/m_t_mean": 6.370037743813617e-12, "adam_stats/m_t_min": -0.0006419614655897021, "adam_stats/v_t_max": 2.517816574254539e-05, "adam_stats/v_t_mean": 1.7206579043527181e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6666666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.224561408162117, "all_logprobs": -0.010000832378864288, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.75, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -2.9802322387695312e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0002613067626953125, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015395121648907661, "clip_ratio": 0.0, "completion_length": 557.8021240234375, "completion_length/correct": 421.734375, "completion_length/correct/max": 1016.0, "completion_length/correct/median": 257.0, "completion_length/correct/min": 209.0, "completion_length/correct/p25": 245.75, "completion_length/correct/p75": 624.0, "completion_length/correct/var": 66437.0234375, "completion_length/incorrect": 829.9375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 880.0, "completion_length/incorrect/min": 387.0, "completion_length/incorrect/p25": 605.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 48260.05859375, "completion_length/max": 1024.0, "completion_length/median": 474.0, "completion_length/min": 209.0, "completion_length/p25": 255.5, "completion_length/p75": 880.0, "completion_length/var": 97224.875, "curvature_clip_ratio_token_fisher": 0.006480046082288027, "curvature_clip_ratio_token_hessian": 0.004201759118586779, "curvature_clip_ratio_total_fisher": 0.006480046082288027, "curvature_clip_ratio_total_full": 0.006480046082288027, "curvature_clip_ratio_total_hessian": 0.004201759118586779, "epoch": 0.1056, "feature_vector_variance/max_squared_error": 66091.46875, "feature_vector_variance/metric": 31097.46484375, "generated_tokens/total": 3717351.0, "global_fisher_curvature": 120320.0, "global_fisher_curvature/max": 120320.0, "global_fisher_curvature/median": 120320.0, "global_fisher_curvature/min": 120320.0, "global_fisher_curvature/p25": 120320.0, "global_fisher_curvature/p75": 120320.0, "global_fisher_curvature/p85": 120320.0, "global_fisher_curvature/p90": 120320.0, "global_fisher_curvature/p95": 120320.0, "global_fisher_curvature/p99": 120320.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.4677643775939941e-06, "global_fisher_kl_divergence/max": 1.4677643775939941e-06, "global_fisher_kl_divergence/median": 1.4677643775939941e-06, "global_fisher_kl_divergence/min": 1.4677643775939941e-06, "global_fisher_kl_divergence/p25": 1.4677643775939941e-06, "global_fisher_kl_divergence/p75": 1.4677643775939941e-06, "global_fisher_kl_divergence/p85": 1.4677643775939941e-06, "global_fisher_kl_divergence/p90": 1.4677643775939941e-06, "global_fisher_kl_divergence/p95": 1.4677643775939941e-06, "global_fisher_kl_divergence/p99": 1.4677643775939941e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0213623046875, "global_full_update_term/max": 0.0213623046875, "global_full_update_term/median": 0.0213623046875, "global_full_update_term/min": 0.0213623046875, "global_full_update_term/p25": 0.0213623046875, "global_full_update_term/p75": 0.0213623046875, "global_full_update_term/p85": 0.0213623046875, "global_full_update_term/p90": 0.0213623046875, "global_full_update_term/p95": 0.0213623046875, "global_full_update_term/p99": 0.0213623046875, "global_full_update_term/var": NaN, "global_hessian_coeff": 33280.0, "global_hessian_coeff/max": 33280.0, "global_hessian_coeff/median": 33280.0, "global_hessian_coeff/min": 33280.0, "global_hessian_coeff/p25": 33280.0, "global_hessian_coeff/p75": 33280.0, "global_hessian_coeff/p99": 33280.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 33280.0, "global_hessian_coeff_abs/max": 33280.0, "global_hessian_coeff_abs/median": 33280.0, "global_hessian_coeff_abs/min": 33280.0, "global_hessian_coeff_abs/p25": 33280.0, "global_hessian_coeff_abs/p75": 33280.0, "global_hessian_coeff_abs/p99": 33280.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.09359110891819, "learning_rate": 4.6904505493806595e-06, "loss": -0.6667, "masked_global_fisher_curvature": 860.0, "masked_global_fisher_curvature/max": 860.0, "masked_global_fisher_curvature/median": 860.0, "masked_global_fisher_curvature/min": 860.0, "masked_global_fisher_curvature/p25": 860.0, "masked_global_fisher_curvature/p75": 860.0, "masked_global_fisher_curvature/p85": 860.0, "masked_global_fisher_curvature/p90": 860.0, "masked_global_fisher_curvature/p95": 860.0, "masked_global_fisher_curvature/p99": 860.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/max": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/median": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/min": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/p25": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/p75": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/p85": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/p90": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/p95": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/p99": 1.0477378964424133e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00170135498046875, "masked_global_full_update_term/max": 0.00170135498046875, "masked_global_full_update_term/median": 0.00170135498046875, "masked_global_full_update_term/min": 0.00170135498046875, "masked_global_full_update_term/p25": 0.00170135498046875, "masked_global_full_update_term/p75": 0.00170135498046875, "masked_global_full_update_term/p85": 0.00170135498046875, "masked_global_full_update_term/p90": 0.00170135498046875, "masked_global_full_update_term/p95": 0.00170135498046875, "masked_global_full_update_term/p99": 0.00170135498046875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -5632.0, "masked_global_hessian_coeff/max": -5632.0, "masked_global_hessian_coeff/median": -5632.0, "masked_global_hessian_coeff/min": -5632.0, "masked_global_hessian_coeff/p25": -5632.0, "masked_global_hessian_coeff/p75": -5632.0, "masked_global_hessian_coeff/p99": -5632.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 5632.0, "masked_global_hessian_coeff_abs/max": 5632.0, "masked_global_hessian_coeff_abs/median": 5632.0, "masked_global_hessian_coeff_abs/min": 5632.0, "masked_global_hessian_coeff_abs/p25": 5632.0, "masked_global_hessian_coeff_abs/p75": 5632.0, "masked_global_hessian_coeff_abs/p99": 5632.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 1.6985270977020264, "masked_per_sentence_gradient_norm/max": 9.1875, "masked_per_sentence_gradient_norm/median": 0.8203125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.015625, "masked_per_sentence_gradient_norm/var": 4.428566932678223, "masked_per_token_gradient_norm": 0.04721270874142647, "masked_per_token_gradient_norm/max": 18.25, "masked_per_token_gradient_norm/median": 5.692061405548898e-18, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.4013494364917278e-08, "masked_per_token_gradient_norm/var": 0.4134639799594879, "masked_sentence_fisher_curvature": 1142.90234375, "masked_sentence_fisher_curvature/max": 4160.0, "masked_sentence_fisher_curvature/median": 848.0, "masked_sentence_fisher_curvature/min": 11.75, "masked_sentence_fisher_curvature/p25": 397.0, "masked_sentence_fisher_curvature/p75": 1608.0, "masked_sentence_fisher_curvature/p85": 2270.0, "masked_sentence_fisher_curvature/p90": 2752.0, "masked_sentence_fisher_curvature/p95": 3296.0, "masked_sentence_fisher_curvature/p99": 3491.2021484375, "masked_sentence_fisher_curvature/var": 1003118.625, "masked_sentence_fisher_kl_divergence": 1.3913393104303395e-08, "masked_sentence_fisher_kl_divergence/max": 5.075708031654358e-08, "masked_sentence_fisher_kl_divergence/median": 1.0302755981683731e-08, "masked_sentence_fisher_kl_divergence/min": 1.4279066817834973e-10, "masked_sentence_fisher_kl_divergence/p25": 4.823959898203611e-09, "masked_sentence_fisher_kl_divergence/p75": 1.9615981727838516e-08, "masked_sentence_fisher_kl_divergence/p85": 2.764863893389702e-08, "masked_sentence_fisher_kl_divergence/p90": 3.3527612686157227e-08, "masked_sentence_fisher_kl_divergence/p95": 4.0046870708465576e-08, "masked_sentence_fisher_kl_divergence/p99": 4.2573109482191285e-08, "masked_sentence_fisher_kl_divergence/var": 1.4861325408474708e-16, "masked_sentence_full_gradient_variance/max_squared_error": 7.028172969818115, "masked_sentence_full_gradient_variance/metric": 7.028172969818115, "masked_sentence_full_gradient_variance/p75": 7.028172969818115, "masked_sentence_full_gradient_variance/p90": 7.028172969818115, "masked_sentence_full_gradient_variance/p95": 7.028172969818115, "masked_sentence_full_gradient_variance/p99": 7.028172969818115, "masked_sentence_full_update_term": 0.0006371091003529727, "masked_sentence_full_update_term/max": 0.0037841796875, "masked_sentence_full_update_term/median": 0.000209808349609375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001056671142578125, "masked_sentence_full_update_term/p85": 0.0015506744384765625, "masked_sentence_full_update_term/p90": 0.00193023681640625, "masked_sentence_full_update_term/p95": 0.002170562744140625, "masked_sentence_full_update_term/p99": 0.0028854398988187313, "masked_sentence_full_update_term/var": 6.93590095579566e-07, "masked_sentence_hessian_coeff": -23026.833984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -15616.0, "masked_sentence_hessian_coeff/min": -79360.0, "masked_sentence_hessian_coeff/p25": -46656.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 563360896.0, "masked_sentence_hessian_coeff_abs": 23026.833984375, "masked_sentence_hessian_coeff_abs/max": 79360.0, "masked_sentence_hessian_coeff_abs/median": 15616.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 46656.0, "masked_sentence_hessian_coeff_abs/p99": 72064.0234375, "masked_sentence_hessian_coeff_abs/var": 563360896.0, "masked_token_fisher_curvature": 1380.0562744140625, "masked_token_fisher_curvature/max": 745472.0, "masked_token_fisher_curvature/median": 8.239936510889834e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.0531850015098356e-23, "masked_token_fisher_curvature/p75": 1.7923440509548527e-12, "masked_token_fisher_curvature/p85": 1.0477378964424133e-09, "masked_token_fisher_curvature/p90": 1.5161640476435423e-07, "masked_token_fisher_curvature/p95": 0.001007080078125, "masked_token_fisher_curvature/p99": 9598.5, "masked_token_fisher_curvature/var": 462769600.0, "masked_token_fisher_kl_divergence": 1.6801122271203894e-08, "masked_token_fisher_kl_divergence/max": 9.059906005859375e-06, "masked_token_fisher_kl_divergence/median": 1.001853349630685e-28, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.935195482492292e-34, "masked_token_fisher_kl_divergence/p75": 2.1842738050228387e-23, "masked_token_fisher_kl_divergence/p85": 1.2758433768017899e-20, "masked_token_fisher_kl_divergence/p90": 1.8417672646860693e-18, "masked_token_fisher_kl_divergence/p95": 1.226796442210798e-14, "masked_token_fisher_kl_divergence/p99": 1.168627932202071e-07, "masked_token_fisher_kl_divergence/var": 6.856564334313184e-14, "masked_token_full_update_term": 1.0919007763732225e-05, "masked_token_full_update_term/max": 0.003997802734375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -7.972121238708496e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 8.291978215169138e-16, "masked_token_full_update_term/p85": 3.375077994860476e-13, "masked_token_full_update_term/p90": 9.038103598868474e-12, "masked_token_full_update_term/p95": 1.4551915228366852e-09, "masked_token_full_update_term/p99": 3.0159950256347656e-05, "masked_token_full_update_term/var": 2.2867215676569685e-08, "masked_token_hessian_coeff": -27304.2265625, "masked_token_hessian_coeff/max": 812.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -9830400.0, "masked_token_hessian_coeff/p25": -1.9185245037078857e-07, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.003935456275939941, "masked_token_hessian_coeff/var": 143761113088.0, "masked_token_hessian_coeff_abs": 27304.529296875, "masked_token_hessian_coeff_abs/max": 9830400.0, "masked_token_hessian_coeff_abs/median": 1.0755285551056204e-16, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.773238182067871e-06, "masked_token_hessian_coeff_abs/p99": 41726.0, "masked_token_hessian_coeff_abs/var": 143761096704.0, "mean_logprobs": -0.00921630859375, "mean_logprobs/var": 5.793571472167969e-05, "num_completions/total": 6336, "per_sentence_gradient_norm": 20.602214813232422, "per_sentence_gradient_norm/max": 92.5, "per_sentence_gradient_norm/median": 10.625, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 30.6875, "per_sentence_gradient_norm/var": 638.1104736328125, "per_token_feature_norm": 189.7244110107422, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 106.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 161.07418823242188, "per_token_gradient_norm": 0.6286636590957642, "per_token_gradient_norm/max": 292.0, "per_token_gradient_norm/median": 6.722053469410127e-17, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.618172973394394e-08, "per_token_gradient_norm/var": 86.51280975341797, "per_token_policy_error_norm": 0.005677299108356237, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0055364700965583324, "policy_entropy": 0.010879834182560444, "policy_entropy/max": 2.03125, "policy_entropy/median": 1.6661942936480045e-09, "policy_entropy/min": 3.163138662402778e-21, "policy_entropy/p25": 4.604316927725449e-12, "policy_entropy/p75": 3.0547380447387695e-07, "policy_entropy/var": 0.006315827835351229, "policy_loss": -0.6666666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.224561408162117, "policy_sharpness": 9.70305061340332, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8938816785812378, "reward": 0.6666666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.224561408162117, "rewards/accuracy_reward": 0.6666666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.224561408162117, "sentence_fisher_curvature": 245381.75, "sentence_fisher_curvature/max": 2129920.0, "sentence_fisher_curvature/median": 97792.0, "sentence_fisher_curvature/min": 92.5, "sentence_fisher_curvature/p25": 2184.0, "sentence_fisher_curvature/p75": 378368.0, "sentence_fisher_curvature/p85": 559104.0, "sentence_fisher_curvature/p90": 679936.0, "sentence_fisher_curvature/p95": 832512.0, "sentence_fisher_curvature/p99": 1367247.25, "sentence_fisher_curvature/var": 117289558016.0, "sentence_fisher_kl_divergence": 2.989894255733816e-06, "sentence_fisher_kl_divergence/max": 2.5987625122070312e-05, "sentence_fisher_kl_divergence/median": 1.1920928955078125e-06, "sentence_fisher_kl_divergence/min": 1.127773430198431e-09, "sentence_fisher_kl_divergence/p25": 2.657179720699787e-08, "sentence_fisher_kl_divergence/p75": 4.597008228302002e-06, "sentence_fisher_kl_divergence/p85": 6.809830665588379e-06, "sentence_fisher_kl_divergence/p90": 8.285045623779297e-06, "sentence_fisher_kl_divergence/p95": 1.0132789611816406e-05, "sentence_fisher_kl_divergence/p99": 1.670125129749067e-05, "sentence_fisher_kl_divergence/var": 1.743580800572797e-11, "sentence_full_gradient_variance/max_squared_error": 1037.6646728515625, "sentence_full_gradient_variance/metric": 1037.6646728515625, "sentence_full_gradient_variance/p75": 1037.6646728515625, "sentence_full_gradient_variance/p90": 1037.6646728515625, "sentence_full_gradient_variance/p95": 1037.6646728515625, "sentence_full_gradient_variance/p99": 1037.6646728515625, "sentence_full_update_term": 0.008314928039908409, "sentence_full_update_term/max": 0.058349609375, "sentence_full_update_term/median": 0.005615234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.01025390625, "sentence_full_update_term/p85": 0.0167236328125, "sentence_full_update_term/p90": 0.0235595703125, "sentence_full_update_term/p95": 0.030731201171875, "sentence_full_update_term/p99": 0.05579834803938866, "sentence_full_update_term/var": 0.00014223447942640632, "sentence_hessian_coeff": 36070.3359375, "sentence_hessian_coeff/max": 1449984.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -313344.0, "sentence_hessian_coeff/p25": -95744.0, "sentence_hessian_coeff/p75": 80896.0, "sentence_hessian_coeff/p99": 691202.4375, "sentence_hessian_coeff/var": 62026047488.0, "sentence_hessian_coeff_abs": 144696.34375, "sentence_hessian_coeff_abs/max": 1449984.0, "sentence_hessian_coeff_abs/median": 80896.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 198656.0, "sentence_hessian_coeff_abs/p99": 691202.4375, "sentence_hessian_coeff_abs/var": 42183389184.0, "step": 66, "token_fisher_curvature": 215422.578125, "token_fisher_curvature/max": 175112192.0, "token_fisher_curvature/median": 9.812029660993815e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 4.425416277158698e-23, "token_fisher_curvature/p75": 2.3590018827235326e-12, "token_fisher_curvature/p85": 1.622538547962904e-09, "token_fisher_curvature/p90": 3.0547380447387695e-07, "token_fisher_curvature/p95": 0.00689697265625, "token_fisher_curvature/p99": 122368.0, "token_fisher_curvature/var": 20285122150400.0, "token_fisher_kl_divergence": 2.6235006771457847e-06, "token_fisher_kl_divergence/max": 0.00213623046875, "token_fisher_kl_divergence/median": 1.1911799668837278e-28, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.38658531320805e-34, "token_fisher_kl_divergence/p75": 2.874452628621771e-23, "token_fisher_kl_divergence/p85": 1.979939514206927e-20, "token_fisher_kl_divergence/p90": 3.713392440762853e-18, "token_fisher_kl_divergence/p95": 8.393286066166183e-14, "token_fisher_kl_divergence/p99": 1.4901161193847656e-06, "token_fisher_kl_divergence/var": 3.0078899371233092e-09, "token_full_update_term": 0.00015414165682159364, "token_full_update_term/max": 0.0673828125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -7.972121238708496e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.186550857568136e-15, "token_full_update_term/p85": 5.009326287108706e-13, "token_full_update_term/p90": 1.3812950783176348e-11, "token_full_update_term/p95": 2.9831426218152046e-09, "token_full_update_term/p99": 0.0011712610721588135, "token_full_update_term/var": 5.334116394806188e-06, "token_hessian_coeff": 21145.06640625, "token_hessian_coeff/max": 174063616.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -23068672.0, "token_hessian_coeff/p25": -2.2910535335540771e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.007692813873291016, "token_hessian_coeff/var": 14220035883008.0, "token_hessian_coeff_abs": 223693.96875, "token_hessian_coeff_abs/max": 174063616.0, "token_hessian_coeff_abs/median": 1.762479051592436e-15, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.2351741790771484e-06, "token_hessian_coeff_abs/p99": 2949120.0, "token_hessian_coeff_abs/var": 14170445578240.0 }, { "accuracy_reward": 0.65625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.22796054184436798, "adam_stats/lm_head/lr_effective_max": 2.0956234948243946e-05, "adam_stats/lm_head/lr_effective_mean": -3.917296001465509e-11, "adam_stats/lm_head/lr_effective_min": -2.140074320777785e-05, "adam_stats/lm_head/lr_effective_std": 5.199901806918206e-07, "adam_stats/lr_effective_max": 2.268754360557068e-05, "adam_stats/lr_effective_mean": 2.387310835127998e-11, "adam_stats/lr_effective_min": -2.275797669426538e-05, "adam_stats/m_t_max": 0.0006289125885814428, "adam_stats/m_t_mean": 6.5219743668187835e-12, "adam_stats/m_t_min": -0.0009210880962200463, "adam_stats/v_t_max": 2.5153049136861227e-05, "adam_stats/v_t_mean": 1.7203342700042312e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.65625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.22796054184436798, "all_logprobs": -0.007919148541986942, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -1.0728836059570312e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -3.170967102050781e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.011179754510521889, "clip_ratio": 0.0, "completion_length": 438.6145935058594, "completion_length/correct": 408.3174743652344, "completion_length/correct/max": 803.0, "completion_length/correct/median": 385.0, "completion_length/correct/min": 233.0, "completion_length/correct/p25": 299.0, "completion_length/correct/p75": 448.0, "completion_length/correct/var": 26414.1875, "completion_length/incorrect": 496.4545593261719, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 467.0, "completion_length/incorrect/min": 233.0, "completion_length/incorrect/p25": 356.0, "completion_length/incorrect/p75": 621.0, "completion_length/incorrect/var": 47858.0703125, "completion_length/max": 1024.0, "completion_length/median": 399.0, "completion_length/min": 233.0, "completion_length/p25": 311.0, "completion_length/p75": 502.5, "completion_length/var": 35130.17578125, "curvature_clip_ratio_token_fisher": 0.006008502095937729, "curvature_clip_ratio_token_hessian": 0.004061082378029823, "curvature_clip_ratio_total_fisher": 0.006008502095937729, "curvature_clip_ratio_total_full": 0.006008502095937729, "curvature_clip_ratio_total_hessian": 0.004061082378029823, "epoch": 0.1072, "feature_vector_variance/max_squared_error": 61024.1953125, "feature_vector_variance/metric": 30908.283203125, "generated_tokens/total": 3759458.0, "global_fisher_curvature": 100352.0, "global_fisher_curvature/max": 100352.0, "global_fisher_curvature/median": 100352.0, "global_fisher_curvature/min": 100352.0, "global_fisher_curvature/p25": 100352.0, "global_fisher_curvature/p75": 100352.0, "global_fisher_curvature/p85": 100352.0, "global_fisher_curvature/p90": 100352.0, "global_fisher_curvature/p95": 100352.0, "global_fisher_curvature/p99": 100352.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.1026859283447266e-06, "global_fisher_kl_divergence/max": 1.1026859283447266e-06, "global_fisher_kl_divergence/median": 1.1026859283447266e-06, "global_fisher_kl_divergence/min": 1.1026859283447266e-06, "global_fisher_kl_divergence/p25": 1.1026859283447266e-06, "global_fisher_kl_divergence/p75": 1.1026859283447266e-06, "global_fisher_kl_divergence/p85": 1.1026859283447266e-06, "global_fisher_kl_divergence/p90": 1.1026859283447266e-06, "global_fisher_kl_divergence/p95": 1.1026859283447266e-06, "global_fisher_kl_divergence/p99": 1.1026859283447266e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0203857421875, "global_full_update_term/max": 0.0203857421875, "global_full_update_term/median": 0.0203857421875, "global_full_update_term/min": 0.0203857421875, "global_full_update_term/p25": 0.0203857421875, "global_full_update_term/p75": 0.0203857421875, "global_full_update_term/p85": 0.0203857421875, "global_full_update_term/p90": 0.0203857421875, "global_full_update_term/p95": 0.0203857421875, "global_full_update_term/p99": 0.0203857421875, "global_full_update_term/var": NaN, "global_hessian_coeff": 16064.0, "global_hessian_coeff/max": 16064.0, "global_hessian_coeff/median": 16064.0, "global_hessian_coeff/min": 16064.0, "global_hessian_coeff/p25": 16064.0, "global_hessian_coeff/p75": 16064.0, "global_hessian_coeff/p99": 16064.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 16064.0, "global_hessian_coeff_abs/max": 16064.0, "global_hessian_coeff_abs/median": 16064.0, "global_hessian_coeff_abs/min": 16064.0, "global_hessian_coeff_abs/p25": 16064.0, "global_hessian_coeff_abs/p75": 16064.0, "global_hessian_coeff_abs/p99": 16064.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.08945665508508682, "learning_rate": 4.4494751769315e-06, "loss": -0.6562, "masked_global_fisher_curvature": 1152.0, "masked_global_fisher_curvature/max": 1152.0, "masked_global_fisher_curvature/median": 1152.0, "masked_global_fisher_curvature/min": 1152.0, "masked_global_fisher_curvature/p25": 1152.0, "masked_global_fisher_curvature/p75": 1152.0, "masked_global_fisher_curvature/p85": 1152.0, "masked_global_fisher_curvature/p90": 1152.0, "masked_global_fisher_curvature/p95": 1152.0, "masked_global_fisher_curvature/p99": 1152.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/max": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/median": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/min": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/p25": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/p75": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/p85": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/p90": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/p95": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/p99": 1.2689270079135895e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.001983642578125, "masked_global_full_update_term/max": 0.001983642578125, "masked_global_full_update_term/median": 0.001983642578125, "masked_global_full_update_term/min": 0.001983642578125, "masked_global_full_update_term/p25": 0.001983642578125, "masked_global_full_update_term/p75": 0.001983642578125, "masked_global_full_update_term/p85": 0.001983642578125, "masked_global_full_update_term/p90": 0.001983642578125, "masked_global_full_update_term/p95": 0.001983642578125, "masked_global_full_update_term/p99": 0.001983642578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -7776.0, "masked_global_hessian_coeff/max": -7776.0, "masked_global_hessian_coeff/median": -7776.0, "masked_global_hessian_coeff/min": -7776.0, "masked_global_hessian_coeff/p25": -7776.0, "masked_global_hessian_coeff/p75": -7776.0, "masked_global_hessian_coeff/p99": -7776.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 7776.0, "masked_global_hessian_coeff_abs/max": 7776.0, "masked_global_hessian_coeff_abs/median": 7776.0, "masked_global_hessian_coeff_abs/min": 7776.0, "masked_global_hessian_coeff_abs/p25": 7776.0, "masked_global_hessian_coeff_abs/p75": 7776.0, "masked_global_hessian_coeff_abs/p99": 7776.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.385498046875, "masked_per_sentence_gradient_norm/max": 9.0625, "masked_per_sentence_gradient_norm/median": 1.2109375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.59375, "masked_per_sentence_gradient_norm/var": 5.734270095825195, "masked_per_token_gradient_norm": 0.03849925473332405, "masked_per_token_gradient_norm/max": 19.125, "masked_per_token_gradient_norm/median": 1.5987211554602254e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.3431417755782604e-08, "masked_per_token_gradient_norm/var": 0.3594989478588104, "masked_sentence_fisher_curvature": 1233.6693115234375, "masked_sentence_fisher_curvature/max": 3632.0, "masked_sentence_fisher_curvature/median": 968.0, "masked_sentence_fisher_curvature/min": 38.25, "masked_sentence_fisher_curvature/p25": 381.5, "masked_sentence_fisher_curvature/p75": 1912.0, "masked_sentence_fisher_curvature/p85": 2112.0, "masked_sentence_fisher_curvature/p90": 2496.0, "masked_sentence_fisher_curvature/p95": 3632.0, "masked_sentence_fisher_curvature/p99": 3632.0, "masked_sentence_fisher_curvature/var": 1013624.0625, "masked_sentence_fisher_kl_divergence": 1.358097279080539e-08, "masked_sentence_fisher_kl_divergence/max": 4.0046870708465576e-08, "masked_sentence_fisher_kl_divergence/median": 1.0652001947164536e-08, "masked_sentence_fisher_kl_divergence/min": 4.2018655221909285e-10, "masked_sentence_fisher_kl_divergence/p25": 4.20550350099802e-09, "masked_sentence_fisher_kl_divergence/p75": 2.10711732506752e-08, "masked_sentence_fisher_kl_divergence/p85": 2.3283064365386963e-08, "masked_sentence_fisher_kl_divergence/p90": 2.7474015951156616e-08, "masked_sentence_fisher_kl_divergence/p95": 4.0046870708465576e-08, "masked_sentence_fisher_kl_divergence/p99": 4.0046870708465576e-08, "masked_sentence_fisher_kl_divergence/var": 1.231184755797889e-16, "masked_sentence_full_gradient_variance/max_squared_error": 10.755765914916992, "masked_sentence_full_gradient_variance/metric": 10.755765914916992, "masked_sentence_full_gradient_variance/p75": 10.755765914916992, "masked_sentence_full_gradient_variance/p90": 10.755765914916992, "masked_sentence_full_gradient_variance/p95": 10.755765914916992, "masked_sentence_full_gradient_variance/p99": 10.755765914916992, "masked_sentence_full_update_term": 0.0006761848926544189, "masked_sentence_full_update_term/max": 0.00262451171875, "masked_sentence_full_update_term/median": 0.000396728515625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00110626220703125, "masked_sentence_full_update_term/p85": 0.00125885009765625, "masked_sentence_full_update_term/p90": 0.001861572265625, "masked_sentence_full_update_term/p95": 0.002155303955078125, "masked_sentence_full_update_term/p99": 0.00262451171875, "masked_sentence_full_update_term/var": 5.385384156397777e-07, "masked_sentence_hessian_coeff": -21823.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -10944.0, "masked_sentence_hessian_coeff/min": -78336.0, "masked_sentence_hessian_coeff/p25": -34048.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 580255808.0, "masked_sentence_hessian_coeff_abs": 21823.0, "masked_sentence_hessian_coeff_abs/max": 78336.0, "masked_sentence_hessian_coeff_abs/median": 10944.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 34048.0, "masked_sentence_hessian_coeff_abs/p99": 78336.0, "masked_sentence_hessian_coeff_abs/var": 580255808.0, "masked_token_fisher_curvature": 1239.565185546875, "masked_token_fisher_curvature/max": 831488.0, "masked_token_fisher_curvature/median": 2.3310346708438345e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.118180040171859e-24, "masked_token_fisher_curvature/p75": 4.298783551348606e-13, "masked_token_fisher_curvature/p85": 2.3646862246096134e-10, "masked_token_fisher_curvature/p90": 2.561137080192566e-08, "masked_token_fisher_curvature/p95": 5.7386234402656555e-05, "masked_token_fisher_curvature/p99": 2663.5, "masked_token_fisher_curvature/var": 464355296.0, "masked_token_fisher_kl_divergence": 1.3631936468527783e-08, "masked_token_fisher_kl_divergence/max": 9.119510650634766e-06, "masked_token_fisher_kl_divergence/median": 2.5637979419682884e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.64237288394698e-35, "masked_token_fisher_kl_divergence/p75": 4.730439128037627e-24, "masked_token_fisher_kl_divergence/p85": 2.6072732907671432e-21, "masked_token_fisher_kl_divergence/p90": 2.812149384884277e-19, "masked_token_fisher_kl_divergence/p95": 6.303822481373844e-16, "masked_token_fisher_kl_divergence/p99": 2.9274815460667014e-08, "masked_token_fisher_kl_divergence/var": 5.613595661447049e-14, "masked_token_full_update_term": 8.567989425500855e-06, "masked_token_full_update_term/max": 0.004180908203125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.76837158203125e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 6.548581121812447e-16, "masked_token_full_update_term/p85": 1.6786572132332367e-13, "masked_token_full_update_term/p90": 3.552713678800501e-12, "masked_token_full_update_term/p95": 4.656612873077393e-10, "masked_token_full_update_term/p99": 1.7523765563964844e-05, "masked_token_full_update_term/var": 1.8594631612245394e-08, "masked_token_hessian_coeff": -22633.162109375, "masked_token_hessian_coeff/max": 820.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -9895936.0, "masked_token_hessian_coeff/p25": -4.805624485015869e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0010635852813720703, "masked_token_hessian_coeff/var": 129451261952.0, "masked_token_hessian_coeff_abs": 22633.36328125, "masked_token_hessian_coeff_abs/max": 9895936.0, "masked_token_hessian_coeff_abs/median": 1.978150976356119e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.384185791015625e-06, "masked_token_hessian_coeff_abs/p99": 21376.0, "masked_token_hessian_coeff_abs/var": 129451245568.0, "mean_logprobs": -0.007110595703125, "mean_logprobs/var": 4.029273986816406e-05, "num_completions/total": 6432, "per_sentence_gradient_norm": 24.13134765625, "per_sentence_gradient_norm/max": 130.0, "per_sentence_gradient_norm/median": 8.1875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 40.4375, "per_sentence_gradient_norm/var": 888.2227172851562, "per_token_feature_norm": 189.92919921875, "per_token_feature_norm/max": 255.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 108.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 129.781005859375, "per_token_gradient_norm": 0.5974000096321106, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 1.9806378759312793e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.525040715932846e-08, "per_token_gradient_norm/var": 81.0744857788086, "per_token_policy_error_norm": 0.00470425421372056, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004590031690895557, "policy_entropy": 0.008360068313777447, "policy_entropy/max": 3.09375, "policy_entropy/median": 8.512870408594608e-10, "policy_entropy/min": 3.705769144237564e-20, "policy_entropy/p25": 1.3571366253017914e-12, "policy_entropy/p75": 1.6111880540847778e-07, "policy_entropy/var": 0.004639643710106611, "policy_loss": -0.65625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.22796054184436798, "policy_sharpness": 9.767040252685547, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5156399011611938, "reward": 0.65625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.22796054184436798, "rewards/accuracy_reward": 0.65625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.22796054184436798, "sentence_fisher_curvature": 179982.71875, "sentence_fisher_curvature/max": 872448.0, "sentence_fisher_curvature/median": 55296.0, "sentence_fisher_curvature/min": 67.0, "sentence_fisher_curvature/p25": 2172.0, "sentence_fisher_curvature/p75": 282624.0, "sentence_fisher_curvature/p85": 443904.0, "sentence_fisher_curvature/p90": 604160.0, "sentence_fisher_curvature/p95": 721920.0, "sentence_fisher_curvature/p99": 864665.625, "sentence_fisher_curvature/var": 60479946752.0, "sentence_fisher_kl_divergence": 1.979945864150068e-06, "sentence_fisher_kl_divergence/max": 9.59634780883789e-06, "sentence_fisher_kl_divergence/median": 6.07222318649292e-07, "sentence_fisher_kl_divergence/min": 7.385096978396177e-10, "sentence_fisher_kl_divergence/p25": 2.389424480497837e-08, "sentence_fisher_kl_divergence/p75": 3.11434268951416e-06, "sentence_fisher_kl_divergence/p85": 4.872679710388184e-06, "sentence_fisher_kl_divergence/p90": 6.645917892456055e-06, "sentence_fisher_kl_divergence/p95": 7.9423189163208e-06, "sentence_fisher_kl_divergence/p99": 9.53972357820021e-06, "sentence_fisher_kl_divergence/var": 7.324043281575765e-12, "sentence_full_gradient_variance/max_squared_error": 1412.2923583984375, "sentence_full_gradient_variance/metric": 1412.2923583984375, "sentence_full_gradient_variance/p75": 1412.2923583984375, "sentence_full_gradient_variance/p90": 1412.2923583984375, "sentence_full_gradient_variance/p95": 1412.2923583984375, "sentence_full_gradient_variance/p99": 1412.2923583984375, "sentence_full_update_term": 0.007896065711975098, "sentence_full_update_term/max": 0.044189453125, "sentence_full_update_term/median": 0.0027618408203125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0134735107421875, "sentence_full_update_term/p85": 0.019775390625, "sentence_full_update_term/p90": 0.0203857421875, "sentence_full_update_term/p95": 0.0255126953125, "sentence_full_update_term/p99": 0.033520542085170746, "sentence_full_update_term/var": 9.423850860912353e-05, "sentence_hessian_coeff": 14239.0, "sentence_hessian_coeff/max": 561152.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -292864.0, "sentence_hessian_coeff/p25": -78336.0, "sentence_hessian_coeff/p75": 33792.0, "sentence_hessian_coeff/p99": 498893.0, "sentence_hessian_coeff/var": 25633605632.0, "sentence_hessian_coeff_abs": 99765.671875, "sentence_hessian_coeff_abs/max": 561152.0, "sentence_hessian_coeff_abs/median": 78336.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 125184.0, "sentence_hessian_coeff_abs/p99": 498893.0, "sentence_hessian_coeff_abs/var": 15780528128.0, "step": 67, "token_fisher_curvature": 202614.8125, "token_fisher_curvature/max": 166723584.0, "token_fisher_curvature/median": 2.778268066994105e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.5059209523060905e-24, "token_fisher_curvature/p75": 5.186961971048731e-13, "token_fisher_curvature/p85": 3.54702933691442e-10, "token_fisher_curvature/p90": 5.2386894822120667e-08, "token_fisher_curvature/p95": 0.00028571486473083496, "token_fisher_curvature/p99": 83968.0, "token_fisher_curvature/var": 16851223445504.0, "token_fisher_kl_divergence": 2.229330448244582e-06, "token_fisher_kl_divergence/max": 0.0018310546875, "token_fisher_kl_divergence/median": 3.056836007731421e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.056146895436425e-35, "token_fisher_kl_divergence/p75": 5.7127161054443474e-24, "token_fisher_kl_divergence/p85": 3.891057601449442e-21, "token_fisher_kl_divergence/p90": 5.759824041329242e-19, "token_fisher_kl_divergence/p95": 3.139090549997281e-15, "token_fisher_kl_divergence/p99": 9.238719940185547e-07, "token_fisher_kl_divergence/var": 2.0399659739211984e-09, "token_full_update_term": 0.00013936804316472262, "token_full_update_term/max": 0.062255859375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.76837158203125e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 7.875644580934704e-16, "token_full_update_term/p85": 2.1405099914773018e-13, "token_full_update_term/p90": 5.155653681754302e-12, "token_full_update_term/p95": 1.1350493878126144e-09, "token_full_update_term/p99": 0.000278472900390625, "token_full_update_term/var": 4.493505912250839e-06, "token_hessian_coeff": 12875.490234375, "token_hessian_coeff/max": 162529280.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -23330816.0, "token_hessian_coeff/p25": -5.401670932769775e-07, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.0024064183235168457, "token_hessian_coeff/var": 10331585773568.0, "token_hessian_coeff_abs": 184963.828125, "token_hessian_coeff_abs/max": 162529280.0, "token_hessian_coeff_abs/median": 2.2396307031158358e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.779066562652588e-06, "token_hessian_coeff_abs/p99": 806432.0, "token_hessian_coeff_abs/var": 10297539559424.0 }, { "accuracy_reward": 0.8333333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.14035087823867798, "adam_stats/lm_head/lr_effective_max": 2.036354453593958e-05, "adam_stats/lm_head/lr_effective_mean": -6.695905635512034e-11, "adam_stats/lm_head/lr_effective_min": -2.1073392417747527e-05, "adam_stats/lm_head/lr_effective_std": 5.023206881560327e-07, "adam_stats/lr_effective_max": 2.2697637177770957e-05, "adam_stats/lr_effective_mean": -2.7887870832077333e-11, "adam_stats/lr_effective_min": -2.3045698981150053e-05, "adam_stats/m_t_max": 0.0008078442770056427, "adam_stats/m_t_mean": 4.371784618345531e-12, "adam_stats/m_t_min": -0.0008344958769157529, "adam_stats/v_t_max": 2.513252547942102e-05, "adam_stats/v_t_mean": 1.7220386358193784e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8333333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.14035087823867798, "all_logprobs": -0.006547960918396711, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -4.03125, "all_logprobs/p1": -0.09583985805511475, "all_logprobs/p10": -5.960464477539062e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -2.753734588623047e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.008173489011824131, "clip_ratio": 0.0, "completion_length": 436.3958435058594, "completion_length/correct": 417.63751220703125, "completion_length/correct/max": 904.0, "completion_length/correct/median": 357.0, "completion_length/correct/min": 245.0, "completion_length/correct/p25": 305.0, "completion_length/correct/p75": 500.0, "completion_length/correct/var": 27947.98046875, "completion_length/incorrect": 530.1875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 260.0, "completion_length/incorrect/min": 248.0, "completion_length/incorrect/p25": 248.0, "completion_length/incorrect/p75": 769.0, "completion_length/incorrect/var": 92412.15625, "completion_length/max": 1024.0, "completion_length/median": 357.0, "completion_length/min": 245.0, "completion_length/p25": 295.0, "completion_length/p75": 534.0, "completion_length/var": 39610.23828125, "curvature_clip_ratio_token_fisher": 0.007638325449079275, "curvature_clip_ratio_token_hessian": 0.004463646560907364, "curvature_clip_ratio_total_fisher": 0.007638325449079275, "curvature_clip_ratio_total_full": 0.007638325449079275, "curvature_clip_ratio_total_hessian": 0.004463646560907364, "epoch": 0.1088, "feature_vector_variance/max_squared_error": 57873.9140625, "feature_vector_variance/metric": 30999.63671875, "generated_tokens/total": 3801352.0, "global_fisher_curvature": 109568.0, "global_fisher_curvature/max": 109568.0, "global_fisher_curvature/median": 109568.0, "global_fisher_curvature/min": 109568.0, "global_fisher_curvature/p25": 109568.0, "global_fisher_curvature/p75": 109568.0, "global_fisher_curvature/p85": 109568.0, "global_fisher_curvature/p90": 109568.0, "global_fisher_curvature/p95": 109568.0, "global_fisher_curvature/p99": 109568.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.087784767150879e-06, "global_fisher_kl_divergence/max": 1.087784767150879e-06, "global_fisher_kl_divergence/median": 1.087784767150879e-06, "global_fisher_kl_divergence/min": 1.087784767150879e-06, "global_fisher_kl_divergence/p25": 1.087784767150879e-06, "global_fisher_kl_divergence/p75": 1.087784767150879e-06, "global_fisher_kl_divergence/p85": 1.087784767150879e-06, "global_fisher_kl_divergence/p90": 1.087784767150879e-06, "global_fisher_kl_divergence/p95": 1.087784767150879e-06, "global_fisher_kl_divergence/p99": 1.087784767150879e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0302734375, "global_full_update_term/max": 0.0302734375, "global_full_update_term/median": 0.0302734375, "global_full_update_term/min": 0.0302734375, "global_full_update_term/p25": 0.0302734375, "global_full_update_term/p75": 0.0302734375, "global_full_update_term/p85": 0.0302734375, "global_full_update_term/p90": 0.0302734375, "global_full_update_term/p95": 0.0302734375, "global_full_update_term/p99": 0.0302734375, "global_full_update_term/var": NaN, "global_hessian_coeff": 9344.0, "global_hessian_coeff/max": 9344.0, "global_hessian_coeff/median": 9344.0, "global_hessian_coeff/min": 9344.0, "global_hessian_coeff/p25": 9344.0, "global_hessian_coeff/p75": 9344.0, "global_hessian_coeff/p99": 9344.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 9344.0, "global_hessian_coeff_abs/max": 9344.0, "global_hessian_coeff_abs/median": 9344.0, "global_hessian_coeff_abs/min": 9344.0, "global_hessian_coeff_abs/p25": 9344.0, "global_hessian_coeff_abs/p75": 9344.0, "global_hessian_coeff_abs/p99": 9344.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.13122513890266418, "learning_rate": 4.212216399081919e-06, "loss": -0.8333, "masked_global_fisher_curvature": 812.0, "masked_global_fisher_curvature/max": 812.0, "masked_global_fisher_curvature/median": 812.0, "masked_global_fisher_curvature/min": 812.0, "masked_global_fisher_curvature/p25": 812.0, "masked_global_fisher_curvature/p75": 812.0, "masked_global_fisher_curvature/p85": 812.0, "masked_global_fisher_curvature/p90": 812.0, "masked_global_fisher_curvature/p95": 812.0, "masked_global_fisher_curvature/p99": 812.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/max": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/median": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/min": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/p25": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/p75": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/p85": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/p90": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/p95": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/p99": 8.032657206058502e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0023956298828125, "masked_global_full_update_term/max": 0.0023956298828125, "masked_global_full_update_term/median": 0.0023956298828125, "masked_global_full_update_term/min": 0.0023956298828125, "masked_global_full_update_term/p25": 0.0023956298828125, "masked_global_full_update_term/p75": 0.0023956298828125, "masked_global_full_update_term/p85": 0.0023956298828125, "masked_global_full_update_term/p90": 0.0023956298828125, "masked_global_full_update_term/p95": 0.0023956298828125, "masked_global_full_update_term/p99": 0.0023956298828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -9408.0, "masked_global_hessian_coeff/max": -9408.0, "masked_global_hessian_coeff/median": -9408.0, "masked_global_hessian_coeff/min": -9408.0, "masked_global_hessian_coeff/p25": -9408.0, "masked_global_hessian_coeff/p75": -9408.0, "masked_global_hessian_coeff/p99": -9408.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 9408.0, "masked_global_hessian_coeff_abs/max": 9408.0, "masked_global_hessian_coeff_abs/median": 9408.0, "masked_global_hessian_coeff_abs/min": 9408.0, "masked_global_hessian_coeff_abs/p25": 9408.0, "masked_global_hessian_coeff_abs/p75": 9408.0, "masked_global_hessian_coeff_abs/p99": 9408.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.4664103984832764, "masked_per_sentence_gradient_norm/max": 11.375, "masked_per_sentence_gradient_norm/median": 2.46875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.767578125, "masked_per_sentence_gradient_norm/p75": 6.84375, "masked_per_sentence_gradient_norm/var": 10.150726318359375, "masked_per_token_gradient_norm": 0.07143870741128922, "masked_per_token_gradient_norm/max": 20.25, "masked_per_token_gradient_norm/median": 9.686118573881686e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 8.534839501805891e-16, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 8.940696716308594e-08, "masked_per_token_gradient_norm/var": 0.8762829303741455, "masked_sentence_fisher_curvature": 1959.478515625, "masked_sentence_fisher_curvature/max": 5792.0, "masked_sentence_fisher_curvature/median": 1920.0, "masked_sentence_fisher_curvature/min": 1.8125, "masked_sentence_fisher_curvature/p25": 622.0, "masked_sentence_fisher_curvature/p75": 2880.0, "masked_sentence_fisher_curvature/p85": 3952.0, "masked_sentence_fisher_curvature/p90": 4000.0, "masked_sentence_fisher_curvature/p95": 4800.0, "masked_sentence_fisher_curvature/p99": 4849.60302734375, "masked_sentence_fisher_curvature/var": 2287126.5, "masked_sentence_fisher_kl_divergence": 1.9395573147562573e-08, "masked_sentence_fisher_kl_divergence/max": 5.727633833885193e-08, "masked_sentence_fisher_kl_divergence/median": 1.8975697457790375e-08, "masked_sentence_fisher_kl_divergence/min": 1.7962520360015333e-11, "masked_sentence_fisher_kl_divergence/p25": 6.155460141599178e-09, "masked_sentence_fisher_kl_divergence/p75": 2.852175384759903e-08, "masked_sentence_fisher_kl_divergence/p85": 3.91155481338501e-08, "masked_sentence_fisher_kl_divergence/p90": 3.958120942115784e-08, "masked_sentence_fisher_kl_divergence/p95": 4.7497451305389404e-08, "masked_sentence_fisher_kl_divergence/p99": 4.798642549985743e-08, "masked_sentence_fisher_kl_divergence/var": 2.2393931174892077e-16, "masked_sentence_full_gradient_variance/max_squared_error": 20.803176879882812, "masked_sentence_full_gradient_variance/metric": 20.803176879882812, "masked_sentence_full_gradient_variance/p75": 20.803176879882812, "masked_sentence_full_gradient_variance/p90": 20.803176879882812, "masked_sentence_full_gradient_variance/p95": 20.803176879882812, "masked_sentence_full_gradient_variance/p99": 20.803176879882812, "masked_sentence_full_update_term": 0.0009908590000122786, "masked_sentence_full_update_term/max": 0.00469970703125, "masked_sentence_full_update_term/median": 0.000667572021484375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0001685619354248047, "masked_sentence_full_update_term/p75": 0.00167083740234375, "masked_sentence_full_update_term/p85": 0.001728057861328125, "masked_sentence_full_update_term/p90": 0.002197265625, "masked_sentence_full_update_term/p95": 0.0024871826171875, "masked_sentence_full_update_term/p99": 0.004380799364298582, "masked_sentence_full_update_term/var": 1.054453036886116e-06, "masked_sentence_hessian_coeff": -30977.146484375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -33792.0, "masked_sentence_hessian_coeff/min": -91136.0, "masked_sentence_hessian_coeff/p25": -51520.0, "masked_sentence_hessian_coeff/p75": -5920.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 631239232.0, "masked_sentence_hessian_coeff_abs": 30977.146484375, "masked_sentence_hessian_coeff_abs/max": 91136.0, "masked_sentence_hessian_coeff_abs/median": 33792.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 5920.0, "masked_sentence_hessian_coeff_abs/p75": 51520.0, "masked_sentence_hessian_coeff_abs/p99": 70220.8671875, "masked_sentence_hessian_coeff_abs/var": 631239232.0, "masked_token_fisher_curvature": 2350.2119140625, "masked_token_fisher_curvature/max": 999424.0, "masked_token_fisher_curvature/median": 7.487771253728015e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.988933069460448e-24, "masked_token_fisher_curvature/p75": 1.0125233984581428e-13, "masked_token_fisher_curvature/p85": 4.4565240386873484e-11, "masked_token_fisher_curvature/p90": 5.529727786779404e-09, "masked_token_fisher_curvature/p95": 2.09808349609375e-05, "masked_token_fisher_curvature/p99": 2576.0, "masked_token_fisher_curvature/var": 1364624640.0, "masked_token_fisher_kl_divergence": 2.325804437930401e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 7.395570986446986e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.927672318647029e-35, "masked_token_fisher_kl_divergence/p75": 1.001664023013432e-24, "masked_token_fisher_kl_divergence/p85": 4.400600858782107e-22, "masked_token_fisher_kl_divergence/p90": 5.463362509790237e-20, "masked_token_fisher_kl_divergence/p95": 2.0729945537922845e-16, "masked_token_fisher_kl_divergence/p99": 2.5494955480098724e-08, "masked_token_fisher_kl_divergence/var": 1.336297744550588e-13, "masked_token_full_update_term": 1.4758173165319022e-05, "masked_token_full_update_term/max": 0.00433349609375, "masked_token_full_update_term/median": 4.256340559952859e-20, "masked_token_full_update_term/min": -3.520399332046509e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.2323475573339238e-14, "masked_token_full_update_term/p85": 1.0018652574217413e-12, "masked_token_full_update_term/p90": 1.3642420526593924e-11, "masked_token_full_update_term/p95": 1.6883348052942893e-09, "masked_token_full_update_term/p99": 0.00013828277587890625, "masked_token_full_update_term/var": 3.880321131077835e-08, "masked_token_hessian_coeff": -39308.703125, "masked_token_hessian_coeff/max": 201.0, "masked_token_hessian_coeff/median": -6.184563972055912e-11, "masked_token_hessian_coeff/min": -11468800.0, "masked_token_hessian_coeff/p25": -4.932284355163574e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.017611026763916016, "masked_token_hessian_coeff/var": 272603201536.0, "masked_token_hessian_coeff_abs": 39308.80078125, "masked_token_hessian_coeff_abs/max": 11468800.0, "masked_token_hessian_coeff_abs/median": 1.1757947504520416e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 6.084022174945858e-14, "masked_token_hessian_coeff_abs/p75": 2.300739288330078e-05, "masked_token_hessian_coeff_abs/p99": 266240.0, "masked_token_hessian_coeff_abs/var": 272603201536.0, "mean_logprobs": -0.0057373046875, "mean_logprobs/var": 2.2292137145996094e-05, "num_completions/total": 6528, "per_sentence_gradient_norm": 41.49186325073242, "per_sentence_gradient_norm/max": 226.0, "per_sentence_gradient_norm/median": 29.125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 6.96875, "per_sentence_gradient_norm/p75": 58.25, "per_sentence_gradient_norm/var": 1934.883544921875, "per_token_feature_norm": 190.30355834960938, "per_token_feature_norm/max": 246.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 142.2800750732422, "per_token_gradient_norm": 0.7398969531059265, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 1.0913936421275139e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 9.71445146547012e-16, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.0523945093154907e-07, "per_token_gradient_norm/var": 95.59656524658203, "per_token_policy_error_norm": 0.003949047531932592, "per_token_policy_error_norm/max": 1.921875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0037811952643096447, "policy_entropy": 0.007370245177298784, "policy_entropy/max": 1.5, "policy_entropy/median": 5.311449058353901e-10, "policy_entropy/min": 2.329340604949326e-20, "policy_entropy/p25": 1.6360246490876307e-12, "policy_entropy/p75": 8.381903171539307e-08, "policy_entropy/var": 0.003641897113993764, "policy_loss": -0.8333333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.14035087823867798, "policy_sharpness": 9.782632827758789, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.3873553276062012, "reward": 0.8333333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.14035087823867798, "rewards/accuracy_reward": 0.8333333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.14035087823867798, "sentence_fisher_curvature": 220359.296875, "sentence_fisher_curvature/max": 1064960.0, "sentence_fisher_curvature/median": 156672.0, "sentence_fisher_curvature/min": 592.0, "sentence_fisher_curvature/p25": 4000.0, "sentence_fisher_curvature/p75": 323584.0, "sentence_fisher_curvature/p85": 453120.0, "sentence_fisher_curvature/p90": 561152.0, "sentence_fisher_curvature/p95": 773120.0, "sentence_fisher_curvature/p99": 932659.625, "sentence_fisher_curvature/var": 59022647296.0, "sentence_fisher_kl_divergence": 2.18222680814506e-06, "sentence_fisher_kl_divergence/max": 1.055002212524414e-05, "sentence_fisher_kl_divergence/median": 1.5497207641601562e-06, "sentence_fisher_kl_divergence/min": 5.8498699218034744e-09, "sentence_fisher_kl_divergence/p25": 3.958120942115784e-08, "sentence_fisher_kl_divergence/p75": 3.207474946975708e-06, "sentence_fisher_kl_divergence/p85": 4.477798938751221e-06, "sentence_fisher_kl_divergence/p90": 5.558133125305176e-06, "sentence_fisher_kl_divergence/p95": 7.659196853637695e-06, "sentence_fisher_kl_divergence/p99": 9.247664820577484e-06, "sentence_fisher_kl_divergence/var": 5.791912956187861e-12, "sentence_full_gradient_variance/max_squared_error": 3528.30322265625, "sentence_full_gradient_variance/metric": 3528.30322265625, "sentence_full_gradient_variance/p75": 3528.30322265625, "sentence_full_gradient_variance/p90": 3528.30322265625, "sentence_full_gradient_variance/p95": 3528.30322265625, "sentence_full_gradient_variance/p99": 3528.30322265625, "sentence_full_update_term": 0.013072093948721886, "sentence_full_update_term/max": 0.053466796875, "sentence_full_update_term/median": 0.00860595703125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.002197265625, "sentence_full_update_term/p75": 0.020751953125, "sentence_full_update_term/p85": 0.027679443359375, "sentence_full_update_term/p90": 0.034423828125, "sentence_full_update_term/p95": 0.0399169921875, "sentence_full_update_term/p99": 0.05253906548023224, "sentence_full_update_term/var": 0.00018872857617679983, "sentence_hessian_coeff": 4343.6669921875, "sentence_hessian_coeff/max": 757760.0, "sentence_hessian_coeff/median": -36096.0, "sentence_hessian_coeff/min": -226304.0, "sentence_hessian_coeff/p25": -108032.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 609894.875, "sentence_hessian_coeff/var": 27791273984.0, "sentence_hessian_coeff_abs": 106350.3359375, "sentence_hessian_coeff_abs/max": 757760.0, "sentence_hessian_coeff_abs/median": 88576.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 33792.0, "sentence_hessian_coeff_abs/p75": 127104.0, "sentence_hessian_coeff_abs/p99": 609894.875, "sentence_hessian_coeff_abs/var": 16380890112.0, "step": 68, "token_fisher_curvature": 233618.125, "token_fisher_curvature/max": 179306496.0, "token_fisher_curvature/median": 8.673617379884035e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.841963076155758e-24, "token_fisher_curvature/p75": 1.3855583347321954e-13, "token_fisher_curvature/p85": 7.457856554538012e-11, "token_fisher_curvature/p90": 1.393482307321392e-08, "token_fisher_curvature/p95": 0.00012874603271484375, "token_fisher_curvature/p99": 415744.0, "token_fisher_curvature/var": 20366325972992.0, "token_fisher_kl_divergence": 2.312173819518648e-06, "token_fisher_kl_divergence/max": 0.00177764892578125, "token_fisher_kl_divergence/median": 8.578862344278503e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.792836160852233e-35, "token_fisher_kl_divergence/p75": 1.370017889540952e-24, "token_fisher_kl_divergence/p85": 7.378451063973007e-22, "token_fisher_kl_divergence/p90": 1.3781159877378462e-19, "token_fisher_kl_divergence/p95": 1.27675647831893e-15, "token_fisher_kl_divergence/p99": 4.112720489501953e-06, "token_fisher_kl_divergence/var": 1.9952279828316932e-09, "token_full_update_term": 0.00016177070210687816, "token_full_update_term/max": 0.061279296875, "token_full_update_term/median": 5.802175688691957e-20, "token_full_update_term/min": -3.520399332046509e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.5432100042289676e-14, "token_full_update_term/p85": 1.3784529073745944e-12, "token_full_update_term/p90": 2.091837814077735e-11, "token_full_update_term/p95": 6.063714863557834e-09, "token_full_update_term/p99": 0.002551436424255371, "token_full_update_term/var": 4.672802333516302e-06, "token_hessian_coeff": -12688.615234375, "token_hessian_coeff/max": 176160768.0, "token_hessian_coeff/median": -6.821210263296962e-11, "token_hessian_coeff/min": -23724032.0, "token_hessian_coeff/p25": -5.453824996948242e-06, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.026123046875, "token_hessian_coeff/var": 13587963707392.0, "token_hessian_coeff_abs": 241343.5625, "token_hessian_coeff_abs/max": 176160768.0, "token_hessian_coeff_abs/median": 1.2863893061876297e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 6.750155989720952e-14, "token_hessian_coeff_abs/p75": 2.7298927307128906e-05, "token_hessian_coeff_abs/p99": 5144576.0, "token_hessian_coeff_abs/var": 13529876791296.0 }, { "accuracy_reward": 0.71875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20427630841732025, "adam_stats/lm_head/lr_effective_max": 2.0890740415779874e-05, "adam_stats/lm_head/lr_effective_mean": -5.4411041644497615e-11, "adam_stats/lm_head/lr_effective_min": -2.028948802035302e-05, "adam_stats/lm_head/lr_effective_std": 4.744601369566226e-07, "adam_stats/lr_effective_max": 2.146197402908001e-05, "adam_stats/lr_effective_mean": 3.762519809602516e-11, "adam_stats/lr_effective_min": -2.203094118158333e-05, "adam_stats/m_t_max": 0.0006881499430164695, "adam_stats/m_t_mean": 8.070962301265361e-12, "adam_stats/m_t_min": -0.0006152430432848632, "adam_stats/v_t_max": 2.5114441086770967e-05, "adam_stats/v_t_mean": 1.7215505280013255e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.71875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20427630841732025, "all_logprobs": -0.008659330196678638, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.486343383789062e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012389512732625008, "clip_ratio": 0.0, "completion_length": 619.2604370117188, "completion_length/correct": 538.7391357421875, "completion_length/correct/max": 997.0, "completion_length/correct/median": 493.0, "completion_length/correct/min": 219.0, "completion_length/correct/p25": 380.0, "completion_length/correct/p75": 654.0, "completion_length/correct/var": 46052.16796875, "completion_length/incorrect": 825.0370483398438, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 395.0, "completion_length/incorrect/p25": 560.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 64921.8046875, "completion_length/max": 1024.0, "completion_length/median": 577.0, "completion_length/min": 219.0, "completion_length/p25": 422.0, "completion_length/p75": 839.5, "completion_length/var": 67475.5390625, "curvature_clip_ratio_token_fisher": 0.00836010742932558, "curvature_clip_ratio_token_hessian": 0.005281838122755289, "curvature_clip_ratio_total_fisher": 0.00836010742932558, "curvature_clip_ratio_total_full": 0.00836010742932558, "curvature_clip_ratio_total_hessian": 0.005281838122755289, "epoch": 0.1104, "feature_vector_variance/max_squared_error": 61883.3359375, "feature_vector_variance/metric": 30676.607421875, "generated_tokens/total": 3860801.0, "global_fisher_curvature": 149504.0, "global_fisher_curvature/max": 149504.0, "global_fisher_curvature/median": 149504.0, "global_fisher_curvature/min": 149504.0, "global_fisher_curvature/p25": 149504.0, "global_fisher_curvature/p75": 149504.0, "global_fisher_curvature/p85": 149504.0, "global_fisher_curvature/p90": 149504.0, "global_fisher_curvature/p95": 149504.0, "global_fisher_curvature/p99": 149504.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.3262033462524414e-06, "global_fisher_kl_divergence/max": 1.3262033462524414e-06, "global_fisher_kl_divergence/median": 1.3262033462524414e-06, "global_fisher_kl_divergence/min": 1.3262033462524414e-06, "global_fisher_kl_divergence/p25": 1.3262033462524414e-06, "global_fisher_kl_divergence/p75": 1.3262033462524414e-06, "global_fisher_kl_divergence/p85": 1.3262033462524414e-06, "global_fisher_kl_divergence/p90": 1.3262033462524414e-06, "global_fisher_kl_divergence/p95": 1.3262033462524414e-06, "global_fisher_kl_divergence/p99": 1.3262033462524414e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.03857421875, "global_full_update_term/max": 0.03857421875, "global_full_update_term/median": 0.03857421875, "global_full_update_term/min": 0.03857421875, "global_full_update_term/p25": 0.03857421875, "global_full_update_term/p75": 0.03857421875, "global_full_update_term/p85": 0.03857421875, "global_full_update_term/p90": 0.03857421875, "global_full_update_term/p95": 0.03857421875, "global_full_update_term/p99": 0.03857421875, "global_full_update_term/var": NaN, "global_hessian_coeff": 49920.0, "global_hessian_coeff/max": 49920.0, "global_hessian_coeff/median": 49920.0, "global_hessian_coeff/min": 49920.0, "global_hessian_coeff/p25": 49920.0, "global_hessian_coeff/p75": 49920.0, "global_hessian_coeff/p99": 49920.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 49920.0, "global_hessian_coeff_abs/max": 49920.0, "global_hessian_coeff_abs/median": 49920.0, "global_hessian_coeff_abs/min": 49920.0, "global_hessian_coeff_abs/p25": 49920.0, "global_hessian_coeff_abs/p75": 49920.0, "global_hessian_coeff_abs/p99": 49920.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.09871002286672592, "learning_rate": 3.978963279105821e-06, "loss": -0.7188, "masked_global_fisher_curvature": 1296.0, "masked_global_fisher_curvature/max": 1296.0, "masked_global_fisher_curvature/median": 1296.0, "masked_global_fisher_curvature/min": 1296.0, "masked_global_fisher_curvature/p25": 1296.0, "masked_global_fisher_curvature/p75": 1296.0, "masked_global_fisher_curvature/p85": 1296.0, "masked_global_fisher_curvature/p90": 1296.0, "masked_global_fisher_curvature/p95": 1296.0, "masked_global_fisher_curvature/p99": 1296.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/max": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/median": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/min": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/p25": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/p75": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/p85": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/p90": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/p95": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/p99": 1.1525116860866547e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003509521484375, "masked_global_full_update_term/max": 0.003509521484375, "masked_global_full_update_term/median": 0.003509521484375, "masked_global_full_update_term/min": 0.003509521484375, "masked_global_full_update_term/p25": 0.003509521484375, "masked_global_full_update_term/p75": 0.003509521484375, "masked_global_full_update_term/p85": 0.003509521484375, "masked_global_full_update_term/p90": 0.003509521484375, "masked_global_full_update_term/p95": 0.003509521484375, "masked_global_full_update_term/p99": 0.003509521484375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -7616.0, "masked_global_hessian_coeff/max": -7616.0, "masked_global_hessian_coeff/median": -7616.0, "masked_global_hessian_coeff/min": -7616.0, "masked_global_hessian_coeff/p25": -7616.0, "masked_global_hessian_coeff/p75": -7616.0, "masked_global_hessian_coeff/p99": -7616.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 7616.0, "masked_global_hessian_coeff_abs/max": 7616.0, "masked_global_hessian_coeff_abs/median": 7616.0, "masked_global_hessian_coeff_abs/min": 7616.0, "masked_global_hessian_coeff_abs/p25": 7616.0, "masked_global_hessian_coeff_abs/p75": 7616.0, "masked_global_hessian_coeff_abs/p99": 7616.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.7950439453125, "masked_per_sentence_gradient_norm/max": 25.875, "masked_per_sentence_gradient_norm/median": 2.390625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 5.4453125, "masked_per_sentence_gradient_norm/var": 21.19182777404785, "masked_per_token_gradient_norm": 0.06573225557804108, "masked_per_token_gradient_norm/max": 23.75, "masked_per_token_gradient_norm/median": 1.6076029396572267e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.1699739843606949e-08, "masked_per_token_gradient_norm/var": 0.7346643209457397, "masked_sentence_fisher_curvature": 2127.33349609375, "masked_sentence_fisher_curvature/max": 10944.0, "masked_sentence_fisher_curvature/median": 1632.0, "masked_sentence_fisher_curvature/min": 89.0, "masked_sentence_fisher_curvature/p25": 660.0, "masked_sentence_fisher_curvature/p75": 3028.0, "masked_sentence_fisher_curvature/p85": 3712.0, "masked_sentence_fisher_curvature/p90": 4736.0, "masked_sentence_fisher_curvature/p95": 5072.0, "masked_sentence_fisher_curvature/p99": 9120.005859375, "masked_sentence_fisher_curvature/var": 3882101.5, "masked_sentence_fisher_kl_divergence": 1.8864360740167285e-08, "masked_sentence_fisher_kl_divergence/max": 9.685754776000977e-08, "masked_sentence_fisher_kl_divergence/median": 1.4493707567453384e-08, "masked_sentence_fisher_kl_divergence/min": 7.894414011389017e-10, "masked_sentence_fisher_kl_divergence/p25": 5.8498699218034744e-09, "masked_sentence_fisher_kl_divergence/p75": 2.6833731681108475e-08, "masked_sentence_fisher_kl_divergence/p85": 3.282912075519562e-08, "masked_sentence_fisher_kl_divergence/p90": 4.202593117952347e-08, "masked_sentence_fisher_kl_divergence/p95": 4.4994521886110306e-08, "masked_sentence_fisher_kl_divergence/p99": 8.093198289316206e-08, "masked_sentence_fisher_kl_divergence/var": 3.0510687919427454e-16, "masked_sentence_full_gradient_variance/max_squared_error": 34.18593978881836, "masked_sentence_full_gradient_variance/metric": 34.18593978881836, "masked_sentence_full_gradient_variance/p75": 34.18593978881836, "masked_sentence_full_gradient_variance/p90": 34.18593978881836, "masked_sentence_full_gradient_variance/p95": 34.18593978881836, "masked_sentence_full_gradient_variance/p99": 34.18593978881836, "masked_sentence_full_update_term": 0.0013079147320240736, "masked_sentence_full_update_term/max": 0.01116943359375, "masked_sentence_full_update_term/median": 0.0007781982421875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001773834228515625, "masked_sentence_full_update_term/p85": 0.002605438232421875, "masked_sentence_full_update_term/p90": 0.00287628173828125, "masked_sentence_full_update_term/p95": 0.00460052490234375, "masked_sentence_full_update_term/p99": 0.006791701074689627, "masked_sentence_full_update_term/var": 3.0323481041705236e-06, "masked_sentence_hessian_coeff": -33398.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -30208.0, "masked_sentence_hessian_coeff/min": -142336.0, "masked_sentence_hessian_coeff/p25": -54336.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 877273152.0, "masked_sentence_hessian_coeff_abs": 33398.0, "masked_sentence_hessian_coeff_abs/max": 142336.0, "masked_sentence_hessian_coeff_abs/median": 27520.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 54336.0, "masked_sentence_hessian_coeff_abs/p99": 111206.5, "masked_sentence_hessian_coeff_abs/var": 877273152.0, "masked_token_fisher_curvature": 2188.84375, "masked_token_fisher_curvature/max": 1114112.0, "masked_token_fisher_curvature/median": 1.6601845766184287e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.906392817993306e-25, "masked_token_fisher_curvature/p75": 7.416289804496046e-14, "masked_token_fisher_curvature/p85": 4.2518877307884395e-11, "masked_token_fisher_curvature/p90": 6.402842700481415e-09, "masked_token_fisher_curvature/p95": 9.5367431640625e-05, "masked_token_fisher_curvature/p99": 7228.0, "masked_token_fisher_curvature/var": 1301450624.0, "masked_token_fisher_kl_divergence": 1.9423994856992977e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.472951221467358e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.692711865184094e-36, "masked_token_fisher_kl_divergence/p75": 6.591595506281939e-25, "masked_token_fisher_kl_divergence/p85": 3.771943593241806e-22, "masked_token_fisher_kl_divergence/p90": 5.675120746603812e-20, "masked_token_fisher_kl_divergence/p95": 8.465450562766819e-16, "masked_token_fisher_kl_divergence/p99": 6.42248778603971e-08, "masked_token_fisher_kl_divergence/var": 1.0254788012677152e-13, "masked_token_full_update_term": 1.3202999070927035e-05, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.559755325317383e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.100087019371813e-16, "masked_token_full_update_term/p85": 1.4832579608992091e-13, "masked_token_full_update_term/p90": 3.566924533515703e-12, "masked_token_full_update_term/p95": 8.347313951162505e-10, "masked_token_full_update_term/p99": 0.0001773834228515625, "masked_token_full_update_term/var": 3.047868801786535e-08, "masked_token_hessian_coeff": -38267.0, "masked_token_hessian_coeff/max": 4416.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -12451840.0, "masked_token_hessian_coeff/p25": -1.7881393432617188e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.015380859375, "masked_token_hessian_coeff/var": 249966182400.0, "masked_token_hessian_coeff_abs": 38267.76171875, "masked_token_hessian_coeff_abs/max": 12451840.0, "masked_token_hessian_coeff_abs/median": 1.9099388737231493e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.5331974029541016e-06, "masked_token_hessian_coeff_abs/p99": 482288.0, "masked_token_hessian_coeff_abs/var": 249966149632.0, "mean_logprobs": -0.00921630859375, "mean_logprobs/var": 2.5510787963867188e-05, "num_completions/total": 6624, "per_sentence_gradient_norm": 63.856773376464844, "per_sentence_gradient_norm/max": 262.0, "per_sentence_gradient_norm/median": 51.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 98.875, "per_sentence_gradient_norm/var": 3767.19189453125, "per_token_feature_norm": 189.9094696044922, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 111.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 128.50572204589844, "per_token_gradient_norm": 0.8647398948669434, "per_token_gradient_norm/max": 294.0, "per_token_gradient_norm/median": 1.9806378759312793e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.4901161193847656e-08, "per_token_gradient_norm/var": 120.4331283569336, "per_token_policy_error_norm": 0.00515958946198225, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005078586749732494, "policy_entropy": 0.009100180119276047, "policy_entropy/max": 1.6796875, "policy_entropy/median": 2.4374458007514477e-10, "policy_entropy/min": 6.246867986000465e-21, "policy_entropy/p25": 3.7836400679225335e-13, "policy_entropy/p75": 6.984919309616089e-08, "policy_entropy/var": 0.004694562871009111, "policy_loss": -0.71875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20427630841732025, "policy_sharpness": 9.741031646728516, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.676944375038147, "reward": 0.71875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20427630841732025, "rewards/accuracy_reward": 0.71875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20427630841732025, "sentence_fisher_curvature": 351451.34375, "sentence_fisher_curvature/max": 1466368.0, "sentence_fisher_curvature/median": 348160.0, "sentence_fisher_curvature/min": 89.0, "sentence_fisher_curvature/p25": 6016.0, "sentence_fisher_curvature/p75": 551936.0, "sentence_fisher_curvature/p85": 669696.0, "sentence_fisher_curvature/p90": 786432.0, "sentence_fisher_curvature/p95": 882688.0, "sentence_fisher_curvature/p99": 1131725.875, "sentence_fisher_curvature/var": 100647247872.0, "sentence_fisher_kl_divergence": 3.1172494345810264e-06, "sentence_fisher_kl_divergence/max": 1.2993812561035156e-05, "sentence_fisher_kl_divergence/median": 3.084540367126465e-06, "sentence_fisher_kl_divergence/min": 7.894414011389017e-10, "sentence_fisher_kl_divergence/p25": 5.337642505764961e-08, "sentence_fisher_kl_divergence/p75": 4.887580871582031e-06, "sentence_fisher_kl_divergence/p85": 5.938112735748291e-06, "sentence_fisher_kl_divergence/p90": 6.973743438720703e-06, "sentence_fisher_kl_divergence/p95": 7.808208465576172e-06, "sentence_fisher_kl_divergence/p99": 1.0049352567875758e-05, "sentence_fisher_kl_divergence/var": 7.916221497761278e-12, "sentence_full_gradient_variance/max_squared_error": 7640.6376953125, "sentence_full_gradient_variance/metric": 7640.6376953125, "sentence_full_gradient_variance/p75": 7640.6376953125, "sentence_full_gradient_variance/p90": 7640.6376953125, "sentence_full_gradient_variance/p95": 7640.6376953125, "sentence_full_gradient_variance/p99": 7640.6376953125, "sentence_full_update_term": 0.020702362060546875, "sentence_full_update_term/max": 0.10498046875, "sentence_full_update_term/median": 0.0166015625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.02911376953125, "sentence_full_update_term/p85": 0.03704833984375, "sentence_full_update_term/p90": 0.0452880859375, "sentence_full_update_term/p95": 0.0643310546875, "sentence_full_update_term/p99": 0.09013676643371582, "sentence_full_update_term/var": 0.00047597484081052244, "sentence_hessian_coeff": 58225.3359375, "sentence_hessian_coeff/max": 823296.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -280576.0, "sentence_hessian_coeff/p25": -36608.0, "sentence_hessian_coeff/p75": 126336.0, "sentence_hessian_coeff/p99": 675430.875, "sentence_hessian_coeff/var": 44210221056.0, "sentence_hessian_coeff_abs": 138062.671875, "sentence_hessian_coeff_abs/max": 823296.0, "sentence_hessian_coeff_abs/median": 80896.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 223488.0, "sentence_hessian_coeff_abs/p99": 675430.875, "sentence_hessian_coeff_abs/var": 28374151168.0, "step": 69, "token_fisher_curvature": 296162.71875, "token_fisher_curvature/max": 185597952.0, "token_fisher_curvature/median": 2.0837010502455788e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.1325750167382745e-25, "token_fisher_curvature/p75": 1.0258460747536446e-13, "token_fisher_curvature/p85": 7.685230229981244e-11, "token_fisher_curvature/p90": 1.862645149230957e-08, "token_fisher_curvature/p95": 0.00164031982421875, "token_fisher_curvature/p99": 438352.0, "token_fisher_curvature/var": 27547213496320.0, "token_fisher_kl_divergence": 2.6274453830410494e-06, "token_fisher_kl_divergence/max": 0.00164794921875, "token_fisher_kl_divergence/median": 1.8488927466117464e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.892545904823883e-36, "token_fisher_kl_divergence/p75": 9.111911435154445e-25, "token_fisher_kl_divergence/p85": 6.815968247436948e-22, "token_fisher_kl_divergence/p90": 1.6517142471458857e-19, "token_fisher_kl_divergence/p95": 1.454392162258955e-14, "token_fisher_kl_divergence/p99": 3.889785148203373e-06, "token_fisher_kl_divergence/var": 2.167974244571269e-09, "token_full_update_term": 0.00018139745225198567, "token_full_update_term/max": 0.05908203125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.559755325317383e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 7.945033519973776e-16, "token_full_update_term/p85": 2.192031278713813e-13, "token_full_update_term/p90": 5.97422111781043e-12, "token_full_update_term/p95": 2.7939677238464355e-09, "token_full_update_term/p99": 0.0023363828659057617, "token_full_update_term/var": 5.318947387422668e-06, "token_hessian_coeff": 20080.5390625, "token_hessian_coeff/max": 184549376.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -23724032.0, "token_hessian_coeff/p25": -2.169981598854065e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.02905750274658203, "token_hessian_coeff/var": 19474497404928.0, "token_hessian_coeff_abs": 293056.375, "token_hessian_coeff_abs/max": 184549376.0, "token_hessian_coeff_abs/median": 2.319211489520967e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.3080577850341797e-06, "token_hessian_coeff_abs/p99": 6193152.0, "token_hessian_coeff_abs/var": 19389015392256.0 }, { "accuracy_reward": 0.75, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.75, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1894737035036087, "adam_stats/lm_head/lr_effective_max": 1.8701186490943655e-05, "adam_stats/lm_head/lr_effective_mean": -5.3105055075608476e-11, "adam_stats/lm_head/lr_effective_min": -1.931131919263862e-05, "adam_stats/lm_head/lr_effective_std": 4.52354896651741e-07, "adam_stats/lr_effective_max": 2.12037284654798e-05, "adam_stats/lr_effective_mean": 5.957939697154302e-11, "adam_stats/lr_effective_min": -2.0958628738299012e-05, "adam_stats/m_t_max": 0.001016063499264419, "adam_stats/m_t_mean": 9.262951516930684e-12, "adam_stats/m_t_min": -0.0006213011802174151, "adam_stats/v_t_max": 2.509402111172676e-05, "adam_stats/v_t_mean": 1.723250231747131e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.75, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.75, "advantages/p75": 1.0, "advantages/var": 0.1894737035036087, "all_logprobs": -0.009012394584715366, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.891654968261719e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.014247598126530647, "clip_ratio": 0.0, "completion_length": 478.65625, "completion_length/correct": 462.6805725097656, "completion_length/correct/max": 856.0, "completion_length/correct/median": 423.0, "completion_length/correct/min": 191.0, "completion_length/correct/p25": 326.75, "completion_length/correct/p75": 594.5, "completion_length/correct/var": 32345.99609375, "completion_length/incorrect": 526.5833740234375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 497.0, "completion_length/incorrect/min": 349.0, "completion_length/incorrect/p25": 389.5, "completion_length/incorrect/p75": 527.75, "completion_length/incorrect/var": 30372.94921875, "completion_length/max": 1024.0, "completion_length/median": 451.0, "completion_length/min": 191.0, "completion_length/p25": 349.0, "completion_length/p75": 593.0, "completion_length/var": 32301.5546875, "curvature_clip_ratio_token_fisher": 0.008617875166237354, "curvature_clip_ratio_token_hessian": 0.0055276271887123585, "curvature_clip_ratio_total_fisher": 0.008617875166237354, "curvature_clip_ratio_total_full": 0.008617875166237354, "curvature_clip_ratio_total_hessian": 0.0055276271887123585, "epoch": 0.112, "feature_vector_variance/max_squared_error": 55727.91796875, "feature_vector_variance/metric": 31193.36328125, "generated_tokens/total": 3906752.0, "global_fisher_curvature": 142336.0, "global_fisher_curvature/max": 142336.0, "global_fisher_curvature/median": 142336.0, "global_fisher_curvature/min": 142336.0, "global_fisher_curvature/p25": 142336.0, "global_fisher_curvature/p75": 142336.0, "global_fisher_curvature/p85": 142336.0, "global_fisher_curvature/p90": 142336.0, "global_fisher_curvature/p95": 142336.0, "global_fisher_curvature/p99": 142336.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.125037670135498e-06, "global_fisher_kl_divergence/max": 1.125037670135498e-06, "global_fisher_kl_divergence/median": 1.125037670135498e-06, "global_fisher_kl_divergence/min": 1.125037670135498e-06, "global_fisher_kl_divergence/p25": 1.125037670135498e-06, "global_fisher_kl_divergence/p75": 1.125037670135498e-06, "global_fisher_kl_divergence/p85": 1.125037670135498e-06, "global_fisher_kl_divergence/p90": 1.125037670135498e-06, "global_fisher_kl_divergence/p95": 1.125037670135498e-06, "global_fisher_kl_divergence/p99": 1.125037670135498e-06, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.033203125, "global_full_update_term/max": 0.033203125, "global_full_update_term/median": 0.033203125, "global_full_update_term/min": 0.033203125, "global_full_update_term/p25": 0.033203125, "global_full_update_term/p75": 0.033203125, "global_full_update_term/p85": 0.033203125, "global_full_update_term/p90": 0.033203125, "global_full_update_term/p95": 0.033203125, "global_full_update_term/p99": 0.033203125, "global_full_update_term/var": NaN, "global_hessian_coeff": 31616.0, "global_hessian_coeff/max": 31616.0, "global_hessian_coeff/median": 31616.0, "global_hessian_coeff/min": 31616.0, "global_hessian_coeff/p25": 31616.0, "global_hessian_coeff/p75": 31616.0, "global_hessian_coeff/p99": 31616.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 31616.0, "global_hessian_coeff_abs/max": 31616.0, "global_hessian_coeff_abs/median": 31616.0, "global_hessian_coeff_abs/min": 31616.0, "global_hessian_coeff_abs/p25": 31616.0, "global_hessian_coeff_abs/p75": 31616.0, "global_hessian_coeff_abs/p99": 31616.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.11082905530929565, "learning_rate": 3.750000000000002e-06, "loss": -0.75, "masked_global_fisher_curvature": 3296.0, "masked_global_fisher_curvature/max": 3296.0, "masked_global_fisher_curvature/median": 3296.0, "masked_global_fisher_curvature/min": 3296.0, "masked_global_fisher_curvature/p25": 3296.0, "masked_global_fisher_curvature/p75": 3296.0, "masked_global_fisher_curvature/p85": 3296.0, "masked_global_fisher_curvature/p90": 3296.0, "masked_global_fisher_curvature/p95": 3296.0, "masked_global_fisher_curvature/p99": 3296.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/max": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/median": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/min": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p25": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p75": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p85": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p90": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p95": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/p99": 2.60770320892334e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0032196044921875, "masked_global_full_update_term/max": 0.0032196044921875, "masked_global_full_update_term/median": 0.0032196044921875, "masked_global_full_update_term/min": 0.0032196044921875, "masked_global_full_update_term/p25": 0.0032196044921875, "masked_global_full_update_term/p75": 0.0032196044921875, "masked_global_full_update_term/p85": 0.0032196044921875, "masked_global_full_update_term/p90": 0.0032196044921875, "masked_global_full_update_term/p95": 0.0032196044921875, "masked_global_full_update_term/p99": 0.0032196044921875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -9920.0, "masked_global_hessian_coeff/max": -9920.0, "masked_global_hessian_coeff/median": -9920.0, "masked_global_hessian_coeff/min": -9920.0, "masked_global_hessian_coeff/p25": -9920.0, "masked_global_hessian_coeff/p75": -9920.0, "masked_global_hessian_coeff/p99": -9920.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 9920.0, "masked_global_hessian_coeff_abs/max": 9920.0, "masked_global_hessian_coeff_abs/median": 9920.0, "masked_global_hessian_coeff_abs/min": 9920.0, "masked_global_hessian_coeff_abs/p25": 9920.0, "masked_global_hessian_coeff_abs/p75": 9920.0, "masked_global_hessian_coeff_abs/p99": 9920.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.4717001914978027, "masked_per_sentence_gradient_norm/max": 22.75, "masked_per_sentence_gradient_norm/median": 1.9765625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.37353515625, "masked_per_sentence_gradient_norm/p75": 4.1328125, "masked_per_sentence_gradient_norm/var": 18.49173355102539, "masked_per_token_gradient_norm": 0.07925641536712646, "masked_per_token_gradient_norm/max": 21.25, "masked_per_token_gradient_norm/median": 2.3987922759260982e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.798654794692993e-08, "masked_per_token_gradient_norm/var": 0.8813547492027283, "masked_sentence_fisher_curvature": 2105.75537109375, "masked_sentence_fisher_curvature/max": 7520.0, "masked_sentence_fisher_curvature/median": 1760.0, "masked_sentence_fisher_curvature/min": 117.5, "masked_sentence_fisher_curvature/p25": 736.0, "masked_sentence_fisher_curvature/p75": 2656.0, "masked_sentence_fisher_curvature/p85": 3808.0, "masked_sentence_fisher_curvature/p90": 3960.0, "masked_sentence_fisher_curvature/p95": 4904.0, "masked_sentence_fisher_curvature/p99": 7094.4013671875, "masked_sentence_fisher_curvature/var": 2413308.25, "masked_sentence_fisher_kl_divergence": 1.6663156188201356e-08, "masked_sentence_fisher_kl_divergence/max": 5.960464477539063e-08, "masked_sentence_fisher_kl_divergence/median": 1.391163095831871e-08, "masked_sentence_fisher_kl_divergence/min": 9.313225746154785e-10, "masked_sentence_fisher_kl_divergence/p25": 5.820766091346741e-09, "masked_sentence_fisher_kl_divergence/p75": 2.10711732506752e-08, "masked_sentence_fisher_kl_divergence/p85": 3.015156835317612e-08, "masked_sentence_fisher_kl_divergence/p90": 3.1315721571445465e-08, "masked_sentence_fisher_kl_divergence/p95": 3.876630216836929e-08, "masked_sentence_fisher_kl_divergence/p99": 5.606563036053558e-08, "masked_sentence_fisher_kl_divergence/var": 1.5104290191949704e-16, "masked_sentence_full_gradient_variance/max_squared_error": 29.5588436126709, "masked_sentence_full_gradient_variance/metric": 29.5588436126709, "masked_sentence_full_gradient_variance/p75": 29.5588436126709, "masked_sentence_full_gradient_variance/p90": 29.5588436126709, "masked_sentence_full_gradient_variance/p95": 29.5588436126709, "masked_sentence_full_gradient_variance/p99": 29.5588436126709, "masked_sentence_full_update_term": 0.001008450984954834, "masked_sentence_full_update_term/max": 0.00799560546875, "masked_sentence_full_update_term/median": 0.000568389892578125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00011157989501953125, "masked_sentence_full_update_term/p75": 0.001155853271484375, "masked_sentence_full_update_term/p85": 0.001674652099609375, "masked_sentence_full_update_term/p90": 0.0023345947265625, "masked_sentence_full_update_term/p95": 0.004150390625, "masked_sentence_full_update_term/p99": 0.0065170335583388805, "masked_sentence_full_update_term/var": 2.007883495025453e-06, "masked_sentence_hessian_coeff": -34733.3359375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -35328.0, "masked_sentence_hessian_coeff/min": -112640.0, "masked_sentence_hessian_coeff/p25": -50752.0, "masked_sentence_hessian_coeff/p75": -12960.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 735888192.0, "masked_sentence_hessian_coeff_abs": 34733.3359375, "masked_sentence_hessian_coeff_abs/max": 112640.0, "masked_sentence_hessian_coeff_abs/median": 33536.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 12960.0, "masked_sentence_hessian_coeff_abs/p75": 50752.0, "masked_sentence_hessian_coeff_abs/p99": 111667.203125, "masked_sentence_hessian_coeff_abs/var": 735888192.0, "masked_token_fisher_curvature": 2599.30615234375, "masked_token_fisher_curvature/max": 1204224.0, "masked_token_fisher_curvature/median": 1.4568966692773966e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.783608195817466e-24, "masked_token_fisher_curvature/p75": 2.646771690706373e-13, "masked_token_fisher_curvature/p85": 2.510205376893282e-10, "masked_token_fisher_curvature/p90": 5.2852556109428406e-08, "masked_token_fisher_curvature/p95": 0.00031967461109161377, "masked_token_fisher_curvature/p99": 12416.0, "masked_token_fisher_curvature/var": 1622709504.0, "masked_token_fisher_kl_divergence": 2.057743841987758e-08, "masked_token_fisher_kl_divergence/max": 9.5367431640625e-06, "masked_token_fisher_kl_divergence/median": 1.1537090738857298e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.410593220986745e-35, "masked_token_fisher_kl_divergence/p75": 2.0938009255248513e-24, "masked_token_fisher_kl_divergence/p85": 1.9852334701272664e-21, "masked_token_fisher_kl_divergence/p90": 4.1843427594362437e-19, "masked_token_fisher_kl_divergence/p95": 2.5341057377503695e-15, "masked_token_fisher_kl_divergence/p99": 9.825453162193298e-08, "masked_token_fisher_kl_divergence/var": 1.0172054577773856e-13, "masked_token_full_update_term": 1.5097695722943172e-05, "masked_token_full_update_term/max": 0.00421142578125, "masked_token_full_update_term/median": 1.737079286361358e-23, "masked_token_full_update_term/min": -1.4454126358032227e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 9.880984919163893e-15, "masked_token_full_update_term/p85": 1.0871303857129533e-12, "masked_token_full_update_term/p90": 2.9331204132176936e-11, "masked_token_full_update_term/p95": 6.461050361394882e-09, "masked_token_full_update_term/p99": 0.00022499263286590576, "masked_token_full_update_term/var": 3.319498986797953e-08, "masked_token_hessian_coeff": -46961.234375, "masked_token_hessian_coeff/max": 318.0, "masked_token_hessian_coeff/median": -1.1581846592889633e-12, "masked_token_hessian_coeff/min": -12451840.0, "masked_token_hessian_coeff/p25": -2.682209014892578e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.00616455078125, "masked_token_hessian_coeff/var": 314314653696.0, "masked_token_hessian_coeff_abs": 46961.3359375, "masked_token_hessian_coeff_abs/max": 12451840.0, "masked_token_hessian_coeff_abs/median": 3.0995579436421394e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.5079975128173828e-05, "masked_token_hessian_coeff_abs/p99": 769408.0, "masked_token_hessian_coeff_abs/var": 314314653696.0, "mean_logprobs": -0.00885009765625, "mean_logprobs/var": 3.3855438232421875e-05, "num_completions/total": 6720, "per_sentence_gradient_norm": 51.39453125, "per_sentence_gradient_norm/max": 197.0, "per_sentence_gradient_norm/median": 41.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 7.59375, "per_sentence_gradient_norm/p75": 88.375, "per_sentence_gradient_norm/var": 2197.625732421875, "per_token_feature_norm": 189.87518310546875, "per_token_feature_norm/max": 246.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 106.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 156.19802856445312, "per_token_gradient_norm": 0.9227308034896851, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 2.8762769943568856e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 8.614733815193176e-08, "per_token_gradient_norm/var": 126.06593322753906, "per_token_policy_error_norm": 0.005270900204777718, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005543719511479139, "policy_entropy": 0.009056547656655312, "policy_entropy/max": 1.8984375, "policy_entropy/median": 5.675246939063072e-10, "policy_entropy/min": 2.6205081805679917e-21, "policy_entropy/p25": 1.0658141036401503e-12, "policy_entropy/p75": 1.2898817658424377e-07, "policy_entropy/var": 0.004819940775632858, "policy_loss": -0.75, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -0.75, "policy_loss/var": 0.1894737035036087, "policy_sharpness": 9.739771842956543, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6861151456832886, "reward": 0.75, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.75, "reward/p75": 1.0, "reward/var": 0.1894737035036087, "rewards/accuracy_reward": 0.75, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.75, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1894737035036087, "sentence_fisher_curvature": 329970.90625, "sentence_fisher_curvature/max": 1220608.0, "sentence_fisher_curvature/median": 286720.0, "sentence_fisher_curvature/min": 117.5, "sentence_fisher_curvature/p25": 31864.0, "sentence_fisher_curvature/p75": 526336.0, "sentence_fisher_curvature/p85": 668672.0, "sentence_fisher_curvature/p90": 827392.0, "sentence_fisher_curvature/p95": 989184.0, "sentence_fisher_curvature/p99": 1205043.25, "sentence_fisher_curvature/var": 106535297024.0, "sentence_fisher_kl_divergence": 2.611237277960754e-06, "sentence_fisher_kl_divergence/max": 9.655952453613281e-06, "sentence_fisher_kl_divergence/median": 2.2649765014648438e-06, "sentence_fisher_kl_divergence/min": 9.313225746154785e-10, "sentence_fisher_kl_divergence/p25": 2.5227200239896774e-07, "sentence_fisher_kl_divergence/p75": 4.157423973083496e-06, "sentence_fisher_kl_divergence/p85": 5.282461643218994e-06, "sentence_fisher_kl_divergence/p90": 6.541609764099121e-06, "sentence_fisher_kl_divergence/p95": 7.82310962677002e-06, "sentence_fisher_kl_divergence/p99": 9.54270399233792e-06, "sentence_fisher_kl_divergence/var": 6.671743886521586e-12, "sentence_full_gradient_variance/max_squared_error": 4715.1318359375, "sentence_full_gradient_variance/metric": 4715.1318359375, "sentence_full_gradient_variance/p75": 4715.1318359375, "sentence_full_gradient_variance/p90": 4715.1318359375, "sentence_full_gradient_variance/p95": 4715.1318359375, "sentence_full_gradient_variance/p99": 4715.1318359375, "sentence_full_update_term": 0.014864127151668072, "sentence_full_update_term/max": 0.0615234375, "sentence_full_update_term/median": 0.01177978515625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.002460479736328125, "sentence_full_update_term/p75": 0.023101806640625, "sentence_full_update_term/p85": 0.027801513671875, "sentence_full_update_term/p90": 0.0311279296875, "sentence_full_update_term/p95": 0.04345703125, "sentence_full_update_term/p99": 0.05502931773662567, "sentence_full_update_term/var": 0.0001958772336365655, "sentence_hessian_coeff": 40238.66796875, "sentence_hessian_coeff/max": 966656.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -266240.0, "sentence_hessian_coeff/p25": -126976.0, "sentence_hessian_coeff/p75": 94592.0, "sentence_hessian_coeff/p99": 729293.5625, "sentence_hessian_coeff/var": 55668244480.0, "sentence_hessian_coeff_abs": 159257.34375, "sentence_hessian_coeff_abs/max": 966656.0, "sentence_hessian_coeff_abs/median": 125440.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 8208.0, "sentence_hessian_coeff_abs/p75": 248320.0, "sentence_hessian_coeff_abs/p99": 729293.5625, "sentence_hessian_coeff_abs/var": 31674560512.0, "step": 70, "token_fisher_curvature": 323085.375, "token_fisher_curvature/max": 189792256.0, "token_fisher_curvature/median": 1.7889335846010823e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.029177440169146e-24, "token_fisher_curvature/p75": 3.907985046680551e-13, "token_fisher_curvature/p85": 4.347384674474597e-10, "token_fisher_curvature/p90": 1.4435499906539917e-07, "token_fisher_curvature/p95": 0.003753662109375, "token_fisher_curvature/p99": 667648.0, "token_fisher_curvature/var": 32545817231360.0, "token_fisher_kl_divergence": 2.5576712232577847e-06, "token_fisher_kl_divergence/max": 0.00150299072265625, "token_fisher_kl_divergence/median": 1.4199496293978212e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.6080762719248893e-35, "token_fisher_kl_divergence/p75": 3.0890026000027127e-24, "token_fisher_kl_divergence/p85": 3.441071348220595e-21, "token_fisher_kl_divergence/p90": 1.145188544687814e-18, "token_fisher_kl_divergence/p95": 2.9753977059954195e-14, "token_fisher_kl_divergence/p99": 5.27501106262207e-06, "token_fisher_kl_divergence/var": 2.0397552535911245e-09, "token_full_update_term": 0.0001852380228228867, "token_full_update_term/max": 0.05615234375, "token_full_update_term/median": 8.726755462434442e-23, "token_full_update_term/min": -1.4454126358032227e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.3433698597964394e-14, "token_full_update_term/p85": 1.687538997430238e-12, "token_full_update_term/p90": 5.3432813729159534e-11, "token_full_update_term/p95": 2.0721927285194397e-08, "token_full_update_term/p99": 0.00299835205078125, "token_full_update_term/var": 5.187060651223874e-06, "token_hessian_coeff": 18965.21484375, "token_hessian_coeff/max": 188743680.0, "token_hessian_coeff/median": -1.4424017535930034e-12, "token_hessian_coeff/min": -24510464.0, "token_hessian_coeff/p25": -3.1813979148864746e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.013671875, "token_hessian_coeff/var": 24228988452864.0, "token_hessian_coeff_abs": 339956.125, "token_hessian_coeff_abs/max": 188743680.0, "token_hessian_coeff_abs/median": 3.7107383832335472e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.8477439880371094e-05, "token_hessian_coeff_abs/p99": 7667712.0, "token_hessian_coeff_abs/var": 24113773019136.0 }, { "accuracy_reward": 0.7083333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20877192914485931, "adam_stats/lm_head/lr_effective_max": 1.7915490388986655e-05, "adam_stats/lm_head/lr_effective_mean": -1.4524214296540716e-11, "adam_stats/lm_head/lr_effective_min": -1.6210331523325294e-05, "adam_stats/lm_head/lr_effective_std": 4.308936354391335e-07, "adam_stats/lr_effective_max": 1.8751890820567496e-05, "adam_stats/lr_effective_mean": 5.95893126509317e-11, "adam_stats/lr_effective_min": -1.8720029402174987e-05, "adam_stats/m_t_max": 0.0008633401594124734, "adam_stats/m_t_mean": 8.83863295053633e-12, "adam_stats/m_t_min": -0.0005370216094888747, "adam_stats/v_t_max": 2.5069201001315378e-05, "adam_stats/v_t_mean": 1.721827324815961e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7083333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20877192914485931, "all_logprobs": -0.00880623608827591, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -1.0728836059570312e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -6.103515625e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013551111333072186, "clip_ratio": 0.0, "completion_length": 492.85418701171875, "completion_length/correct": 431.7205810546875, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 393.0, "completion_length/correct/min": 177.0, "completion_length/correct/p25": 349.5, "completion_length/correct/p75": 429.0, "completion_length/correct/var": 23750.29296875, "completion_length/incorrect": 641.3214721679688, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 513.0, "completion_length/incorrect/min": 379.0, "completion_length/incorrect/p25": 397.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 70523.0390625, "completion_length/max": 1024.0, "completion_length/median": 410.0, "completion_length/min": 177.0, "completion_length/p25": 354.0, "completion_length/p75": 550.25, "completion_length/var": 45965.47265625, "curvature_clip_ratio_token_fisher": 0.006847867276519537, "curvature_clip_ratio_token_hessian": 0.004607515875250101, "curvature_clip_ratio_total_fisher": 0.006847867276519537, "curvature_clip_ratio_total_full": 0.006847867276519537, "curvature_clip_ratio_total_hessian": 0.004607515875250101, "epoch": 0.1136, "feature_vector_variance/max_squared_error": 76495.9296875, "feature_vector_variance/metric": 31276.544921875, "generated_tokens/total": 3954066.0, "global_fisher_curvature": 137216.0, "global_fisher_curvature/max": 137216.0, "global_fisher_curvature/median": 137216.0, "global_fisher_curvature/min": 137216.0, "global_fisher_curvature/p25": 137216.0, "global_fisher_curvature/p75": 137216.0, "global_fisher_curvature/p85": 137216.0, "global_fisher_curvature/p90": 137216.0, "global_fisher_curvature/p95": 137216.0, "global_fisher_curvature/p99": 137216.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 9.611248970031738e-07, "global_fisher_kl_divergence/max": 9.611248970031738e-07, "global_fisher_kl_divergence/median": 9.611248970031738e-07, "global_fisher_kl_divergence/min": 9.611248970031738e-07, "global_fisher_kl_divergence/p25": 9.611248970031738e-07, "global_fisher_kl_divergence/p75": 9.611248970031738e-07, "global_fisher_kl_divergence/p85": 9.611248970031738e-07, "global_fisher_kl_divergence/p90": 9.611248970031738e-07, "global_fisher_kl_divergence/p95": 9.611248970031738e-07, "global_fisher_kl_divergence/p99": 9.611248970031738e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.033447265625, "global_full_update_term/max": 0.033447265625, "global_full_update_term/median": 0.033447265625, "global_full_update_term/min": 0.033447265625, "global_full_update_term/p25": 0.033447265625, "global_full_update_term/p75": 0.033447265625, "global_full_update_term/p85": 0.033447265625, "global_full_update_term/p90": 0.033447265625, "global_full_update_term/p95": 0.033447265625, "global_full_update_term/p99": 0.033447265625, "global_full_update_term/var": NaN, "global_hessian_coeff": 15808.0, "global_hessian_coeff/max": 15808.0, "global_hessian_coeff/median": 15808.0, "global_hessian_coeff/min": 15808.0, "global_hessian_coeff/p25": 15808.0, "global_hessian_coeff/p75": 15808.0, "global_hessian_coeff/p99": 15808.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 15808.0, "global_hessian_coeff_abs/max": 15808.0, "global_hessian_coeff_abs/median": 15808.0, "global_hessian_coeff_abs/min": 15808.0, "global_hessian_coeff_abs/p25": 15808.0, "global_hessian_coeff_abs/p75": 15808.0, "global_hessian_coeff_abs/p99": 15808.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.06220652535557747, "learning_rate": 3.525605518250964e-06, "loss": -0.7083, "masked_global_fisher_curvature": 4128.0, "masked_global_fisher_curvature/max": 4128.0, "masked_global_fisher_curvature/median": 4128.0, "masked_global_fisher_curvature/min": 4128.0, "masked_global_fisher_curvature/p25": 4128.0, "masked_global_fisher_curvature/p75": 4128.0, "masked_global_fisher_curvature/p85": 4128.0, "masked_global_fisher_curvature/p90": 4128.0, "masked_global_fisher_curvature/p95": 4128.0, "masked_global_fisher_curvature/p99": 4128.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/max": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/median": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/min": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/p25": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/p75": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/p85": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/p90": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/p95": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/p99": 2.898741513490677e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0030517578125, "masked_global_full_update_term/max": 0.0030517578125, "masked_global_full_update_term/median": 0.0030517578125, "masked_global_full_update_term/min": 0.0030517578125, "masked_global_full_update_term/p25": 0.0030517578125, "masked_global_full_update_term/p75": 0.0030517578125, "masked_global_full_update_term/p85": 0.0030517578125, "masked_global_full_update_term/p90": 0.0030517578125, "masked_global_full_update_term/p95": 0.0030517578125, "masked_global_full_update_term/p99": 0.0030517578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -11392.0, "masked_global_hessian_coeff/max": -11392.0, "masked_global_hessian_coeff/median": -11392.0, "masked_global_hessian_coeff/min": -11392.0, "masked_global_hessian_coeff/p25": -11392.0, "masked_global_hessian_coeff/p75": -11392.0, "masked_global_hessian_coeff/p99": -11392.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 11392.0, "masked_global_hessian_coeff_abs/max": 11392.0, "masked_global_hessian_coeff_abs/median": 11392.0, "masked_global_hessian_coeff_abs/min": 11392.0, "masked_global_hessian_coeff_abs/p25": 11392.0, "masked_global_hessian_coeff_abs/p75": 11392.0, "masked_global_hessian_coeff_abs/p99": 11392.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.6789348125457764, "masked_per_sentence_gradient_norm/max": 23.125, "masked_per_sentence_gradient_norm/median": 1.7109375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 7.15625, "masked_per_sentence_gradient_norm/var": 22.786630630493164, "masked_per_token_gradient_norm": 0.05786101892590523, "masked_per_token_gradient_norm/max": 29.625, "masked_per_token_gradient_norm/median": 3.5349501104064984e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.0838342607021332e-08, "masked_per_token_gradient_norm/var": 0.6260552406311035, "masked_sentence_fisher_curvature": 1993.9091796875, "masked_sentence_fisher_curvature/max": 10240.0, "masked_sentence_fisher_curvature/median": 1560.0, "masked_sentence_fisher_curvature/min": 0.01251220703125, "masked_sentence_fisher_curvature/p25": 717.0, "masked_sentence_fisher_curvature/p75": 2912.0, "masked_sentence_fisher_curvature/p85": 3728.0, "masked_sentence_fisher_curvature/p90": 4208.0, "masked_sentence_fisher_curvature/p95": 5920.0, "masked_sentence_fisher_curvature/p99": 6865.61083984375, "masked_sentence_fisher_curvature/var": 3350903.25, "masked_sentence_fisher_kl_divergence": 1.401553362256891e-08, "masked_sentence_fisher_kl_divergence/max": 7.217749953269958e-08, "masked_sentence_fisher_kl_divergence/median": 1.0943040251731873e-08, "masked_sentence_fisher_kl_divergence/min": 8.79296635503124e-14, "masked_sentence_fisher_kl_divergence/p25": 5.042238626629114e-09, "masked_sentence_fisher_kl_divergence/p75": 2.0489096641540527e-08, "masked_sentence_fisher_kl_divergence/p85": 2.6193447411060333e-08, "masked_sentence_fisher_kl_divergence/p90": 2.9511284083127975e-08, "masked_sentence_fisher_kl_divergence/p95": 4.1676685214042664e-08, "masked_sentence_fisher_kl_divergence/p99": 4.828915223242802e-08, "masked_sentence_fisher_kl_divergence/var": 1.6583528678699912e-16, "masked_sentence_full_gradient_variance/max_squared_error": 34.529144287109375, "masked_sentence_full_gradient_variance/metric": 34.529144287109375, "masked_sentence_full_gradient_variance/p75": 34.529144287109375, "masked_sentence_full_gradient_variance/p90": 34.529144287109375, "masked_sentence_full_gradient_variance/p95": 34.529144287109375, "masked_sentence_full_gradient_variance/p99": 34.529144287109375, "masked_sentence_full_update_term": 0.0010577056091278791, "masked_sentence_full_update_term/max": 0.006988525390625, "masked_sentence_full_update_term/median": 0.0003814697265625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0020904541015625, "masked_sentence_full_update_term/p85": 0.002223968505859375, "masked_sentence_full_update_term/p90": 0.0029144287109375, "masked_sentence_full_update_term/p95": 0.003704071044921875, "masked_sentence_full_update_term/p99": 0.00504608778283, "masked_sentence_full_update_term/var": 1.9861581677105278e-06, "masked_sentence_hessian_coeff": -33446.9296875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -28544.0, "masked_sentence_hessian_coeff/min": -162816.0, "masked_sentence_hessian_coeff/p25": -63360.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 1183715072.0, "masked_sentence_hessian_coeff_abs": 33446.9296875, "masked_sentence_hessian_coeff_abs/max": 162816.0, "masked_sentence_hessian_coeff_abs/median": 28544.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 63360.0, "masked_sentence_hessian_coeff_abs/p99": 132172.90625, "masked_sentence_hessian_coeff_abs/var": 1183715072.0, "masked_token_fisher_curvature": 2081.132568359375, "masked_token_fisher_curvature/max": 1417216.0, "masked_token_fisher_curvature/median": 2.3852447794681098e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.567216391634932e-24, "masked_token_fisher_curvature/p75": 3.028688411177427e-13, "masked_token_fisher_curvature/p85": 1.5643308870494366e-10, "masked_token_fisher_curvature/p90": 2.6542693376541138e-08, "masked_token_fisher_curvature/p95": 0.0001847781240940094, "masked_token_fisher_curvature/p99": 8000.0, "masked_token_fisher_curvature/var": 1234517888.0, "masked_token_fisher_kl_divergence": 1.4630744615828917e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.67632942359465e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.501451978549828e-35, "masked_token_fisher_kl_divergence/p75": 2.126112668202704e-24, "masked_token_fisher_kl_divergence/p85": 1.0984958534704208e-21, "masked_token_fisher_kl_divergence/p90": 1.8634724839594607e-19, "masked_token_fisher_kl_divergence/p95": 1.2989826228548829e-15, "masked_token_fisher_kl_divergence/p99": 5.634501576423645e-08, "masked_token_fisher_kl_divergence/var": 6.100462068012527e-14, "masked_token_full_update_term": 1.0637118066370022e-05, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.2665987014770508e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.033895191682177e-15, "masked_token_full_update_term/p85": 2.7355895326763857e-13, "masked_token_full_update_term/p90": 6.849631972727366e-12, "masked_token_full_update_term/p95": 9.204086381942034e-10, "masked_token_full_update_term/p99": 3.411993384361267e-05, "masked_token_full_update_term/var": 2.1830745211559588e-08, "masked_token_hessian_coeff": -35641.1484375, "masked_token_hessian_coeff/max": 1064.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -13041664.0, "masked_token_hessian_coeff/p25": -3.7439167499542236e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.005947113037109375, "masked_token_hessian_coeff/var": 239162441728.0, "masked_token_hessian_coeff_abs": 35641.2421875, "masked_token_hessian_coeff_abs/max": 13041664.0, "masked_token_hessian_coeff_abs/median": 6.230038707144558e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.947185516357422e-06, "masked_token_hessian_coeff_abs/p99": 59136.0, "masked_token_hessian_coeff_abs/var": 239162425344.0, "mean_logprobs": -0.00836181640625, "mean_logprobs/var": 2.6106834411621094e-05, "num_completions/total": 6816, "per_sentence_gradient_norm": 39.552085876464844, "per_sentence_gradient_norm/max": 210.0, "per_sentence_gradient_norm/median": 35.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 60.875, "per_sentence_gradient_norm/var": 1459.02783203125, "per_token_feature_norm": 190.43174743652344, "per_token_feature_norm/max": 290.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 102.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 142.21218872070312, "per_token_gradient_norm": 0.7815237641334534, "per_token_gradient_norm/max": 290.0, "per_token_gradient_norm/median": 4.3520742565306136e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.4796463549137115e-08, "per_token_gradient_norm/var": 106.70511627197266, "per_token_policy_error_norm": 0.0051911333575844765, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004984017927199602, "policy_entropy": 0.009006836451590061, "policy_entropy/max": 1.6015625, "policy_entropy/median": 7.203198038041592e-10, "policy_entropy/min": 1.1117307432712692e-20, "policy_entropy/p25": 1.6413537196058314e-12, "policy_entropy/p75": 1.2479722499847412e-07, "policy_entropy/var": 0.004960998892784119, "policy_loss": -0.7083333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20877192914485931, "policy_sharpness": 9.751312255859375, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6340453624725342, "reward": 0.7083333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20877192914485931, "rewards/accuracy_reward": 0.7083333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20877192914485931, "sentence_fisher_curvature": 297596.0625, "sentence_fisher_curvature/max": 1327104.0, "sentence_fisher_curvature/median": 219136.0, "sentence_fisher_curvature/min": 56.5, "sentence_fisher_curvature/p25": 6048.0, "sentence_fisher_curvature/p75": 467968.0, "sentence_fisher_curvature/p85": 637952.0, "sentence_fisher_curvature/p90": 815104.0, "sentence_fisher_curvature/p95": 887808.0, "sentence_fisher_curvature/p99": 1109197.5, "sentence_fisher_curvature/var": 95395414016.0, "sentence_fisher_kl_divergence": 2.0923962438246235e-06, "sentence_fisher_kl_divergence/max": 9.357929229736328e-06, "sentence_fisher_kl_divergence/median": 1.5422701835632324e-06, "sentence_fisher_kl_divergence/min": 3.965396899729967e-10, "sentence_fisher_kl_divergence/p25": 4.260800778865814e-08, "sentence_fisher_kl_divergence/p75": 3.285706043243408e-06, "sentence_fisher_kl_divergence/p85": 4.492700099945068e-06, "sentence_fisher_kl_divergence/p90": 5.7220458984375e-06, "sentence_fisher_kl_divergence/p95": 6.251037120819092e-06, "sentence_fisher_kl_divergence/p99": 7.772450771881267e-06, "sentence_fisher_kl_divergence/var": 4.717587431812831e-12, "sentence_full_gradient_variance/max_squared_error": 2955.697021484375, "sentence_full_gradient_variance/metric": 2955.697021484375, "sentence_full_gradient_variance/p75": 2955.697021484375, "sentence_full_gradient_variance/p90": 2955.697021484375, "sentence_full_gradient_variance/p95": 2955.697021484375, "sentence_full_gradient_variance/p99": 2955.697021484375, "sentence_full_update_term": 0.011204719543457031, "sentence_full_update_term/max": 0.053955078125, "sentence_full_update_term/median": 0.00921630859375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.016937255859375, "sentence_full_update_term/p85": 0.02105712890625, "sentence_full_update_term/p90": 0.022705078125, "sentence_full_update_term/p95": 0.0328369140625, "sentence_full_update_term/p99": 0.04421389847993851, "sentence_full_update_term/var": 0.00012200040509924293, "sentence_hessian_coeff": 22741.5, "sentence_hessian_coeff/max": 552960.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -286720.0, "sentence_hessian_coeff/p25": -95488.0, "sentence_hessian_coeff/p75": 40192.0, "sentence_hessian_coeff/p99": 549068.8125, "sentence_hessian_coeff/var": 31349839872.0, "sentence_hessian_coeff_abs": 116573.171875, "sentence_hessian_coeff_abs/max": 552960.0, "sentence_hessian_coeff_abs/median": 57344.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 169472.0, "sentence_hessian_coeff_abs/p99": 549068.8125, "sentence_hessian_coeff_abs/var": 18140110848.0, "step": 71, "token_fisher_curvature": 275131.75, "token_fisher_curvature/max": 195035136.0, "token_fisher_curvature/median": 2.7511630126819675e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 3.903258515484599e-24, "token_fisher_curvature/p75": 4.050093593832571e-13, "token_fisher_curvature/p85": 2.4374458007514477e-10, "token_fisher_curvature/p90": 7.189737516455352e-08, "token_fisher_curvature/p95": 0.0011573433876037598, "token_fisher_curvature/p99": 175996.0, "token_fisher_curvature/var": 23700950745088.0, "token_fisher_kl_divergence": 1.9336882814968703e-06, "token_fisher_kl_divergence/max": 0.001373291015625, "token_fisher_kl_divergence/median": 1.932709217791479e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.7459548035208636e-35, "token_fisher_kl_divergence/p75": 2.8434333556510326e-24, "token_fisher_kl_divergence/p85": 1.707300784309449e-21, "token_fisher_kl_divergence/p90": 5.061815953232495e-19, "token_fisher_kl_divergence/p95": 8.143225677104127e-15, "token_fisher_kl_divergence/p99": 1.2358359526842833e-06, "token_fisher_kl_divergence/var": 1.1710336034198576e-09, "token_full_update_term": 0.00014813782763667405, "token_full_update_term/max": 0.0537109375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.2665987014770508e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.4085954624931674e-15, "token_full_update_term/p85": 3.765876499528531e-13, "token_full_update_term/p90": 9.663381206337363e-12, "token_full_update_term/p95": 2.542151378293056e-09, "token_full_update_term/p99": 0.0011444091796875, "token_full_update_term/var": 3.891716460202588e-06, "token_hessian_coeff": 12094.841796875, "token_hessian_coeff/max": 193986560.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -24903680.0, "token_hessian_coeff/p25": -4.377216100692749e-07, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.01324462890625, "token_hessian_coeff/var": 14206796562432.0, "token_hessian_coeff_abs": 234962.40625, "token_hessian_coeff_abs/max": 193986560.0, "token_hessian_coeff_abs/median": 7.321432349272072e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 6.318092346191406e-06, "token_hessian_coeff_abs/p99": 3829632.0, "token_hessian_coeff_abs/var": 14151734788096.0 }, { "accuracy_reward": 0.5, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 0.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2526315748691559, "adam_stats/lm_head/lr_effective_max": 1.5857200196478516e-05, "adam_stats/lm_head/lr_effective_mean": -2.6762458568696523e-11, "adam_stats/lm_head/lr_effective_min": -1.668915501795709e-05, "adam_stats/lm_head/lr_effective_std": 4.037333383166697e-07, "adam_stats/lr_effective_max": 1.7197093256982043e-05, "adam_stats/lr_effective_mean": 5.4270532512390446e-11, "adam_stats/lr_effective_min": -1.757414429448545e-05, "adam_stats/m_t_max": 0.0008004665141925216, "adam_stats/m_t_mean": 8.33151256318665e-12, "adam_stats/m_t_min": -0.0005186836933717132, "adam_stats/v_t_max": 2.5045192160177976e-05, "adam_stats/v_t_mean": 1.7209046687671758e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5, "advantages/max": 1.0, "advantages/median": 0.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2526315748691559, "all_logprobs": -0.008241240866482258, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.25, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -3.5762786865234375e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -5.817413330078125e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012241596356034279, "clip_ratio": 0.0, "completion_length": 659.2396240234375, "completion_length/correct": 438.8333435058594, "completion_length/correct/max": 1017.0, "completion_length/correct/median": 346.0, "completion_length/correct/min": 240.0, "completion_length/correct/p25": 259.75, "completion_length/correct/p75": 584.0, "completion_length/correct/var": 42010.39453125, "completion_length/incorrect": 879.6458740234375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 259.0, "completion_length/incorrect/p25": 844.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 65609.171875, "completion_length/max": 1024.0, "completion_length/median": 614.0, "completion_length/min": 240.0, "completion_length/p25": 312.5, "completion_length/p75": 1024.0, "completion_length/var": 102333.640625, "curvature_clip_ratio_token_fisher": 0.003097002627328038, "curvature_clip_ratio_token_hessian": 0.0018961239838972688, "curvature_clip_ratio_total_fisher": 0.003097002627328038, "curvature_clip_ratio_total_full": 0.003097002627328038, "curvature_clip_ratio_total_hessian": 0.0018961239838972688, "epoch": 0.1152, "feature_vector_variance/max_squared_error": 65125.38671875, "feature_vector_variance/metric": 30782.064453125, "generated_tokens/total": 4017353.0, "global_fisher_curvature": 93696.0, "global_fisher_curvature/max": 93696.0, "global_fisher_curvature/median": 93696.0, "global_fisher_curvature/min": 93696.0, "global_fisher_curvature/p25": 93696.0, "global_fisher_curvature/p75": 93696.0, "global_fisher_curvature/p85": 93696.0, "global_fisher_curvature/p90": 93696.0, "global_fisher_curvature/p95": 93696.0, "global_fisher_curvature/p99": 93696.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.811452865600586e-07, "global_fisher_kl_divergence/max": 5.811452865600586e-07, "global_fisher_kl_divergence/median": 5.811452865600586e-07, "global_fisher_kl_divergence/min": 5.811452865600586e-07, "global_fisher_kl_divergence/p25": 5.811452865600586e-07, "global_fisher_kl_divergence/p75": 5.811452865600586e-07, "global_fisher_kl_divergence/p85": 5.811452865600586e-07, "global_fisher_kl_divergence/p90": 5.811452865600586e-07, "global_fisher_kl_divergence/p95": 5.811452865600586e-07, "global_fisher_kl_divergence/p99": 5.811452865600586e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.010986328125, "global_full_update_term/max": 0.010986328125, "global_full_update_term/median": 0.010986328125, "global_full_update_term/min": 0.010986328125, "global_full_update_term/p25": 0.010986328125, "global_full_update_term/p75": 0.010986328125, "global_full_update_term/p85": 0.010986328125, "global_full_update_term/p90": 0.010986328125, "global_full_update_term/p95": 0.010986328125, "global_full_update_term/p99": 0.010986328125, "global_full_update_term/var": NaN, "global_hessian_coeff": 17792.0, "global_hessian_coeff/max": 17792.0, "global_hessian_coeff/median": 17792.0, "global_hessian_coeff/min": 17792.0, "global_hessian_coeff/p25": 17792.0, "global_hessian_coeff/p75": 17792.0, "global_hessian_coeff/p99": 17792.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17792.0, "global_hessian_coeff_abs/max": 17792.0, "global_hessian_coeff_abs/median": 17792.0, "global_hessian_coeff_abs/min": 17792.0, "global_hessian_coeff_abs/p25": 17792.0, "global_hessian_coeff_abs/p75": 17792.0, "global_hessian_coeff_abs/p99": 17792.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.08569902926683426, "learning_rate": 3.3060532239694e-06, "loss": -0.5, "masked_global_fisher_curvature": 1800.0, "masked_global_fisher_curvature/max": 1800.0, "masked_global_fisher_curvature/median": 1800.0, "masked_global_fisher_curvature/min": 1800.0, "masked_global_fisher_curvature/p25": 1800.0, "masked_global_fisher_curvature/p75": 1800.0, "masked_global_fisher_curvature/p85": 1800.0, "masked_global_fisher_curvature/p90": 1800.0, "masked_global_fisher_curvature/p95": 1800.0, "masked_global_fisher_curvature/p99": 1800.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/max": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/median": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/min": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/p25": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/p75": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/p85": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/p90": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/p95": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/p99": 1.1175870895385742e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00150299072265625, "masked_global_full_update_term/max": 0.00150299072265625, "masked_global_full_update_term/median": 0.00150299072265625, "masked_global_full_update_term/min": 0.00150299072265625, "masked_global_full_update_term/p25": 0.00150299072265625, "masked_global_full_update_term/p75": 0.00150299072265625, "masked_global_full_update_term/p85": 0.00150299072265625, "masked_global_full_update_term/p90": 0.00150299072265625, "masked_global_full_update_term/p95": 0.00150299072265625, "masked_global_full_update_term/p99": 0.00150299072265625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -10496.0, "masked_global_hessian_coeff/max": -10496.0, "masked_global_hessian_coeff/median": -10496.0, "masked_global_hessian_coeff/min": -10496.0, "masked_global_hessian_coeff/p25": -10496.0, "masked_global_hessian_coeff/p75": -10496.0, "masked_global_hessian_coeff/p99": -10496.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 10496.0, "masked_global_hessian_coeff_abs/max": 10496.0, "masked_global_hessian_coeff_abs/median": 10496.0, "masked_global_hessian_coeff_abs/min": 10496.0, "masked_global_hessian_coeff_abs/p25": 10496.0, "masked_global_hessian_coeff_abs/p75": 10496.0, "masked_global_hessian_coeff_abs/p99": 10496.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.0254721641540527, "masked_per_sentence_gradient_norm/max": 12.5625, "masked_per_sentence_gradient_norm/median": 0.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.125, "masked_per_sentence_gradient_norm/var": 16.576101303100586, "masked_per_token_gradient_norm": 0.03676395118236542, "masked_per_token_gradient_norm/max": 29.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.8332002582610585e-12, "masked_per_token_gradient_norm/var": 0.4425015449523926, "masked_sentence_fisher_curvature": 2160.9375, "masked_sentence_fisher_curvature/max": 8192.0, "masked_sentence_fisher_curvature/median": 1720.0, "masked_sentence_fisher_curvature/min": 204.0, "masked_sentence_fisher_curvature/p25": 789.0, "masked_sentence_fisher_curvature/p75": 2872.0, "masked_sentence_fisher_curvature/p85": 3584.0, "masked_sentence_fisher_curvature/p90": 3784.0, "masked_sentence_fisher_curvature/p95": 6576.0, "masked_sentence_fisher_curvature/p99": 7067.20361328125, "masked_sentence_fisher_curvature/var": 3151737.0, "masked_sentence_fisher_kl_divergence": 1.3434298118397692e-08, "masked_sentence_fisher_kl_divergence/max": 5.098991096019745e-08, "masked_sentence_fisher_kl_divergence/median": 1.0710209608078003e-08, "masked_sentence_fisher_kl_divergence/min": 1.2660166248679161e-09, "masked_sentence_fisher_kl_divergence/p25": 4.9112713895738125e-09, "masked_sentence_fisher_kl_divergence/p75": 1.7869751900434494e-08, "masked_sentence_fisher_kl_divergence/p85": 2.232263796031475e-08, "masked_sentence_fisher_kl_divergence/p90": 2.3515895009040833e-08, "masked_sentence_fisher_kl_divergence/p95": 4.086177796125412e-08, "masked_sentence_fisher_kl_divergence/p99": 4.3911882130487356e-08, "masked_sentence_fisher_kl_divergence/var": 1.218909792554296e-16, "masked_sentence_full_gradient_variance/max_squared_error": 23.970975875854492, "masked_sentence_full_gradient_variance/metric": 23.970975875854492, "masked_sentence_full_gradient_variance/p75": 23.970975875854492, "masked_sentence_full_gradient_variance/p90": 23.970975875854492, "masked_sentence_full_gradient_variance/p95": 23.970975875854492, "masked_sentence_full_gradient_variance/p99": 23.970975875854492, "masked_sentence_full_update_term": 0.0007373392581939697, "masked_sentence_full_update_term/max": 0.0029754638671875, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0009679794311523438, "masked_sentence_full_update_term/p85": 0.002410888671875, "masked_sentence_full_update_term/p90": 0.002410888671875, "masked_sentence_full_update_term/p95": 0.00260162353515625, "masked_sentence_full_update_term/p99": 0.0028015142306685448, "masked_sentence_full_update_term/var": 9.367899451717676e-07, "masked_sentence_hessian_coeff": -27323.333984375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -14592.0, "masked_sentence_hessian_coeff/min": -111104.0, "masked_sentence_hessian_coeff/p25": -56064.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 1043938112.0, "masked_sentence_hessian_coeff_abs": 27323.333984375, "masked_sentence_hessian_coeff_abs/max": 111104.0, "masked_sentence_hessian_coeff_abs/median": 0.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 56064.0, "masked_sentence_hessian_coeff_abs/p99": 102835.2265625, "masked_sentence_hessian_coeff_abs/var": 1043938112.0, "masked_token_fisher_curvature": 2042.161376953125, "masked_token_fisher_curvature/max": 1597440.0, "masked_token_fisher_curvature/median": 1.3129010682441655e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.299125799167834e-25, "masked_token_fisher_curvature/p75": 3.375077994860476e-14, "masked_token_fisher_curvature/p85": 2.2509993868879974e-11, "masked_token_fisher_curvature/p90": 3.1141098588705063e-09, "masked_token_fisher_curvature/p95": 7.82012939453125e-05, "masked_token_fisher_curvature/p99": 4768.0, "masked_token_fisher_curvature/var": 1252400512.0, "masked_token_fisher_kl_divergence": 1.2694831852400057e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 8.135128085091684e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.291384182302405e-36, "masked_token_fisher_kl_divergence/p75": 2.100263274060422e-25, "masked_token_fisher_kl_divergence/p85": 1.3979352352146168e-22, "masked_token_fisher_kl_divergence/p90": 1.937587866844212e-20, "masked_token_fisher_kl_divergence/p95": 4.85722573273506e-16, "masked_token_fisher_kl_divergence/p99": 2.9685907065868378e-08, "masked_token_fisher_kl_divergence/var": 4.8380356930952453e-14, "masked_token_full_update_term": 6.363669399434002e-06, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.1175870895385742e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 0.0, "masked_token_full_update_term/p85": 9.128982292327947e-17, "masked_token_full_update_term/p90": 9.492406860545088e-15, "masked_token_full_update_term/p95": 2.9558577807620168e-12, "masked_token_full_update_term/p99": 1.3352837413549423e-06, "masked_token_full_update_term/var": 1.3261285403132206e-08, "masked_token_hessian_coeff": -22636.787109375, "masked_token_hessian_coeff/max": 668.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -14417920.0, "masked_token_hessian_coeff/p25": -7.815970093361102e-14, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.0004024505615234375, "masked_token_hessian_coeff/var": 161573552128.0, "masked_token_hessian_coeff_abs": 22636.853515625, "masked_token_hessian_coeff_abs/max": 14417920.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.837623469531536e-10, "masked_token_hessian_coeff_abs/p99": 1452.3125, "masked_token_hessian_coeff_abs/var": 161573552128.0, "mean_logprobs": -0.0081787109375, "mean_logprobs/var": 3.2901763916015625e-05, "num_completions/total": 6912, "per_sentence_gradient_norm": 28.671875, "per_sentence_gradient_norm/max": 165.0, "per_sentence_gradient_norm/median": 0.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 49.125, "per_sentence_gradient_norm/var": 1481.6458740234375, "per_token_feature_norm": 190.2211456298828, "per_token_feature_norm/max": 255.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 106.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 140.93710327148438, "per_token_gradient_norm": 0.3545885980129242, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.3163693185779266e-12, "per_token_gradient_norm/var": 48.43502426147461, "per_token_policy_error_norm": 0.004966277163475752, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004786770790815353, "policy_entropy": 0.008433857932686806, "policy_entropy/max": 2.046875, "policy_entropy/median": 2.191882231272757e-10, "policy_entropy/min": 5.590417451878382e-20, "policy_entropy/p25": 6.608047442568932e-13, "policy_entropy/p75": 5.029141902923584e-08, "policy_entropy/var": 0.004302043933421373, "policy_loss": -0.5, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2526315748691559, "policy_sharpness": 9.752978324890137, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6049309968948364, "reward": 0.5, "reward/max": 1.0, "reward/median": 0.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2526315748691559, "rewards/accuracy_reward": 0.5, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 0.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2526315748691559, "sentence_fisher_curvature": 189648.65625, "sentence_fisher_curvature/max": 1089536.0, "sentence_fisher_curvature/median": 8704.0, "sentence_fisher_curvature/min": 268.0, "sentence_fisher_curvature/p25": 1550.0, "sentence_fisher_curvature/p75": 262144.0, "sentence_fisher_curvature/p85": 466944.0, "sentence_fisher_curvature/p90": 634880.0, "sentence_fisher_curvature/p95": 681984.0, "sentence_fisher_curvature/p99": 933888.5, "sentence_fisher_curvature/var": 64205406208.0, "sentence_fisher_kl_divergence": 1.1793582643804257e-06, "sentence_fisher_kl_divergence/max": 6.765127182006836e-06, "sentence_fisher_kl_divergence/median": 5.4016709327697754e-08, "sentence_fisher_kl_divergence/min": 1.6661942936480045e-09, "sentence_fisher_kl_divergence/p25": 9.633367881178856e-09, "sentence_fisher_kl_divergence/p75": 1.6316771507263184e-06, "sentence_fisher_kl_divergence/p85": 2.905726432800293e-06, "sentence_fisher_kl_divergence/p90": 3.948807716369629e-06, "sentence_fisher_kl_divergence/p95": 4.246830940246582e-06, "sentence_fisher_kl_divergence/p99": 5.802515261166263e-06, "sentence_fisher_kl_divergence/var": 2.4828474779609033e-12, "sentence_full_gradient_variance/max_squared_error": 2243.03857421875, "sentence_full_gradient_variance/metric": 2243.03857421875, "sentence_full_gradient_variance/p75": 2243.03857421875, "sentence_full_gradient_variance/p90": 2243.03857421875, "sentence_full_gradient_variance/p95": 2243.03857421875, "sentence_full_gradient_variance/p99": 2243.03857421875, "sentence_full_update_term": 0.007013003341853619, "sentence_full_update_term/max": 0.03515625, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.012298583984375, "sentence_full_update_term/p85": 0.0155792236328125, "sentence_full_update_term/p90": 0.0201416015625, "sentence_full_update_term/p95": 0.027587890625, "sentence_full_update_term/p99": 0.029589861631393433, "sentence_full_update_term/var": 7.858981552999467e-05, "sentence_hessian_coeff": 12929.5, "sentence_hessian_coeff/max": 749568.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -378880.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 19372.0, "sentence_hessian_coeff/p99": 749568.0, "sentence_hessian_coeff/var": 32054687744.0, "sentence_hessian_coeff_abs": 94161.5, "sentence_hessian_coeff_abs/max": 749568.0, "sentence_hessian_coeff_abs/median": 0.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 122880.0, "sentence_hessian_coeff_abs/p99": 749568.0, "sentence_hessian_coeff_abs/var": 23263899648.0, "step": 72, "token_fisher_curvature": 126004.0546875, "token_fisher_curvature/max": 182452224.0, "token_fisher_curvature/median": 1.4484263398048536e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.62224322594636e-25, "token_fisher_curvature/p75": 3.930189507173054e-14, "token_fisher_curvature/p85": 2.8762769943568856e-11, "token_fisher_curvature/p90": 4.0745362639427185e-09, "token_fisher_curvature/p95": 0.0001807175576686859, "token_fisher_curvature/p99": 23130.0, "token_fisher_curvature/var": 11201909096448.0, "token_fisher_kl_divergence": 7.830969366295903e-07, "token_fisher_kl_divergence/max": 0.00113677978515625, "token_fisher_kl_divergence/median": 8.997944700177166e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.502973165450417e-36, "token_fisher_kl_divergence/p75": 2.4395365721778746e-25, "token_fisher_kl_divergence/p85": 1.7867101231145398e-22, "token_fisher_kl_divergence/p90": 2.5305109299222223e-20, "token_fisher_kl_divergence/p95": 1.1226913496087398e-15, "token_fisher_kl_divergence/p99": 1.439475454390049e-07, "token_fisher_kl_divergence/var": 4.324498537044974e-10, "token_full_update_term": 6.331505574053153e-05, "token_full_update_term/max": 0.048828125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.1175870895385742e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 0.0, "token_full_update_term/p85": 1.1362438767648086e-16, "token_full_update_term/p90": 1.1712852909795402e-14, "token_full_update_term/p95": 4.121147867408581e-12, "token_full_update_term/p99": 1.049041748046875e-05, "token_full_update_term/var": 1.5662286614315235e-06, "token_hessian_coeff": 5232.2958984375, "token_hessian_coeff/max": 178257920.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -24117248.0, "token_hessian_coeff/p25": -1.0169642905566434e-13, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.000621795654296875, "token_hessian_coeff/var": 6717193060352.0, "token_hessian_coeff_abs": 113026.046875, "token_hessian_coeff_abs/max": 178257920.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.1377567211166024e-10, "token_hessian_coeff_abs/p99": 11840.0, "token_hessian_coeff_abs/var": 6704446046208.0 }, { "accuracy_reward": 0.5416666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2508772015571594, "adam_stats/lm_head/lr_effective_max": 1.4414193174161483e-05, "adam_stats/lm_head/lr_effective_mean": -7.648440908392118e-12, "adam_stats/lm_head/lr_effective_min": -1.504622650827514e-05, "adam_stats/lm_head/lr_effective_std": 3.706111044721183e-07, "adam_stats/lr_effective_max": 1.614463144505862e-05, "adam_stats/lr_effective_mean": 2.8531147930332956e-11, "adam_stats/lr_effective_min": -1.5815587175893597e-05, "adam_stats/m_t_max": 0.000828757300041616, "adam_stats/m_t_mean": 7.857511516440319e-12, "adam_stats/m_t_min": -0.0005240362952463329, "adam_stats/v_t_max": 2.5020270186359994e-05, "adam_stats/v_t_mean": 1.7199502455947369e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5416666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2508772015571594, "all_logprobs": -0.00762057164683938, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -3.5762786865234375e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -2.1696090698242188e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01057741791009903, "clip_ratio": 0.0, "completion_length": 604.8646240234375, "completion_length/correct": 456.0000305175781, "completion_length/correct/max": 705.0, "completion_length/correct/median": 457.0, "completion_length/correct/min": 206.0, "completion_length/correct/p25": 385.75, "completion_length/correct/p75": 539.0, "completion_length/correct/var": 16466.234375, "completion_length/incorrect": 780.7954711914062, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 841.0, "completion_length/incorrect/min": 285.0, "completion_length/incorrect/p25": 593.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 76856.171875, "completion_length/max": 1024.0, "completion_length/median": 523.0, "completion_length/min": 206.0, "completion_length/p25": 386.0, "completion_length/p75": 810.75, "completion_length/var": 70092.859375, "curvature_clip_ratio_token_fisher": 0.004322592634707689, "curvature_clip_ratio_token_hessian": 0.002755437744781375, "curvature_clip_ratio_total_fisher": 0.004322592634707689, "curvature_clip_ratio_total_full": 0.004322592634707689, "curvature_clip_ratio_total_hessian": 0.002755437744781375, "epoch": 0.1168, "feature_vector_variance/max_squared_error": 58480.3046875, "feature_vector_variance/metric": 30837.09765625, "generated_tokens/total": 4075420.0, "global_fisher_curvature": 102912.0, "global_fisher_curvature/max": 102912.0, "global_fisher_curvature/median": 102912.0, "global_fisher_curvature/min": 102912.0, "global_fisher_curvature/p25": 102912.0, "global_fisher_curvature/p75": 102912.0, "global_fisher_curvature/p85": 102912.0, "global_fisher_curvature/p90": 102912.0, "global_fisher_curvature/p95": 102912.0, "global_fisher_curvature/p99": 102912.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.62518835067749e-07, "global_fisher_kl_divergence/max": 5.62518835067749e-07, "global_fisher_kl_divergence/median": 5.62518835067749e-07, "global_fisher_kl_divergence/min": 5.62518835067749e-07, "global_fisher_kl_divergence/p25": 5.62518835067749e-07, "global_fisher_kl_divergence/p75": 5.62518835067749e-07, "global_fisher_kl_divergence/p85": 5.62518835067749e-07, "global_fisher_kl_divergence/p90": 5.62518835067749e-07, "global_fisher_kl_divergence/p95": 5.62518835067749e-07, "global_fisher_kl_divergence/p99": 5.62518835067749e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.02734375, "global_full_update_term/max": 0.02734375, "global_full_update_term/median": 0.02734375, "global_full_update_term/min": 0.02734375, "global_full_update_term/p25": 0.02734375, "global_full_update_term/p75": 0.02734375, "global_full_update_term/p85": 0.02734375, "global_full_update_term/p90": 0.02734375, "global_full_update_term/p95": 0.02734375, "global_full_update_term/p99": 0.02734375, "global_full_update_term/var": NaN, "global_hessian_coeff": 33024.0, "global_hessian_coeff/max": 33024.0, "global_hessian_coeff/median": 33024.0, "global_hessian_coeff/min": 33024.0, "global_hessian_coeff/p25": 33024.0, "global_hessian_coeff/p75": 33024.0, "global_hessian_coeff/p99": 33024.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 33024.0, "global_hessian_coeff_abs/max": 33024.0, "global_hessian_coeff_abs/median": 33024.0, "global_hessian_coeff_abs/min": 33024.0, "global_hessian_coeff_abs/p25": 33024.0, "global_hessian_coeff_abs/p75": 33024.0, "global_hessian_coeff_abs/p99": 33024.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.08195909857749939, "learning_rate": 3.0916106078064522e-06, "loss": -0.5417, "masked_global_fisher_curvature": 868.0, "masked_global_fisher_curvature/max": 868.0, "masked_global_fisher_curvature/median": 868.0, "masked_global_fisher_curvature/min": 868.0, "masked_global_fisher_curvature/p25": 868.0, "masked_global_fisher_curvature/p75": 868.0, "masked_global_fisher_curvature/p85": 868.0, "masked_global_fisher_curvature/p90": 868.0, "masked_global_fisher_curvature/p95": 868.0, "masked_global_fisher_curvature/p99": 868.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/max": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/median": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/min": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/p25": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/p75": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/p85": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/p90": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/p95": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/p99": 4.743924364447594e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0035247802734375, "masked_global_full_update_term/max": 0.0035247802734375, "masked_global_full_update_term/median": 0.0035247802734375, "masked_global_full_update_term/min": 0.0035247802734375, "masked_global_full_update_term/p25": 0.0035247802734375, "masked_global_full_update_term/p75": 0.0035247802734375, "masked_global_full_update_term/p85": 0.0035247802734375, "masked_global_full_update_term/p90": 0.0035247802734375, "masked_global_full_update_term/p95": 0.0035247802734375, "masked_global_full_update_term/p99": 0.0035247802734375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -6112.0, "masked_global_hessian_coeff/max": -6112.0, "masked_global_hessian_coeff/median": -6112.0, "masked_global_hessian_coeff/min": -6112.0, "masked_global_hessian_coeff/p25": -6112.0, "masked_global_hessian_coeff/p75": -6112.0, "masked_global_hessian_coeff/p99": -6112.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 6112.0, "masked_global_hessian_coeff_abs/max": 6112.0, "masked_global_hessian_coeff_abs/median": 6112.0, "masked_global_hessian_coeff_abs/min": 6112.0, "masked_global_hessian_coeff_abs/p25": 6112.0, "masked_global_hessian_coeff_abs/p75": 6112.0, "masked_global_hessian_coeff_abs/p99": 6112.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 3.2865397930145264, "masked_per_sentence_gradient_norm/max": 29.125, "masked_per_sentence_gradient_norm/median": 0.38671875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 4.375, "masked_per_sentence_gradient_norm/var": 27.609596252441406, "masked_per_token_gradient_norm": 0.03953636437654495, "masked_per_token_gradient_norm/max": 27.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.45741238258779e-11, "masked_per_token_gradient_norm/var": 0.5009949803352356, "masked_sentence_fisher_curvature": 1991.4281005859375, "masked_sentence_fisher_curvature/max": 11328.0, "masked_sentence_fisher_curvature/median": 1192.0, "masked_sentence_fisher_curvature/min": 2.234375, "masked_sentence_fisher_curvature/p25": 453.5, "masked_sentence_fisher_curvature/p75": 2852.0, "masked_sentence_fisher_curvature/p85": 3752.0, "masked_sentence_fisher_curvature/p90": 4672.0, "masked_sentence_fisher_curvature/p95": 5568.0, "masked_sentence_fisher_curvature/p99": 9017.607421875, "masked_sentence_fisher_curvature/var": 4658969.5, "masked_sentence_fisher_kl_divergence": 1.0884798840038457e-08, "masked_sentence_fisher_kl_divergence/max": 6.193295121192932e-08, "masked_sentence_fisher_kl_divergence/median": 6.51925802230835e-09, "masked_sentence_fisher_kl_divergence/min": 1.2221335055073723e-11, "masked_sentence_fisher_kl_divergence/p25": 2.4774635676294565e-09, "masked_sentence_fisher_kl_divergence/p75": 1.5599653124809265e-08, "masked_sentence_fisher_kl_divergence/p85": 2.0547304302453995e-08, "masked_sentence_fisher_kl_divergence/p90": 2.5553163141012192e-08, "masked_sentence_fisher_kl_divergence/p95": 3.050081431865692e-08, "masked_sentence_fisher_kl_divergence/p99": 4.932521235900822e-08, "masked_sentence_fisher_kl_divergence/var": 1.3936402487764454e-16, "masked_sentence_full_gradient_variance/max_squared_error": 37.01396560668945, "masked_sentence_full_gradient_variance/metric": 37.01396560668945, "masked_sentence_full_gradient_variance/p75": 37.01396560668945, "masked_sentence_full_gradient_variance/p90": 37.01396560668945, "masked_sentence_full_gradient_variance/p95": 37.01396560668945, "masked_sentence_full_gradient_variance/p99": 37.01396560668945, "masked_sentence_full_update_term": 0.0007753521203994751, "masked_sentence_full_update_term/max": 0.005859375, "masked_sentence_full_update_term/median": 5.412101745605469e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00116729736328125, "masked_sentence_full_update_term/p85": 0.00139617919921875, "masked_sentence_full_update_term/p90": 0.002033233642578125, "masked_sentence_full_update_term/p95": 0.00342559814453125, "masked_sentence_full_update_term/p99": 0.004525761120021343, "masked_sentence_full_update_term/var": 1.3183259852667106e-06, "masked_sentence_hessian_coeff": -26569.41796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -4608.0, "masked_sentence_hessian_coeff/min": -137216.0, "masked_sentence_hessian_coeff/p25": -46848.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 1067570432.0, "masked_sentence_hessian_coeff_abs": 26569.41796875, "masked_sentence_hessian_coeff_abs/max": 137216.0, "masked_sentence_hessian_coeff_abs/median": 1064.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 46848.0, "masked_sentence_hessian_coeff_abs/p99": 99763.3203125, "masked_sentence_hessian_coeff_abs/var": 1067570432.0, "masked_token_fisher_curvature": 1935.134033203125, "masked_token_fisher_curvature/max": 1826816.0, "masked_token_fisher_curvature/median": 2.490276864927643e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.23616633682556e-25, "masked_token_fisher_curvature/p75": 1.7291723608536813e-14, "masked_token_fisher_curvature/p85": 9.777068044058979e-12, "masked_token_fisher_curvature/p90": 1.2514647096395493e-09, "masked_token_fisher_curvature/p95": 1.3768672943115234e-05, "masked_token_fisher_curvature/p99": 2811.25, "masked_token_fisher_curvature/var": 1599505664.0, "masked_token_fisher_kl_divergence": 1.0570492037231816e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.3620176566706532e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.408933617384634e-36, "masked_token_fisher_kl_divergence/p75": 9.451184733271898e-26, "masked_token_fisher_kl_divergence/p85": 5.335314950967029e-23, "masked_token_fisher_kl_divergence/p90": 6.829203137237796e-21, "masked_token_fisher_kl_divergence/p95": 7.546047120499111e-17, "masked_token_fisher_kl_divergence/p99": 1.534954208182171e-08, "masked_token_fisher_kl_divergence/var": 4.770763319984772e-14, "masked_token_full_update_term": 6.495546585938428e-06, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -7.674098014831543e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.516302068993631e-20, "masked_token_full_update_term/p85": 1.124100812432971e-15, "masked_token_full_update_term/p90": 5.218048215738236e-14, "masked_token_full_update_term/p95": 8.029132914089132e-12, "masked_token_full_update_term/p99": 1.0505318641662598e-06, "masked_token_full_update_term/var": 1.3999730263947185e-08, "masked_token_hessian_coeff": -24640.6953125, "masked_token_hessian_coeff/max": 524.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -15728640.0, "masked_token_hessian_coeff/p25": -1.9872459233738482e-10, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0002498626708984375, "masked_token_hessian_coeff/var": 196187373568.0, "masked_token_hessian_coeff_abs": 24640.76953125, "masked_token_hessian_coeff_abs/max": 15728640.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 9.138602763414383e-09, "masked_token_hessian_coeff_abs/p99": 1438.5, "masked_token_hessian_coeff_abs/var": 196187373568.0, "mean_logprobs": -0.007720947265625, "mean_logprobs/var": 3.170967102050781e-05, "num_completions/total": 7008, "per_sentence_gradient_norm": 33.26725387573242, "per_sentence_gradient_norm/max": 237.0, "per_sentence_gradient_norm/median": 6.8125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 53.4375, "per_sentence_gradient_norm/var": 2627.687744140625, "per_token_feature_norm": 190.1714630126953, "per_token_feature_norm/max": 256.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 114.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 122.28585052490234, "per_token_gradient_norm": 0.4683763384819031, "per_token_gradient_norm/max": 286.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 7.958078640513122e-11, "per_token_gradient_norm/var": 63.18245315551758, "per_token_policy_error_norm": 0.0045371889136731625, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004256901331245899, "policy_entropy": 0.008282640017569065, "policy_entropy/max": 2.0, "policy_entropy/median": 2.8558133635669947e-10, "policy_entropy/min": 7.047578818951796e-22, "policy_entropy/p25": 7.283063041541027e-13, "policy_entropy/p75": 4.1676685214042664e-08, "policy_entropy/var": 0.00449270149692893, "policy_loss": -0.5416666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2508772015571594, "policy_sharpness": 9.77677059173584, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.4624582529067993, "reward": 0.5416666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2508772015571594, "rewards/accuracy_reward": 0.5416666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2508772015571594, "sentence_fisher_curvature": 232518.671875, "sentence_fisher_curvature/max": 1810432.0, "sentence_fisher_curvature/median": 182272.0, "sentence_fisher_curvature/min": 71.0, "sentence_fisher_curvature/p25": 789.0, "sentence_fisher_curvature/p75": 345088.0, "sentence_fisher_curvature/p85": 501760.0, "sentence_fisher_curvature/p90": 600064.0, "sentence_fisher_curvature/p95": 845824.0, "sentence_fisher_curvature/p99": 1117800.625, "sentence_fisher_curvature/var": 95206940672.0, "sentence_fisher_kl_divergence": 1.2702207641268615e-06, "sentence_fisher_kl_divergence/max": 9.894371032714844e-06, "sentence_fisher_kl_divergence/median": 9.98377799987793e-07, "sentence_fisher_kl_divergence/min": 3.8744474295526743e-10, "sentence_fisher_kl_divergence/p25": 4.300090949982405e-09, "sentence_fisher_kl_divergence/p75": 1.8831342458724976e-06, "sentence_fisher_kl_divergence/p85": 2.738088369369507e-06, "sentence_fisher_kl_divergence/p90": 3.2782554626464844e-06, "sentence_fisher_kl_divergence/p95": 4.6193599700927734e-06, "sentence_fisher_kl_divergence/p99": 6.100547579990234e-06, "sentence_fisher_kl_divergence/var": 2.8403692158701643e-12, "sentence_full_gradient_variance/max_squared_error": 3633.02587890625, "sentence_full_gradient_variance/metric": 3633.02587890625, "sentence_full_gradient_variance/p75": 3633.02587890625, "sentence_full_gradient_variance/p90": 3633.02587890625, "sentence_full_gradient_variance/p95": 3633.02587890625, "sentence_full_gradient_variance/p99": 3633.02587890625, "sentence_full_update_term": 0.009295145981013775, "sentence_full_update_term/max": 0.07373046875, "sentence_full_update_term/median": 0.002288818359375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0115509033203125, "sentence_full_update_term/p85": 0.0186767578125, "sentence_full_update_term/p90": 0.03057861328125, "sentence_full_update_term/p95": 0.04364013671875, "sentence_full_update_term/p99": 0.06491702049970627, "sentence_full_update_term/var": 0.00022425575298257172, "sentence_hessian_coeff": 23893.0, "sentence_hessian_coeff/max": 987136.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -303104.0, "sentence_hessian_coeff/p25": -18432.0, "sentence_hessian_coeff/p75": 24896.0, "sentence_hessian_coeff/p99": 613582.0, "sentence_hessian_coeff/var": 28752510976.0, "sentence_hessian_coeff_abs": 88287.671875, "sentence_hessian_coeff_abs/max": 987136.0, "sentence_hessian_coeff_abs/median": 18432.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 105472.0, "sentence_hessian_coeff_abs/p99": 613582.0, "sentence_hessian_coeff_abs/var": 21452634112.0, "step": 73, "token_fisher_curvature": 169886.6875, "token_fisher_curvature/max": 203423744.0, "token_fisher_curvature/median": 2.676624113323589e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.72084247699335e-25, "token_fisher_curvature/p75": 2.0872192862952943e-14, "token_fisher_curvature/p85": 1.3642420526593924e-11, "token_fisher_curvature/p90": 2.3137545213103294e-09, "token_fisher_curvature/p95": 4.38690185546875e-05, "token_fisher_curvature/p99": 25431.0, "token_fisher_curvature/var": 16084569686016.0, "token_fisher_kl_divergence": 9.283205031351827e-07, "token_fisher_kl_divergence/max": 0.0011138916015625, "token_fisher_kl_divergence/median": 1.4606252698232797e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.667542374565537e-36, "token_fisher_kl_divergence/p75": 1.1389889293943057e-25, "token_fisher_kl_divergence/p85": 7.444625512977249e-23, "token_fisher_kl_divergence/p90": 1.2652554649611111e-20, "token_fisher_kl_divergence/p95": 2.393918396847994e-16, "token_fisher_kl_divergence/p99": 1.3908356777392328e-07, "token_fisher_kl_divergence/var": 4.804233122435164e-10, "token_full_update_term": 8.000039815669879e-05, "token_full_update_term/max": 0.04833984375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -7.674098014831543e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.682087709356578e-20, "token_full_update_term/p85": 1.4849232954361469e-15, "token_full_update_term/p90": 7.061018436615996e-14, "token_full_update_term/p95": 1.474509403465163e-11, "token_full_update_term/p99": 0.00011205673217773438, "token_full_update_term/var": 1.8651863911145483e-06, "token_hessian_coeff": 2639.775390625, "token_hessian_coeff/max": 203423744.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -25427968.0, "token_hessian_coeff/p25": -2.382876118645072e-10, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00041969120502471924, "token_hessian_coeff/var": 10524340256768.0, "token_hessian_coeff_abs": 163205.3125, "token_hessian_coeff_abs/max": 203423744.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.0884832590818405e-08, "token_hessian_coeff_abs/p99": 296384.0, "token_hessian_coeff_abs/var": 10497710620672.0 }, { "accuracy_reward": 0.8125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1539473980665207, "adam_stats/lm_head/lr_effective_max": 1.367405002383748e-05, "adam_stats/lm_head/lr_effective_mean": -1.3401441414928605e-11, "adam_stats/lm_head/lr_effective_min": -1.3971268344903365e-05, "adam_stats/lm_head/lr_effective_std": 3.5765356187766884e-07, "adam_stats/lr_effective_max": 1.5103984878805932e-05, "adam_stats/lr_effective_mean": 3.130855991129167e-11, "adam_stats/lr_effective_min": -1.524612889625132e-05, "adam_stats/m_t_max": 0.0008693786803632975, "adam_stats/m_t_mean": 3.7318741846759096e-12, "adam_stats/m_t_min": -0.0009186482639051974, "adam_stats/v_t_max": 2.4998771550599486e-05, "adam_stats/v_t_mean": 1.7227016254478533e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.1539473980665207, "all_logprobs": -0.007711982820183039, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.5, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -4.76837158203125e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -4.312989767640829e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.010422117076814175, "clip_ratio": 0.0, "completion_length": 449.19793701171875, "completion_length/correct": 462.1410217285156, "completion_length/correct/max": 946.0, "completion_length/correct/median": 439.0, "completion_length/correct/min": 223.0, "completion_length/correct/p25": 323.0, "completion_length/correct/p75": 546.5, "completion_length/correct/var": 30662.794921875, "completion_length/incorrect": 393.1111145019531, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 377.0, "completion_length/incorrect/min": 280.0, "completion_length/incorrect/p25": 280.0, "completion_length/incorrect/p75": 390.0, "completion_length/incorrect/var": 29810.927734375, "completion_length/max": 1024.0, "completion_length/median": 391.0, "completion_length/min": 223.0, "completion_length/p25": 323.0, "completion_length/p75": 543.5, "completion_length/var": 30921.173828125, "curvature_clip_ratio_token_fisher": 0.008371402509510517, "curvature_clip_ratio_token_hessian": 0.005681423004716635, "curvature_clip_ratio_total_fisher": 0.008371402509510517, "curvature_clip_ratio_total_full": 0.008371402509510517, "curvature_clip_ratio_total_hessian": 0.005681423004716635, "epoch": 0.1184, "feature_vector_variance/max_squared_error": 65339.359375, "feature_vector_variance/metric": 31281.4453125, "generated_tokens/total": 4118543.0, "global_fisher_curvature": 148480.0, "global_fisher_curvature/max": 148480.0, "global_fisher_curvature/median": 148480.0, "global_fisher_curvature/min": 148480.0, "global_fisher_curvature/p25": 148480.0, "global_fisher_curvature/p75": 148480.0, "global_fisher_curvature/p85": 148480.0, "global_fisher_curvature/p90": 148480.0, "global_fisher_curvature/p95": 148480.0, "global_fisher_curvature/p99": 148480.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.078051567077637e-07, "global_fisher_kl_divergence/max": 7.078051567077637e-07, "global_fisher_kl_divergence/median": 7.078051567077637e-07, "global_fisher_kl_divergence/min": 7.078051567077637e-07, "global_fisher_kl_divergence/p25": 7.078051567077637e-07, "global_fisher_kl_divergence/p75": 7.078051567077637e-07, "global_fisher_kl_divergence/p85": 7.078051567077637e-07, "global_fisher_kl_divergence/p90": 7.078051567077637e-07, "global_fisher_kl_divergence/p95": 7.078051567077637e-07, "global_fisher_kl_divergence/p99": 7.078051567077637e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0306396484375, "global_full_update_term/max": 0.0306396484375, "global_full_update_term/median": 0.0306396484375, "global_full_update_term/min": 0.0306396484375, "global_full_update_term/p25": 0.0306396484375, "global_full_update_term/p75": 0.0306396484375, "global_full_update_term/p85": 0.0306396484375, "global_full_update_term/p90": 0.0306396484375, "global_full_update_term/p95": 0.0306396484375, "global_full_update_term/p99": 0.0306396484375, "global_full_update_term/var": NaN, "global_hessian_coeff": 19328.0, "global_hessian_coeff/max": 19328.0, "global_hessian_coeff/median": 19328.0, "global_hessian_coeff/min": 19328.0, "global_hessian_coeff/p25": 19328.0, "global_hessian_coeff/p75": 19328.0, "global_hessian_coeff/p99": 19328.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 19328.0, "global_hessian_coeff_abs/max": 19328.0, "global_hessian_coeff_abs/median": 19328.0, "global_hessian_coeff_abs/min": 19328.0, "global_hessian_coeff_abs/p25": 19328.0, "global_hessian_coeff_abs/p75": 19328.0, "global_hessian_coeff_abs/p99": 19328.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.14115741848945618, "learning_rate": 2.882538935057563e-06, "loss": -0.8125, "masked_global_fisher_curvature": 1840.0, "masked_global_fisher_curvature/max": 1840.0, "masked_global_fisher_curvature/median": 1840.0, "masked_global_fisher_curvature/min": 1840.0, "masked_global_fisher_curvature/p25": 1840.0, "masked_global_fisher_curvature/p75": 1840.0, "masked_global_fisher_curvature/p85": 1840.0, "masked_global_fisher_curvature/p90": 1840.0, "masked_global_fisher_curvature/p95": 1840.0, "masked_global_fisher_curvature/p99": 1840.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/max": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/median": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/min": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/p25": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/p75": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/p85": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/p90": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/p95": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/p99": 8.789356797933578e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002899169921875, "masked_global_full_update_term/max": 0.002899169921875, "masked_global_full_update_term/median": 0.002899169921875, "masked_global_full_update_term/min": 0.002899169921875, "masked_global_full_update_term/p25": 0.002899169921875, "masked_global_full_update_term/p75": 0.002899169921875, "masked_global_full_update_term/p85": 0.002899169921875, "masked_global_full_update_term/p90": 0.002899169921875, "masked_global_full_update_term/p95": 0.002899169921875, "masked_global_full_update_term/p99": 0.002899169921875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -17536.0, "masked_global_hessian_coeff/max": -17536.0, "masked_global_hessian_coeff/median": -17536.0, "masked_global_hessian_coeff/min": -17536.0, "masked_global_hessian_coeff/p25": -17536.0, "masked_global_hessian_coeff/p75": -17536.0, "masked_global_hessian_coeff/p99": -17536.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 17536.0, "masked_global_hessian_coeff_abs/max": 17536.0, "masked_global_hessian_coeff_abs/median": 17536.0, "masked_global_hessian_coeff_abs/min": 17536.0, "masked_global_hessian_coeff_abs/p25": 17536.0, "masked_global_hessian_coeff_abs/p75": 17536.0, "masked_global_hessian_coeff_abs/p99": 17536.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 4.6410322189331055, "masked_per_sentence_gradient_norm/max": 17.75, "masked_per_sentence_gradient_norm/median": 3.375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 1.623046875, "masked_per_sentence_gradient_norm/p75": 5.2890625, "masked_per_sentence_gradient_norm/var": 24.895265579223633, "masked_per_token_gradient_norm": 0.09253464639186859, "masked_per_token_gradient_norm/max": 31.375, "masked_per_token_gradient_norm/median": 1.7189449863508344e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 1.587618925213974e-14, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.0710209608078003e-07, "masked_per_token_gradient_norm/var": 1.223606824874878, "masked_sentence_fisher_curvature": 3081.5, "masked_sentence_fisher_curvature/max": 9600.0, "masked_sentence_fisher_curvature/median": 2672.0, "masked_sentence_fisher_curvature/min": 34.0, "masked_sentence_fisher_curvature/p25": 1104.0, "masked_sentence_fisher_curvature/p75": 4128.0, "masked_sentence_fisher_curvature/p85": 5840.0, "masked_sentence_fisher_curvature/p90": 7712.0, "masked_sentence_fisher_curvature/p95": 8160.0, "masked_sentence_fisher_curvature/p99": 9539.2001953125, "masked_sentence_fisher_curvature/var": 6344170.5, "masked_sentence_fisher_kl_divergence": 1.4717765672855876e-08, "masked_sentence_fisher_kl_divergence/max": 4.586763679981232e-08, "masked_sentence_fisher_kl_divergence/median": 1.2747477740049362e-08, "masked_sentence_fisher_kl_divergence/min": 1.6279955161735415e-10, "masked_sentence_fisher_kl_divergence/p25": 5.289621185511351e-09, "masked_sentence_fisher_kl_divergence/p75": 1.9674189388751984e-08, "masked_sentence_fisher_kl_divergence/p85": 2.7939677238464355e-08, "masked_sentence_fisher_kl_divergence/p90": 3.67872416973114e-08, "masked_sentence_fisher_kl_divergence/p95": 3.888271749019623e-08, "masked_sentence_fisher_kl_divergence/p99": 4.5646448398883877e-08, "masked_sentence_fisher_kl_divergence/var": 1.445579647357589e-16, "masked_sentence_full_gradient_variance/max_squared_error": 43.81574630737305, "masked_sentence_full_gradient_variance/metric": 43.81574630737305, "masked_sentence_full_gradient_variance/p75": 43.81574630737305, "masked_sentence_full_gradient_variance/p90": 43.81574630737305, "masked_sentence_full_gradient_variance/p95": 43.81574630737305, "masked_sentence_full_gradient_variance/p99": 43.81574630737305, "masked_sentence_full_update_term": 0.0009638468618504703, "masked_sentence_full_update_term/max": 0.003753662109375, "masked_sentence_full_update_term/median": 0.000736236572265625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00029754638671875, "masked_sentence_full_update_term/p75": 0.00131988525390625, "masked_sentence_full_update_term/p85": 0.0019207000732421875, "masked_sentence_full_update_term/p90": 0.002532958984375, "masked_sentence_full_update_term/p95": 0.00260162353515625, "masked_sentence_full_update_term/p99": 0.0035507208667695522, "masked_sentence_full_update_term/var": 8.318459094880382e-07, "masked_sentence_hessian_coeff": -44373.3359375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -46080.0, "masked_sentence_hessian_coeff/min": -117248.0, "masked_sentence_hessian_coeff/p25": -62208.0, "masked_sentence_hessian_coeff/p75": -19808.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 969352640.0, "masked_sentence_hessian_coeff_abs": 44373.3359375, "masked_sentence_hessian_coeff_abs/max": 117248.0, "masked_sentence_hessian_coeff_abs/median": 45568.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 19808.0, "masked_sentence_hessian_coeff_abs/p75": 62208.0, "masked_sentence_hessian_coeff_abs/p99": 103628.84375, "masked_sentence_hessian_coeff_abs/var": 969352640.0, "masked_token_fisher_curvature": 3529.084716796875, "masked_token_fisher_curvature/max": 2031616.0, "masked_token_fisher_curvature/median": 2.558039500707987e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.23616633682556e-25, "masked_token_fisher_curvature/p75": 3.6193270602780103e-14, "masked_token_fisher_curvature/p85": 2.751221472863108e-11, "masked_token_fisher_curvature/p90": 5.326000973582268e-09, "masked_token_fisher_curvature/p95": 9.5367431640625e-05, "masked_token_fisher_curvature/p99": 11531.0, "masked_token_fisher_curvature/var": 3684718592.0, "masked_token_fisher_kl_divergence": 1.6860614238112248e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 1.2202692127637526e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.9857556510886103e-36, "masked_token_fisher_kl_divergence/p75": 1.7286782332651164e-25, "masked_token_fisher_kl_divergence/p85": 1.315217173959314e-22, "masked_token_fisher_kl_divergence/p90": 2.541098841762901e-20, "masked_token_fisher_kl_divergence/p95": 4.544975507059235e-16, "masked_token_fisher_kl_divergence/p99": 5.5078999139368534e-08, "masked_token_fisher_kl_divergence/var": 8.406991736879116e-14, "masked_token_full_update_term": 1.4301894225354772e-05, "masked_token_full_update_term/max": 0.004150390625, "masked_token_full_update_term/median": 6.437450399132683e-19, "masked_token_full_update_term/min": -3.0174851417541504e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.8984813721090177e-14, "masked_token_full_update_term/p85": 1.1489698081845745e-12, "masked_token_full_update_term/p90": 1.77351466845721e-11, "masked_token_full_update_term/p95": 4.045432433485985e-09, "masked_token_full_update_term/p99": 0.000232696533203125, "masked_token_full_update_term/var": 3.0373385584425705e-08, "masked_token_hessian_coeff": -57427.84765625, "masked_token_hessian_coeff/max": 2464.0, "masked_token_hessian_coeff/median": -5.311449058353901e-10, "masked_token_hessian_coeff/min": -16318464.0, "masked_token_hessian_coeff/p25": -5.714595317840576e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.011363029479980469, "masked_token_hessian_coeff/var": 468364066816.0, "masked_token_hessian_coeff_abs": 57428.33984375, "masked_token_hessian_coeff_abs/max": 16318464.0, "masked_token_hessian_coeff_abs/median": 1.8044374883174896e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 1.6999734953060397e-12, "masked_token_hessian_coeff_abs/p75": 2.384185791015625e-05, "masked_token_hessian_coeff_abs/p99": 913408.0, "masked_token_hessian_coeff_abs/var": 468364034048.0, "mean_logprobs": -0.007720947265625, "mean_logprobs/var": 2.7060508728027344e-05, "num_completions/total": 7104, "per_sentence_gradient_norm": 57.982421875, "per_sentence_gradient_norm/max": 252.0, "per_sentence_gradient_norm/median": 39.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 14.4375, "per_sentence_gradient_norm/p75": 92.5, "per_sentence_gradient_norm/var": 3161.39111328125, "per_token_feature_norm": 190.42137145996094, "per_token_feature_norm/max": 249.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 105.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 156.77108764648438, "per_token_gradient_norm": 0.9731777310371399, "per_token_gradient_norm/max": 288.0, "per_token_gradient_norm/median": 1.964508555829525e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 1.687538997430238e-14, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.2852251529693604e-07, "per_token_gradient_norm/var": 124.74996185302734, "per_token_policy_error_norm": 0.004576072562485933, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004133489448577166, "policy_entropy": 0.008484801277518272, "policy_entropy/max": 1.7578125, "policy_entropy/median": 3.219611244276166e-10, "policy_entropy/min": 2.7660919683773245e-21, "policy_entropy/p25": 8.348877145181177e-13, "policy_entropy/p75": 6.263144314289093e-08, "policy_entropy/var": 0.004956931807100773, "policy_loss": -0.8125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.1539473980665207, "policy_sharpness": 9.767621994018555, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5098189115524292, "reward": 0.8125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.1539473980665207, "rewards/accuracy_reward": 0.8125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1539473980665207, "sentence_fisher_curvature": 331692.375, "sentence_fisher_curvature/max": 1286144.0, "sentence_fisher_curvature/median": 262144.0, "sentence_fisher_curvature/min": 34.0, "sentence_fisher_curvature/p25": 122240.0, "sentence_fisher_curvature/p75": 495616.0, "sentence_fisher_curvature/p85": 568320.0, "sentence_fisher_curvature/p90": 688128.0, "sentence_fisher_curvature/p95": 901120.0, "sentence_fisher_curvature/p99": 1052672.75, "sentence_fisher_curvature/var": 79285723136.0, "sentence_fisher_kl_divergence": 1.5853465811233036e-06, "sentence_fisher_kl_divergence/max": 6.139278411865234e-06, "sentence_fisher_kl_divergence/median": 1.2516975402832031e-06, "sentence_fisher_kl_divergence/min": 1.6279955161735415e-10, "sentence_fisher_kl_divergence/p25": 5.848705768585205e-07, "sentence_fisher_kl_divergence/p75": 2.3655593395233154e-06, "sentence_fisher_kl_divergence/p85": 2.7194619178771973e-06, "sentence_fisher_kl_divergence/p90": 3.293156623840332e-06, "sentence_fisher_kl_divergence/p95": 4.306435585021973e-06, "sentence_fisher_kl_divergence/p99": 5.03510591443046e-06, "sentence_fisher_kl_divergence/var": 1.8122337398090993e-12, "sentence_full_gradient_variance/max_squared_error": 6378.9208984375, "sentence_full_gradient_variance/metric": 6378.9208984375, "sentence_full_gradient_variance/p75": 6378.9208984375, "sentence_full_gradient_variance/p90": 6378.9208984375, "sentence_full_gradient_variance/p95": 6378.9208984375, "sentence_full_gradient_variance/p99": 6378.9208984375, "sentence_full_update_term": 0.012637297622859478, "sentence_full_update_term/max": 0.05224609375, "sentence_full_update_term/median": 0.01068115234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0035400390625, "sentence_full_update_term/p75": 0.017974853515625, "sentence_full_update_term/p85": 0.0235595703125, "sentence_full_update_term/p90": 0.02703857421875, "sentence_full_update_term/p95": 0.0367431640625, "sentence_full_update_term/p99": 0.049230966717004776, "sentence_full_update_term/var": 0.0001319508155575022, "sentence_hessian_coeff": 20158.66796875, "sentence_hessian_coeff/max": 729088.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -342016.0, "sentence_hessian_coeff/p25": -63488.0, "sentence_hessian_coeff/p75": 81664.0, "sentence_hessian_coeff/p99": 639590.6875, "sentence_hessian_coeff/var": 38571720704.0, "sentence_hessian_coeff_abs": 128212.0, "sentence_hessian_coeff_abs/max": 729088.0, "sentence_hessian_coeff_abs/median": 70656.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 13824.0, "sentence_hessian_coeff_abs/p75": 175360.0, "sentence_hessian_coeff_abs/p99": 639590.6875, "sentence_hessian_coeff_abs/var": 22371020800.0, "step": 74, "token_fisher_curvature": 355674.4375, "token_fisher_curvature/max": 202375168.0, "token_fisher_curvature/median": 3.049318610115481e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.140895131805434e-25, "token_fisher_curvature/p75": 5.639932965095795e-14, "token_fisher_curvature/p85": 5.397815527885541e-11, "token_fisher_curvature/p90": 1.2863893061876297e-08, "token_fisher_curvature/p95": 0.00156402587890625, "token_fisher_curvature/p99": 692224.0, "token_fisher_curvature/var": 32134077087744.0, "token_fisher_kl_divergence": 1.6996774547806126e-06, "token_fisher_kl_divergence/max": 0.00096893310546875, "token_fisher_kl_divergence/median": 1.4544622940012405e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.408933617384634e-36, "token_fisher_kl_divergence/p75": 2.6980305136006957e-25, "token_fisher_kl_divergence/p85": 2.575827502793057e-22, "token_fisher_kl_divergence/p90": 6.140988867593677e-20, "token_fisher_kl_divergence/p95": 7.494005416219807e-15, "token_fisher_kl_divergence/p99": 3.3080577850341797e-06, "token_fisher_kl_divergence/var": 7.338815111168628e-10, "token_full_update_term": 0.0001585828431416303, "token_full_update_term/max": 0.044921875, "token_full_update_term/median": 8.199278929421627e-19, "token_full_update_term/min": -3.0174851417541504e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.4091839634365897e-14, "token_full_update_term/p85": 1.6200374375330284e-12, "token_full_update_term/p90": 3.183231456205249e-11, "token_full_update_term/p95": 1.9429535313975066e-08, "token_full_update_term/p99": 0.0025177001953125, "token_full_update_term/var": 3.4090351164195454e-06, "token_hessian_coeff": 417.22552490234375, "token_hessian_coeff/max": 202375168.0, "token_hessian_coeff/median": -5.602487362921238e-10, "token_hessian_coeff/min": -25690112.0, "token_hessian_coeff/p25": -6.362795829772949e-06, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.0465240478515625, "token_hessian_coeff/var": 20164825317376.0, "token_hessian_coeff_abs": 331082.375, "token_hessian_coeff_abs/max": 202375168.0, "token_hessian_coeff_abs/median": 2.0023435354232788e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.8616219676914625e-12, "token_hessian_coeff_abs/p75": 2.8133392333984375e-05, "token_hessian_coeff_abs/p99": 8159232.0, "token_hessian_coeff_abs/var": 20055205085184.0 }, { "accuracy_reward": 0.6145833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.23936404287815094, "adam_stats/lm_head/lr_effective_max": 1.2181551937828772e-05, "adam_stats/lm_head/lr_effective_mean": -1.5062012401201308e-11, "adam_stats/lm_head/lr_effective_min": -1.2625140698219184e-05, "adam_stats/lm_head/lr_effective_std": 3.2606590139039326e-07, "adam_stats/lr_effective_max": 1.4254182133299764e-05, "adam_stats/lr_effective_mean": 1.9121796160370685e-11, "adam_stats/lr_effective_min": -1.3763751667283941e-05, "adam_stats/m_t_max": 0.0009184476220980287, "adam_stats/m_t_mean": 4.744630036079833e-12, "adam_stats/m_t_min": -0.000854975136462599, "adam_stats/v_t_max": 2.4982626200653613e-05, "adam_stats/v_t_mean": 1.7227663523175507e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6145833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.23936404287815094, "all_logprobs": -0.008987415581941605, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.75, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -4.76837158203125e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -3.652571467682719e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013710846193134785, "clip_ratio": 0.0, "completion_length": 555.8021240234375, "completion_length/correct": 454.6440734863281, "completion_length/correct/max": 1021.0, "completion_length/correct/median": 395.0, "completion_length/correct/min": 295.0, "completion_length/correct/p25": 364.5, "completion_length/correct/p75": 437.0, "completion_length/correct/var": 28383.201171875, "completion_length/incorrect": 717.108154296875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 804.0, "completion_length/incorrect/min": 229.0, "completion_length/incorrect/p25": 568.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 94893.9921875, "completion_length/max": 1024.0, "completion_length/median": 399.0, "completion_length/min": 229.0, "completion_length/p25": 369.75, "completion_length/p75": 784.25, "completion_length/var": 69777.671875, "curvature_clip_ratio_token_fisher": 0.004423037171363831, "curvature_clip_ratio_token_hessian": 0.0027925109025090933, "curvature_clip_ratio_total_fisher": 0.004423037171363831, "curvature_clip_ratio_total_full": 0.004423037171363831, "curvature_clip_ratio_total_hessian": 0.0027925109025090933, "epoch": 0.12, "feature_vector_variance/max_squared_error": 64961.79296875, "feature_vector_variance/metric": 31098.65234375, "generated_tokens/total": 4171900.0, "global_fisher_curvature": 107008.0, "global_fisher_curvature/max": 107008.0, "global_fisher_curvature/median": 107008.0, "global_fisher_curvature/min": 107008.0, "global_fisher_curvature/p25": 107008.0, "global_fisher_curvature/p75": 107008.0, "global_fisher_curvature/p85": 107008.0, "global_fisher_curvature/p90": 107008.0, "global_fisher_curvature/p95": 107008.0, "global_fisher_curvature/p99": 107008.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.4517219066619873e-07, "global_fisher_kl_divergence/max": 4.4517219066619873e-07, "global_fisher_kl_divergence/median": 4.4517219066619873e-07, "global_fisher_kl_divergence/min": 4.4517219066619873e-07, "global_fisher_kl_divergence/p25": 4.4517219066619873e-07, "global_fisher_kl_divergence/p75": 4.4517219066619873e-07, "global_fisher_kl_divergence/p85": 4.4517219066619873e-07, "global_fisher_kl_divergence/p90": 4.4517219066619873e-07, "global_fisher_kl_divergence/p95": 4.4517219066619873e-07, "global_fisher_kl_divergence/p99": 4.4517219066619873e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.012451171875, "global_full_update_term/max": 0.012451171875, "global_full_update_term/median": 0.012451171875, "global_full_update_term/min": 0.012451171875, "global_full_update_term/p25": 0.012451171875, "global_full_update_term/p75": 0.012451171875, "global_full_update_term/p85": 0.012451171875, "global_full_update_term/p90": 0.012451171875, "global_full_update_term/p95": 0.012451171875, "global_full_update_term/p99": 0.012451171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 26112.0, "global_hessian_coeff/max": 26112.0, "global_hessian_coeff/median": 26112.0, "global_hessian_coeff/min": 26112.0, "global_hessian_coeff/p25": 26112.0, "global_hessian_coeff/p75": 26112.0, "global_hessian_coeff/p99": 26112.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 26112.0, "global_hessian_coeff_abs/max": 26112.0, "global_hessian_coeff_abs/median": 26112.0, "global_hessian_coeff_abs/min": 26112.0, "global_hessian_coeff_abs/p25": 26112.0, "global_hessian_coeff_abs/p75": 26112.0, "global_hessian_coeff_abs/p99": 26112.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.08877868950366974, "learning_rate": 2.6790929273509547e-06, "loss": -0.6146, "masked_global_fisher_curvature": 2800.0, "masked_global_fisher_curvature/max": 2800.0, "masked_global_fisher_curvature/median": 2800.0, "masked_global_fisher_curvature/min": 2800.0, "masked_global_fisher_curvature/p25": 2800.0, "masked_global_fisher_curvature/p75": 2800.0, "masked_global_fisher_curvature/p85": 2800.0, "masked_global_fisher_curvature/p90": 2800.0, "masked_global_fisher_curvature/p95": 2800.0, "masked_global_fisher_curvature/p99": 2800.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/max": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/median": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/min": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/p25": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/p75": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/p85": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/p90": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/p95": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/p99": 1.1641532182693481e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00147247314453125, "masked_global_full_update_term/max": 0.00147247314453125, "masked_global_full_update_term/median": 0.00147247314453125, "masked_global_full_update_term/min": 0.00147247314453125, "masked_global_full_update_term/p25": 0.00147247314453125, "masked_global_full_update_term/p75": 0.00147247314453125, "masked_global_full_update_term/p85": 0.00147247314453125, "masked_global_full_update_term/p90": 0.00147247314453125, "masked_global_full_update_term/p95": 0.00147247314453125, "masked_global_full_update_term/p99": 0.00147247314453125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -9536.0, "masked_global_hessian_coeff/max": -9536.0, "masked_global_hessian_coeff/median": -9536.0, "masked_global_hessian_coeff/min": -9536.0, "masked_global_hessian_coeff/p25": -9536.0, "masked_global_hessian_coeff/p75": -9536.0, "masked_global_hessian_coeff/p99": -9536.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 9536.0, "masked_global_hessian_coeff_abs/max": 9536.0, "masked_global_hessian_coeff_abs/median": 9536.0, "masked_global_hessian_coeff_abs/min": 9536.0, "masked_global_hessian_coeff_abs/p25": 9536.0, "masked_global_hessian_coeff_abs/p75": 9536.0, "masked_global_hessian_coeff_abs/p99": 9536.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 2.486994504928589, "masked_per_sentence_gradient_norm/max": 19.25, "masked_per_sentence_gradient_norm/median": 0.419921875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 3.1875, "masked_per_sentence_gradient_norm/var": 15.624754905700684, "masked_per_token_gradient_norm": 0.047308556735515594, "masked_per_token_gradient_norm/max": 37.5, "masked_per_token_gradient_norm/median": 2.185345003916095e-19, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.469743438065052e-09, "masked_per_token_gradient_norm/var": 0.7355052828788757, "masked_sentence_fisher_curvature": 2709.619140625, "masked_sentence_fisher_curvature/max": 11968.0, "masked_sentence_fisher_curvature/median": 1760.0, "masked_sentence_fisher_curvature/min": 0.0084228515625, "masked_sentence_fisher_curvature/p25": 688.0, "masked_sentence_fisher_curvature/p75": 3564.0, "masked_sentence_fisher_curvature/p85": 5048.0, "masked_sentence_fisher_curvature/p90": 5696.0, "masked_sentence_fisher_curvature/p95": 7496.0, "masked_sentence_fisher_curvature/p99": 11420.8017578125, "masked_sentence_fisher_curvature/var": 6653954.0, "masked_sentence_fisher_kl_divergence": 1.1256815923843533e-08, "masked_sentence_fisher_kl_divergence/max": 4.98257577419281e-08, "masked_sentence_fisher_kl_divergence/median": 7.30506144464016e-09, "masked_sentence_fisher_kl_divergence/min": 3.5083047578154947e-14, "masked_sentence_fisher_kl_divergence/p25": 2.852175384759903e-09, "masked_sentence_fisher_kl_divergence/p75": 1.4813849702477455e-08, "masked_sentence_fisher_kl_divergence/p85": 2.0983861759305e-08, "masked_sentence_fisher_kl_divergence/p90": 2.3632310330867767e-08, "masked_sentence_fisher_kl_divergence/p95": 3.114109858870506e-08, "masked_sentence_fisher_kl_divergence/p99": 4.7392685331715256e-08, "masked_sentence_fisher_kl_divergence/var": 1.1480691184529687e-16, "masked_sentence_full_gradient_variance/max_squared_error": 20.959640502929688, "masked_sentence_full_gradient_variance/metric": 20.959640502929688, "masked_sentence_full_gradient_variance/p75": 20.959640502929688, "masked_sentence_full_gradient_variance/p90": 20.959640502929688, "masked_sentence_full_gradient_variance/p95": 20.959640502929688, "masked_sentence_full_gradient_variance/p99": 20.959640502929688, "masked_sentence_full_update_term": 0.0005122401053085923, "masked_sentence_full_update_term/max": 0.00604248046875, "masked_sentence_full_update_term/median": 5.626678466796875e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0006561279296875, "masked_sentence_full_update_term/p85": 0.0012359619140625, "masked_sentence_full_update_term/p90": 0.00125885009765625, "masked_sentence_full_update_term/p95": 0.0017528533935546875, "masked_sentence_full_update_term/p99": 0.004476933740079403, "masked_sentence_full_update_term/var": 8.81532116636663e-07, "masked_sentence_hessian_coeff": -25689.640625, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -13184.0, "masked_sentence_hessian_coeff/min": -124416.0, "masked_sentence_hessian_coeff/p25": -44544.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 937221888.0, "masked_sentence_hessian_coeff_abs": 25689.640625, "masked_sentence_hessian_coeff_abs/max": 124416.0, "masked_sentence_hessian_coeff_abs/median": 13184.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 44544.0, "masked_sentence_hessian_coeff_abs/p99": 119065.6171875, "masked_sentence_hessian_coeff_abs/var": 937221888.0, "masked_token_fisher_curvature": 2932.5966796875, "masked_token_fisher_curvature/max": 2359296.0, "masked_token_fisher_curvature/median": 3.1001405869507392e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 9.499652347288677e-25, "masked_token_fisher_curvature/p75": 4.6629367034256575e-14, "masked_token_fisher_curvature/p85": 2.6943780540023e-11, "masked_token_fisher_curvature/p90": 4.190951585769653e-09, "masked_token_fisher_curvature/p95": 5.221366882324219e-05, "masked_token_fisher_curvature/p99": 5811.375, "masked_token_fisher_curvature/var": 3271228672.0, "masked_token_fisher_kl_divergence": 1.2180705333264541e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 1.2880619468061833e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.949661018762886e-36, "masked_token_fisher_kl_divergence/p75": 1.9387045606711586e-25, "masked_token_fisher_kl_divergence/p85": 1.1166938269465874e-22, "masked_token_fisher_kl_divergence/p90": 1.7364175418713157e-20, "masked_token_fisher_kl_divergence/p95": 2.1684043449710089e-16, "masked_token_fisher_kl_divergence/p99": 2.41452653426677e-08, "masked_token_fisher_kl_divergence/var": 5.6429314617290754e-14, "masked_token_full_update_term": 6.7795758695865516e-06, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.6391277313232422e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.3715157481941631e-17, "masked_token_full_update_term/p85": 1.1046719095020308e-14, "masked_token_full_update_term/p90": 2.6290081223123707e-13, "masked_token_full_update_term/p95": 2.2168933355715126e-11, "masked_token_full_update_term/p99": 3.1173694878816605e-06, "masked_token_full_update_term/var": 1.5542431341941665e-08, "masked_token_hessian_coeff": -28859.025390625, "masked_token_hessian_coeff/max": 600.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -17301504.0, "masked_token_hessian_coeff/p25": -1.0652001947164536e-08, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0013885498046875, "masked_token_hessian_coeff/var": 268580143104.0, "masked_token_hessian_coeff_abs": 28859.115234375, "masked_token_hessian_coeff_abs/max": 17301504.0, "masked_token_hessian_coeff_abs/median": 2.45029690981724e-17, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.2165477275848389e-07, "masked_token_hessian_coeff_abs/p99": 4294.5, "masked_token_hessian_coeff_abs/var": 268580126720.0, "mean_logprobs": -0.0081787109375, "mean_logprobs/var": 2.8967857360839844e-05, "num_completions/total": 7200, "per_sentence_gradient_norm": 32.29557418823242, "per_sentence_gradient_norm/max": 175.0, "per_sentence_gradient_norm/median": 21.375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 45.3125, "per_sentence_gradient_norm/var": 1459.4385986328125, "per_token_feature_norm": 190.69566345214844, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 151.55645751953125, "per_token_gradient_norm": 0.5143224596977234, "per_token_gradient_norm/max": 294.0, "per_token_gradient_norm/median": 2.6156377411212794e-18, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.6952981241047382e-09, "per_token_gradient_norm/var": 70.77129364013672, "per_token_policy_error_norm": 0.005363891366869211, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005116615444421768, "policy_entropy": 0.008926451206207275, "policy_entropy/max": 1.546875, "policy_entropy/median": 3.474269760772586e-10, "policy_entropy/min": 2.80579663777987e-21, "policy_entropy/p25": 7.460698725481052e-13, "policy_entropy/p75": 6.658956408500671e-08, "policy_entropy/var": 0.004729392938315868, "policy_loss": -0.6145833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.23936404287815094, "policy_sharpness": 9.757746696472168, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5819478034973145, "reward": 0.6145833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.23936404287815094, "rewards/accuracy_reward": 0.6145833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.23936404287815094, "sentence_fisher_curvature": 227444.28125, "sentence_fisher_curvature/max": 1089536.0, "sentence_fisher_curvature/median": 147456.0, "sentence_fisher_curvature/min": 124.0, "sentence_fisher_curvature/p25": 3088.0, "sentence_fisher_curvature/p75": 400896.0, "sentence_fisher_curvature/p85": 508928.0, "sentence_fisher_curvature/p90": 602112.0, "sentence_fisher_curvature/p95": 690176.0, "sentence_fisher_curvature/p99": 945562.0625, "sentence_fisher_curvature/var": 63214886912.0, "sentence_fisher_kl_divergence": 9.451616733713308e-07, "sentence_fisher_kl_divergence/max": 4.5299530029296875e-06, "sentence_fisher_kl_divergence/median": 6.109476089477539e-07, "sentence_fisher_kl_divergence/min": 5.165929906070232e-10, "sentence_fisher_kl_divergence/p25": 1.280568540096283e-08, "sentence_fisher_kl_divergence/p75": 1.6689300537109375e-06, "sentence_fisher_kl_divergence/p85": 2.1122395992279053e-06, "sentence_fisher_kl_divergence/p90": 2.5033950805664062e-06, "sentence_fisher_kl_divergence/p95": 2.8721988201141357e-06, "sentence_fisher_kl_divergence/p99": 3.93539858123404e-06, "sentence_fisher_kl_divergence/var": 1.091994875600244e-12, "sentence_full_gradient_variance/max_squared_error": 2443.490234375, "sentence_full_gradient_variance/metric": 2443.490234375, "sentence_full_gradient_variance/p75": 2443.490234375, "sentence_full_gradient_variance/p90": 2443.490234375, "sentence_full_gradient_variance/p95": 2443.490234375, "sentence_full_gradient_variance/p99": 2443.490234375, "sentence_full_update_term": 0.006626765243709087, "sentence_full_update_term/max": 0.02294921875, "sentence_full_update_term/median": 0.005035400390625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0106201171875, "sentence_full_update_term/p85": 0.015899658203125, "sentence_full_update_term/p90": 0.0177001953125, "sentence_full_update_term/p95": 0.020050048828125, "sentence_full_update_term/p99": 0.02167358808219433, "sentence_full_update_term/var": 4.7646768507547677e-05, "sentence_hessian_coeff": 33239.3359375, "sentence_hessian_coeff/max": 659456.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -292864.0, "sentence_hessian_coeff/p25": -77312.0, "sentence_hessian_coeff/p75": 71296.0, "sentence_hessian_coeff/p99": 531046.8125, "sentence_hessian_coeff/var": 31300820992.0, "sentence_hessian_coeff_abs": 112570.0, "sentence_hessian_coeff_abs/max": 659456.0, "sentence_hessian_coeff_abs/median": 77312.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 194816.0, "sentence_hessian_coeff_abs/p99": 531046.8125, "sentence_hessian_coeff_abs/var": 19611908096.0, "step": 75, "token_fisher_curvature": 190615.109375, "token_fisher_curvature/max": 198180864.0, "token_fisher_curvature/median": 3.4728350837426314e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.0469004627624257e-24, "token_fisher_curvature/p75": 5.639932965095795e-14, "token_fisher_curvature/p85": 3.965539008277119e-11, "token_fisher_curvature/p90": 7.683411240577698e-09, "token_fisher_curvature/p95": 0.0002088509500026703, "token_fisher_curvature/p99": 52533.0, "token_fisher_curvature/var": 18175926206464.0, "token_fisher_kl_divergence": 7.917326456663432e-07, "token_fisher_kl_divergence/max": 0.000823974609375, "token_fisher_kl_divergence/median": 1.4421363423571622e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.349329098042464e-36, "token_fisher_kl_divergence/p75": 2.3426013441443167e-25, "token_fisher_kl_divergence/p85": 1.6461540424658808e-22, "token_fisher_kl_divergence/p90": 3.197549375884984e-20, "token_fisher_kl_divergence/p95": 8.68039364346207e-16, "token_fisher_kl_divergence/p99": 2.185333869419992e-07, "token_fisher_kl_divergence/var": 3.1359173591205547e-10, "token_full_update_term": 7.669459591852501e-05, "token_full_update_term/max": 0.041259765625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.6391277313232422e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.8214596497756474e-17, "token_full_update_term/p85": 1.4654943925052066e-14, "token_full_update_term/p90": 3.5242642137944813e-13, "token_full_update_term/p95": 4.006306397741355e-11, "token_full_update_term/p99": 0.0001248195767402649, "token_full_update_term/var": 1.5882685602264246e-06, "token_hessian_coeff": 9665.0966796875, "token_hessian_coeff/max": 196083712.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -25952256.0, "token_hessian_coeff/p25": -1.1757947504520416e-08, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.002471923828125, "token_hessian_coeff/var": 11768372920320.0, "token_hessian_coeff_abs": 179967.75, "token_hessian_coeff_abs/max": 196083712.0, "token_hessian_coeff_abs/median": 2.0556473190325164e-16, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.5331974029541016e-07, "token_hessian_coeff_abs/p99": 417552.0, "token_hessian_coeff_abs/var": 11736077828096.0 }, { "accuracy_reward": 0.6875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.21710526943206787, "adam_stats/lm_head/lr_effective_max": 1.2098799743398558e-05, "adam_stats/lm_head/lr_effective_mean": -1.704402070867861e-11, "adam_stats/lm_head/lr_effective_min": -1.2328607226663735e-05, "adam_stats/lm_head/lr_effective_std": 3.236734187339607e-07, "adam_stats/lr_effective_max": 1.3021375707467087e-05, "adam_stats/lr_effective_mean": -5.458568709884593e-13, "adam_stats/lr_effective_min": -1.3177407709008548e-05, "adam_stats/m_t_max": 0.0008290329715237021, "adam_stats/m_t_mean": 7.34809890885757e-13, "adam_stats/m_t_min": -0.0009803111897781491, "adam_stats/v_t_max": 2.4959024813142605e-05, "adam_stats/v_t_mean": 1.7238058853605298e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.21710526943206787, "all_logprobs": -0.008952450007200241, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -4.59375, "all_logprobs/p1": -0.1708398461341858, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.0001615523360669613, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012226302176713943, "clip_ratio": 0.0, "completion_length": 495.90625, "completion_length/correct": 442.42425537109375, "completion_length/correct/max": 1006.0, "completion_length/correct/median": 431.0, "completion_length/correct/min": 235.0, "completion_length/correct/p25": 321.5, "completion_length/correct/p75": 537.0, "completion_length/correct/var": 22684.1875, "completion_length/incorrect": 613.5667114257812, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 427.0, "completion_length/incorrect/min": 340.0, "completion_length/incorrect/p25": 346.75, "completion_length/incorrect/p75": 1019.25, "completion_length/incorrect/var": 94411.984375, "completion_length/max": 1024.0, "completion_length/median": 429.0, "completion_length/min": 235.0, "completion_length/p25": 340.75, "completion_length/p75": 541.5, "completion_length/var": 50700.2109375, "curvature_clip_ratio_token_fisher": 0.006133551709353924, "curvature_clip_ratio_token_hessian": 0.003970004618167877, "curvature_clip_ratio_total_fisher": 0.006133551709353924, "curvature_clip_ratio_total_full": 0.006133551709353924, "curvature_clip_ratio_total_hessian": 0.003970004618167877, "epoch": 0.1216, "feature_vector_variance/max_squared_error": 57606.5546875, "feature_vector_variance/metric": 31234.232421875, "generated_tokens/total": 4219507.0, "global_fisher_curvature": 181248.0, "global_fisher_curvature/max": 181248.0, "global_fisher_curvature/median": 181248.0, "global_fisher_curvature/min": 181248.0, "global_fisher_curvature/p25": 181248.0, "global_fisher_curvature/p75": 181248.0, "global_fisher_curvature/p85": 181248.0, "global_fisher_curvature/p90": 181248.0, "global_fisher_curvature/p95": 181248.0, "global_fisher_curvature/p99": 181248.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 6.51925802230835e-07, "global_fisher_kl_divergence/max": 6.51925802230835e-07, "global_fisher_kl_divergence/median": 6.51925802230835e-07, "global_fisher_kl_divergence/min": 6.51925802230835e-07, "global_fisher_kl_divergence/p25": 6.51925802230835e-07, "global_fisher_kl_divergence/p75": 6.51925802230835e-07, "global_fisher_kl_divergence/p85": 6.51925802230835e-07, "global_fisher_kl_divergence/p90": 6.51925802230835e-07, "global_fisher_kl_divergence/p95": 6.51925802230835e-07, "global_fisher_kl_divergence/p99": 6.51925802230835e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0184326171875, "global_full_update_term/max": 0.0184326171875, "global_full_update_term/median": 0.0184326171875, "global_full_update_term/min": 0.0184326171875, "global_full_update_term/p25": 0.0184326171875, "global_full_update_term/p75": 0.0184326171875, "global_full_update_term/p85": 0.0184326171875, "global_full_update_term/p90": 0.0184326171875, "global_full_update_term/p95": 0.0184326171875, "global_full_update_term/p99": 0.0184326171875, "global_full_update_term/var": NaN, "global_hessian_coeff": 45312.0, "global_hessian_coeff/max": 45312.0, "global_hessian_coeff/median": 45312.0, "global_hessian_coeff/min": 45312.0, "global_hessian_coeff/p25": 45312.0, "global_hessian_coeff/p75": 45312.0, "global_hessian_coeff/p99": 45312.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 45312.0, "global_hessian_coeff_abs/max": 45312.0, "global_hessian_coeff_abs/median": 45312.0, "global_hessian_coeff_abs/min": 45312.0, "global_hessian_coeff_abs/p25": 45312.0, "global_hessian_coeff_abs/p75": 45312.0, "global_hessian_coeff_abs/p99": 45312.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.1563119888305664, "learning_rate": 2.4815204523085656e-06, "loss": -0.6875, "masked_global_fisher_curvature": 5440.0, "masked_global_fisher_curvature/max": 5440.0, "masked_global_fisher_curvature/median": 5440.0, "masked_global_fisher_curvature/min": 5440.0, "masked_global_fisher_curvature/p25": 5440.0, "masked_global_fisher_curvature/p75": 5440.0, "masked_global_fisher_curvature/p85": 5440.0, "masked_global_fisher_curvature/p90": 5440.0, "masked_global_fisher_curvature/p95": 5440.0, "masked_global_fisher_curvature/p99": 5440.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/max": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/median": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/min": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/p25": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/p75": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/p85": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/p90": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/p95": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/p99": 1.955777406692505e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003173828125, "masked_global_full_update_term/max": 0.003173828125, "masked_global_full_update_term/median": 0.003173828125, "masked_global_full_update_term/min": 0.003173828125, "masked_global_full_update_term/p25": 0.003173828125, "masked_global_full_update_term/p75": 0.003173828125, "masked_global_full_update_term/p85": 0.003173828125, "masked_global_full_update_term/p90": 0.003173828125, "masked_global_full_update_term/p95": 0.003173828125, "masked_global_full_update_term/p99": 0.003173828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -19840.0, "masked_global_hessian_coeff/max": -19840.0, "masked_global_hessian_coeff/median": -19840.0, "masked_global_hessian_coeff/min": -19840.0, "masked_global_hessian_coeff/p25": -19840.0, "masked_global_hessian_coeff/p75": -19840.0, "masked_global_hessian_coeff/p99": -19840.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 19840.0, "masked_global_hessian_coeff_abs/max": 19840.0, "masked_global_hessian_coeff_abs/median": 19840.0, "masked_global_hessian_coeff_abs/min": 19840.0, "masked_global_hessian_coeff_abs/p25": 19840.0, "masked_global_hessian_coeff_abs/p75": 19840.0, "masked_global_hessian_coeff_abs/p99": 19840.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 4.37744140625, "masked_per_sentence_gradient_norm/max": 15.6875, "masked_per_sentence_gradient_norm/median": 3.1875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 7.8125, "masked_per_sentence_gradient_norm/var": 17.33644676208496, "masked_per_token_gradient_norm": 0.099974624812603, "masked_per_token_gradient_norm/max": 32.0, "masked_per_token_gradient_norm/median": 8.570921750106208e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.335865817964077e-08, "masked_per_token_gradient_norm/var": 1.5610065460205078, "masked_sentence_fisher_curvature": 4813.9794921875, "masked_sentence_fisher_curvature/max": 16896.0, "masked_sentence_fisher_curvature/median": 3504.0, "masked_sentence_fisher_curvature/min": 169.0, "masked_sentence_fisher_curvature/p25": 947.0, "masked_sentence_fisher_curvature/p75": 7920.0, "masked_sentence_fisher_curvature/p85": 10288.0, "masked_sentence_fisher_curvature/p90": 11200.0, "masked_sentence_fisher_curvature/p95": 15248.0, "masked_sentence_fisher_curvature/p99": 15923.203125, "masked_sentence_fisher_curvature/var": 22080070.0, "masked_sentence_fisher_kl_divergence": 1.728263576694644e-08, "masked_sentence_fisher_kl_divergence/max": 6.05359673500061e-08, "masked_sentence_fisher_kl_divergence/median": 1.257285475730896e-08, "masked_sentence_fisher_kl_divergence/min": 6.075424607843161e-10, "masked_sentence_fisher_kl_divergence/p25": 3.4015101846307516e-09, "masked_sentence_fisher_kl_divergence/p75": 2.8463546186685562e-08, "masked_sentence_fisher_kl_divergence/p85": 3.6961864680051804e-08, "masked_sentence_fisher_kl_divergence/p90": 4.0279701352119446e-08, "masked_sentence_fisher_kl_divergence/p95": 5.477340891957283e-08, "masked_sentence_fisher_kl_divergence/p99": 5.72181413360795e-08, "masked_sentence_fisher_kl_divergence/var": 2.8486713279875144e-16, "masked_sentence_full_gradient_variance/max_squared_error": 34.380348205566406, "masked_sentence_full_gradient_variance/metric": 34.380348205566406, "masked_sentence_full_gradient_variance/p75": 34.380348205566406, "masked_sentence_full_gradient_variance/p90": 34.380348205566406, "masked_sentence_full_gradient_variance/p95": 34.380348205566406, "masked_sentence_full_gradient_variance/p99": 34.380348205566406, "masked_sentence_full_update_term": 0.0008996824617497623, "masked_sentence_full_update_term/max": 0.0029296875, "masked_sentence_full_update_term/median": 0.00079345703125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0013980865478515625, "masked_sentence_full_update_term/p85": 0.00191497802734375, "masked_sentence_full_update_term/p90": 0.0021820068359375, "masked_sentence_full_update_term/p95": 0.0025787353515625, "masked_sentence_full_update_term/p99": 0.0029006958939135075, "masked_sentence_full_update_term/var": 7.618252197971742e-07, "masked_sentence_hessian_coeff": -57320.66796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -51456.0, "masked_sentence_hessian_coeff/min": -176128.0, "masked_sentence_hessian_coeff/p25": -102912.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 3255034368.0, "masked_sentence_hessian_coeff_abs": 57320.66796875, "masked_sentence_hessian_coeff_abs/max": 176128.0, "masked_sentence_hessian_coeff_abs/median": 51200.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 102912.0, "masked_sentence_hessian_coeff_abs/p99": 176128.0, "masked_sentence_hessian_coeff_abs/var": 3255034368.0, "masked_token_fisher_curvature": 4659.8974609375, "masked_token_fisher_curvature/max": 2670592.0, "masked_token_fisher_curvature/median": 4.2859867131067597e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.6026624368214911e-24, "masked_token_fisher_curvature/p75": 1.5631940186722204e-13, "masked_token_fisher_curvature/p85": 1.1723599868673773e-10, "masked_token_fisher_curvature/p90": 3.632158041000366e-08, "masked_token_fisher_curvature/p95": 0.00213623046875, "masked_token_fisher_curvature/p99": 25254.0, "masked_token_fisher_curvature/var": 5464350208.0, "masked_token_fisher_kl_divergence": 1.6724150952995842e-08, "masked_token_fisher_kl_divergence/max": 9.59634780883789e-06, "masked_token_fisher_kl_divergence/median": 1.5407439555097887e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.759922319029209e-36, "masked_token_fisher_kl_divergence/p75": 5.62224322594636e-25, "masked_token_fisher_kl_divergence/p85": 4.215389949752656e-22, "masked_token_fisher_kl_divergence/p90": 1.3044307387716225e-19, "masked_token_fisher_kl_divergence/p95": 7.66053886991358e-15, "masked_token_fisher_kl_divergence/p99": 9.04765329323709e-08, "masked_token_fisher_kl_divergence/var": 7.038569353008325e-14, "masked_token_full_update_term": 1.3351241250347812e-05, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.296881914138794e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.93275428784068e-16, "masked_token_full_update_term/p85": 1.420218109782212e-13, "masked_token_full_update_term/p90": 3.979039320256561e-12, "masked_token_full_update_term/p95": 1.811713445931673e-09, "masked_token_full_update_term/p99": 0.00024387240409851074, "masked_token_full_update_term/var": 2.800873488695288e-08, "masked_token_hessian_coeff": -62031.796875, "masked_token_hessian_coeff/max": 1248.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -17956864.0, "masked_token_hessian_coeff/p25": -2.477318048477173e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.002960205078125, "masked_token_hessian_coeff/var": 575135875072.0, "masked_token_hessian_coeff_abs": 62032.23046875, "masked_token_hessian_coeff_abs/max": 17956864.0, "masked_token_hessian_coeff_abs/median": 1.8417267710901797e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.592802047729492e-06, "masked_token_hessian_coeff_abs/p99": 1120000.0, "masked_token_hessian_coeff_abs/var": 575135809536.0, "mean_logprobs": -0.00860595703125, "mean_logprobs/var": 2.6464462280273438e-05, "num_completions/total": 7296, "per_sentence_gradient_norm": 36.244140625, "per_sentence_gradient_norm/max": 157.0, "per_sentence_gradient_norm/median": 21.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 51.3125, "per_sentence_gradient_norm/var": 1851.9906005859375, "per_token_feature_norm": 190.2818145751953, "per_token_feature_norm/max": 260.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 106.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 145.7677459716797, "per_token_gradient_norm": 0.784809410572052, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 1.2079226507921703e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.57160684466362e-08, "per_token_gradient_norm/var": 107.33507537841797, "per_token_policy_error_norm": 0.005416499450802803, "per_token_policy_error_norm/max": 1.953125, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00532033946365118, "policy_entropy": 0.009334779344499111, "policy_entropy/max": 1.7421875, "policy_entropy/median": 2.8194335754960775e-10, "policy_entropy/min": 2.0117032497289633e-20, "policy_entropy/p25": 5.933031843596837e-13, "policy_entropy/p75": 8.940696716308594e-08, "policy_entropy/var": 0.004728489089757204, "policy_loss": -0.6875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.21710526943206787, "policy_sharpness": 9.725627899169922, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7237046957015991, "reward": 0.6875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.21710526943206787, "rewards/accuracy_reward": 0.6875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.21710526943206787, "sentence_fisher_curvature": 333112.78125, "sentence_fisher_curvature/max": 1212416.0, "sentence_fisher_curvature/median": 270336.0, "sentence_fisher_curvature/min": 386.0, "sentence_fisher_curvature/p25": 6856.0, "sentence_fisher_curvature/p75": 537600.0, "sentence_fisher_curvature/p85": 731136.0, "sentence_fisher_curvature/p90": 831488.0, "sentence_fisher_curvature/p95": 1124352.0, "sentence_fisher_curvature/p99": 1204633.625, "sentence_fisher_curvature/var": 122287063040.0, "sentence_fisher_kl_divergence": 1.19551202715229e-06, "sentence_fisher_kl_divergence/max": 4.351139068603516e-06, "sentence_fisher_kl_divergence/median": 9.685754776000977e-07, "sentence_fisher_kl_divergence/min": 1.382431946694851e-09, "sentence_fisher_kl_divergence/p25": 2.463639248162508e-08, "sentence_fisher_kl_divergence/p75": 1.9259750843048096e-06, "sentence_fisher_kl_divergence/p85": 2.6300549507141113e-06, "sentence_fisher_kl_divergence/p90": 2.9802322387695312e-06, "sentence_fisher_kl_divergence/p95": 4.030764102935791e-06, "sentence_fisher_kl_divergence/p99": 4.322826953284675e-06, "sentence_fisher_kl_divergence/var": 1.5741616994288665e-12, "sentence_full_gradient_variance/max_squared_error": 3023.336669921875, "sentence_full_gradient_variance/metric": 3023.336669921875, "sentence_full_gradient_variance/p75": 3023.336669921875, "sentence_full_gradient_variance/p90": 3023.336669921875, "sentence_full_gradient_variance/p95": 3023.336669921875, "sentence_full_gradient_variance/p99": 3023.336669921875, "sentence_full_update_term": 0.0070087118074297905, "sentence_full_update_term/max": 0.028076171875, "sentence_full_update_term/median": 0.004425048828125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.010772705078125, "sentence_full_update_term/p85": 0.014495849609375, "sentence_full_update_term/p90": 0.01898193359375, "sentence_full_update_term/p95": 0.0224609375, "sentence_full_update_term/p99": 0.025177011266350746, "sentence_full_update_term/var": 5.3637209930457175e-05, "sentence_hessian_coeff": 60661.5, "sentence_hessian_coeff/max": 1015808.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -268288.0, "sentence_hessian_coeff/p25": -65344.0, "sentence_hessian_coeff/p75": 128000.0, "sentence_hessian_coeff/p99": 786227.9375, "sentence_hessian_coeff/var": 54109233152.0, "sentence_hessian_coeff_abs": 146108.171875, "sentence_hessian_coeff_abs/max": 1015808.0, "sentence_hessian_coeff_abs/median": 83456.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 217344.0, "sentence_hessian_coeff_abs/p99": 786227.9375, "sentence_hessian_coeff_abs/var": 36255473664.0, "step": 76, "token_fisher_curvature": 298020.46875, "token_fisher_curvature/max": 202375168.0, "token_fisher_curvature/median": 4.980553729855286e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.783608195817466e-24, "token_fisher_curvature/p75": 1.9628743075372768e-13, "token_fisher_curvature/p85": 1.864464138634503e-10, "token_fisher_curvature/p90": 7.962808012962341e-08, "token_fisher_curvature/p95": 0.01628398895263672, "token_fisher_curvature/p99": 294912.0, "token_fisher_curvature/var": 30621692854272.0, "token_fisher_kl_divergence": 1.0696708159230184e-06, "token_fisher_kl_divergence/max": 0.00072479248046875, "token_fisher_kl_divergence/median": 1.787262988391355e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.394689268473244e-36, "token_fisher_kl_divergence/p75": 7.043959903771876e-25, "token_fisher_kl_divergence/p85": 6.683619349428464e-22, "token_fisher_kl_divergence/p90": 2.862971361719535e-19, "token_fisher_kl_divergence/p95": 5.835262828490784e-14, "token_fisher_kl_divergence/p99": 1.0579824447631836e-06, "token_fisher_kl_divergence/var": 3.94589194652184e-10, "token_full_update_term": 0.00011015161726390943, "token_full_update_term/max": 0.0390625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.296881914138794e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.083811398051921e-16, "token_full_update_term/p85": 1.9184653865522705e-13, "token_full_update_term/p90": 6.0254023992456496e-12, "token_full_update_term/p95": 4.48198989033699e-09, "token_full_update_term/p99": 0.00112152099609375, "token_full_update_term/var": 2.147349960068823e-06, "token_hessian_coeff": 15430.2646484375, "token_hessian_coeff/max": 200278016.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -26476544.0, "token_hessian_coeff/p25": -2.738088369369507e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00792384147644043, "token_hessian_coeff/var": 20350242914304.0, "token_hessian_coeff_abs": 295781.84375, "token_hessian_coeff_abs/max": 200278016.0, "token_hessian_coeff_abs/median": 2.2168933355715126e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.069639205932617e-06, "token_hessian_coeff_abs/p99": 4552832.0, "token_hessian_coeff_abs/var": 20262990905344.0 }, { "accuracy_reward": 0.59375, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24375000596046448, "adam_stats/lm_head/lr_effective_max": 1.1546385394467507e-05, "adam_stats/lm_head/lr_effective_mean": 1.4120041941234618e-12, "adam_stats/lm_head/lr_effective_min": -1.1866270142490976e-05, "adam_stats/lm_head/lr_effective_std": 2.971142691876594e-07, "adam_stats/lr_effective_max": 1.1862962310260627e-05, "adam_stats/lr_effective_mean": 1.0283524899679097e-11, "adam_stats/lr_effective_min": -1.2648909432755318e-05, "adam_stats/m_t_max": 0.0008235680288635194, "adam_stats/m_t_mean": -1.0805533984942217e-12, "adam_stats/m_t_min": -0.0008105637389235198, "adam_stats/v_t_max": 2.4936212867032737e-05, "adam_stats/v_t_mean": 1.7255715085984225e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.59375, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24375000596046448, "all_logprobs": -0.008823679760098457, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -9.25, "all_logprobs/p1": -0.1611328125, "all_logprobs/p10": -4.76837158203125e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -4.5299530029296875e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013088286854326725, "clip_ratio": 0.0, "completion_length": 724.5729370117188, "completion_length/correct": 599.4210815429688, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 582.0, "completion_length/correct/min": 396.0, "completion_length/correct/p25": 485.0, "completion_length/correct/p75": 634.0, "completion_length/correct/var": 20349.85546875, "completion_length/incorrect": 907.4871826171875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 299.0, "completion_length/incorrect/p25": 859.5, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 39599.19921875, "completion_length/max": 1024.0, "completion_length/median": 634.0, "completion_length/min": 299.0, "completion_length/p25": 555.0, "completion_length/p75": 1003.0, "completion_length/var": 50968.421875, "curvature_clip_ratio_token_fisher": 0.003910349681973457, "curvature_clip_ratio_token_hessian": 0.0025158498901873827, "curvature_clip_ratio_total_fisher": 0.003910349681973457, "curvature_clip_ratio_total_full": 0.003910349681973457, "curvature_clip_ratio_total_hessian": 0.0025158498901873827, "epoch": 0.1232, "feature_vector_variance/max_squared_error": 72025.6484375, "feature_vector_variance/metric": 30739.9765625, "generated_tokens/total": 4289066.0, "global_fisher_curvature": 92672.0, "global_fisher_curvature/max": 92672.0, "global_fisher_curvature/median": 92672.0, "global_fisher_curvature/min": 92672.0, "global_fisher_curvature/p25": 92672.0, "global_fisher_curvature/p75": 92672.0, "global_fisher_curvature/p85": 92672.0, "global_fisher_curvature/p90": 92672.0, "global_fisher_curvature/p95": 92672.0, "global_fisher_curvature/p99": 92672.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.849847078323364e-07, "global_fisher_kl_divergence/max": 2.849847078323364e-07, "global_fisher_kl_divergence/median": 2.849847078323364e-07, "global_fisher_kl_divergence/min": 2.849847078323364e-07, "global_fisher_kl_divergence/p25": 2.849847078323364e-07, "global_fisher_kl_divergence/p75": 2.849847078323364e-07, "global_fisher_kl_divergence/p85": 2.849847078323364e-07, "global_fisher_kl_divergence/p90": 2.849847078323364e-07, "global_fisher_kl_divergence/p95": 2.849847078323364e-07, "global_fisher_kl_divergence/p99": 2.849847078323364e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.015869140625, "global_full_update_term/max": 0.015869140625, "global_full_update_term/median": 0.015869140625, "global_full_update_term/min": 0.015869140625, "global_full_update_term/p25": 0.015869140625, "global_full_update_term/p75": 0.015869140625, "global_full_update_term/p85": 0.015869140625, "global_full_update_term/p90": 0.015869140625, "global_full_update_term/p95": 0.015869140625, "global_full_update_term/p99": 0.015869140625, "global_full_update_term/var": NaN, "global_hessian_coeff": 14528.0, "global_hessian_coeff/max": 14528.0, "global_hessian_coeff/median": 14528.0, "global_hessian_coeff/min": 14528.0, "global_hessian_coeff/p25": 14528.0, "global_hessian_coeff/p75": 14528.0, "global_hessian_coeff/p99": 14528.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 14528.0, "global_hessian_coeff_abs/max": 14528.0, "global_hessian_coeff_abs/median": 14528.0, "global_hessian_coeff_abs/min": 14528.0, "global_hessian_coeff_abs/p25": 14528.0, "global_hessian_coeff_abs/p75": 14528.0, "global_hessian_coeff_abs/p99": 14528.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.12587007880210876, "learning_rate": 2.29006222155752e-06, "loss": -0.5938, "masked_global_fisher_curvature": 2768.0, "masked_global_fisher_curvature/max": 2768.0, "masked_global_fisher_curvature/median": 2768.0, "masked_global_fisher_curvature/min": 2768.0, "masked_global_fisher_curvature/p25": 2768.0, "masked_global_fisher_curvature/p75": 2768.0, "masked_global_fisher_curvature/p85": 2768.0, "masked_global_fisher_curvature/p90": 2768.0, "masked_global_fisher_curvature/p95": 2768.0, "masked_global_fisher_curvature/p99": 2768.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/max": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/median": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/min": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/p25": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/p75": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/p85": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/p90": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/p95": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/p99": 8.498318493366241e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00177764892578125, "masked_global_full_update_term/max": 0.00177764892578125, "masked_global_full_update_term/median": 0.00177764892578125, "masked_global_full_update_term/min": 0.00177764892578125, "masked_global_full_update_term/p25": 0.00177764892578125, "masked_global_full_update_term/p75": 0.00177764892578125, "masked_global_full_update_term/p85": 0.00177764892578125, "masked_global_full_update_term/p90": 0.00177764892578125, "masked_global_full_update_term/p95": 0.00177764892578125, "masked_global_full_update_term/p99": 0.00177764892578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -13568.0, "masked_global_hessian_coeff/max": -13568.0, "masked_global_hessian_coeff/median": -13568.0, "masked_global_hessian_coeff/min": -13568.0, "masked_global_hessian_coeff/p25": -13568.0, "masked_global_hessian_coeff/p75": -13568.0, "masked_global_hessian_coeff/p99": -13568.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 13568.0, "masked_global_hessian_coeff_abs/max": 13568.0, "masked_global_hessian_coeff_abs/median": 13568.0, "masked_global_hessian_coeff_abs/min": 13568.0, "masked_global_hessian_coeff_abs/p25": 13568.0, "masked_global_hessian_coeff_abs/p75": 13568.0, "masked_global_hessian_coeff_abs/p99": 13568.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 4.7691650390625, "masked_per_sentence_gradient_norm/max": 24.125, "masked_per_sentence_gradient_norm/median": 2.484375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 6.109375, "masked_per_sentence_gradient_norm/var": 41.41403579711914, "masked_per_token_gradient_norm": 0.06859590113162994, "masked_per_token_gradient_norm/max": 37.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.1059455573558807e-09, "masked_per_token_gradient_norm/var": 1.1592283248901367, "masked_sentence_fisher_curvature": 4427.2919921875, "masked_sentence_fisher_curvature/max": 19072.0, "masked_sentence_fisher_curvature/median": 4032.0, "masked_sentence_fisher_curvature/min": 56.0, "masked_sentence_fisher_curvature/p25": 1222.0, "masked_sentence_fisher_curvature/p75": 5248.0, "masked_sentence_fisher_curvature/p85": 7432.0, "masked_sentence_fisher_curvature/p90": 8544.0, "masked_sentence_fisher_curvature/p95": 14784.0, "masked_sentence_fisher_curvature/p99": 17856.00390625, "masked_sentence_fisher_curvature/var": 16213054.0, "masked_sentence_fisher_kl_divergence": 1.3639332330228626e-08, "masked_sentence_fisher_kl_divergence/max": 5.8673322200775146e-08, "masked_sentence_fisher_kl_divergence/median": 1.2398231774568558e-08, "masked_sentence_fisher_kl_divergence/min": 1.7280399333685637e-10, "masked_sentence_fisher_kl_divergence/p25": 3.754394128918648e-09, "masked_sentence_fisher_kl_divergence/p75": 1.618172973394394e-08, "masked_sentence_fisher_kl_divergence/p85": 2.2904714569449425e-08, "masked_sentence_fisher_kl_divergence/p90": 2.6309862732887268e-08, "masked_sentence_fisher_kl_divergence/p95": 4.563480615615845e-08, "masked_sentence_fisher_kl_divergence/p99": 5.491311938499166e-08, "masked_sentence_fisher_kl_divergence/var": 1.539749461710872e-16, "masked_sentence_full_gradient_variance/max_squared_error": 61.336944580078125, "masked_sentence_full_gradient_variance/metric": 61.336944580078125, "masked_sentence_full_gradient_variance/p75": 61.336944580078125, "masked_sentence_full_gradient_variance/p90": 61.336944580078125, "masked_sentence_full_gradient_variance/p95": 61.336944580078125, "masked_sentence_full_gradient_variance/p99": 61.336944580078125, "masked_sentence_full_update_term": 0.0008711914415471256, "masked_sentence_full_update_term/max": 0.0050048828125, "masked_sentence_full_update_term/median": 0.000568389892578125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.001338958740234375, "masked_sentence_full_update_term/p85": 0.0020771026611328125, "masked_sentence_full_update_term/p90": 0.0022125244140625, "masked_sentence_full_update_term/p95": 0.003021240234375, "masked_sentence_full_update_term/p99": 0.004396059084683657, "masked_sentence_full_update_term/var": 1.1890341511389124e-06, "masked_sentence_hessian_coeff": -42241.3359375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -33536.0, "masked_sentence_hessian_coeff/min": -175104.0, "masked_sentence_hessian_coeff/p25": -67840.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 2136936448.0, "masked_sentence_hessian_coeff_abs": 42241.3359375, "masked_sentence_hessian_coeff_abs/max": 175104.0, "masked_sentence_hessian_coeff_abs/median": 31744.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 67840.0, "masked_sentence_hessian_coeff_abs/p99": 159539.25, "masked_sentence_hessian_coeff_abs/var": 2136936448.0, "masked_token_fisher_curvature": 4380.01318359375, "masked_token_fisher_curvature/max": 3063808.0, "masked_token_fisher_curvature/median": 1.1926223897340549e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.539896717997375e-24, "masked_token_fisher_curvature/p75": 5.950795411990839e-14, "masked_token_fisher_curvature/p85": 3.319655661471188e-11, "masked_token_fisher_curvature/p90": 5.710035111405887e-09, "masked_token_fisher_curvature/p95": 4.1961669921875e-05, "masked_token_fisher_curvature/p99": 13010.0, "masked_token_fisher_curvature/var": 6383507456.0, "masked_token_fisher_kl_divergence": 1.348746625495778e-08, "masked_token_fisher_kl_divergence/max": 9.417533874511719e-06, "masked_token_fisher_kl_divergence/median": 3.673133589935336e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.0124463286077562e-35, "masked_token_fisher_kl_divergence/p75": 1.8336913969681375e-25, "masked_token_fisher_kl_divergence/p85": 1.0215680565029892e-22, "masked_token_fisher_kl_divergence/p90": 1.7596613170840558e-20, "masked_token_fisher_kl_divergence/p95": 1.2923689896027213e-16, "masked_token_fisher_kl_divergence/p99": 4.00796125177294e-08, "masked_token_fisher_kl_divergence/var": 6.050918094488086e-14, "masked_token_full_update_term": 8.713673196325544e-06, "masked_token_full_update_term/max": 0.0042724609375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.0116567611694336e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.5261910618912253e-17, "masked_token_full_update_term/p85": 1.2712053631958042e-14, "masked_token_full_update_term/p90": 2.469136006766348e-13, "masked_token_full_update_term/p95": 3.2741809263825417e-11, "masked_token_full_update_term/p99": 6.530433893203735e-06, "masked_token_full_update_term/var": 1.909569924407606e-08, "masked_token_hessian_coeff": -42748.14453125, "masked_token_hessian_coeff/max": 760.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -19136512.0, "masked_token_hessian_coeff/p25": -1.1117663234472275e-08, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.00127410888671875, "masked_token_hessian_coeff/var": 426945544192.0, "masked_token_hessian_coeff_abs": 42748.24609375, "masked_token_hessian_coeff_abs/max": 19136512.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.0070001482963562e-07, "masked_token_hessian_coeff_abs/p99": 14536.0, "masked_token_hessian_coeff_abs/var": 426945544192.0, "mean_logprobs": -0.00860595703125, "mean_logprobs/var": 4.291534423828125e-05, "num_completions/total": 7392, "per_sentence_gradient_norm": 35.80924606323242, "per_sentence_gradient_norm/max": 200.0, "per_sentence_gradient_norm/median": 24.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 57.6875, "per_sentence_gradient_norm/var": 2014.6409912109375, "per_token_feature_norm": 189.84130859375, "per_token_feature_norm/max": 280.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 101.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 131.58311462402344, "per_token_gradient_norm": 0.5060639977455139, "per_token_gradient_norm/max": 262.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.229636836796999e-09, "per_token_gradient_norm/var": 63.46434020996094, "per_token_policy_error_norm": 0.005190596915781498, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004877733998000622, "policy_entropy": 0.009369797073304653, "policy_entropy/max": 1.6484375, "policy_entropy/median": 4.420144250616431e-10, "policy_entropy/min": 2.0964065444543933e-20, "policy_entropy/p25": 1.3571366253017914e-12, "policy_entropy/p75": 6.845220923423767e-08, "policy_entropy/var": 0.005000210367143154, "policy_loss": -0.59375, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24375000596046448, "policy_sharpness": 9.753591537475586, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6120655536651611, "reward": 0.59375, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24375000596046448, "rewards/accuracy_reward": 0.59375, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24375000596046448, "sentence_fisher_curvature": 221842.171875, "sentence_fisher_curvature/max": 1253376.0, "sentence_fisher_curvature/median": 166912.0, "sentence_fisher_curvature/min": 56.0, "sentence_fisher_curvature/p25": 3076.0, "sentence_fisher_curvature/p75": 361984.0, "sentence_fisher_curvature/p85": 519168.0, "sentence_fisher_curvature/p90": 589824.0, "sentence_fisher_curvature/p95": 746496.0, "sentence_fisher_curvature/p99": 1136640.375, "sentence_fisher_curvature/var": 75939463168.0, "sentence_fisher_kl_divergence": 6.828571486039436e-07, "sentence_fisher_kl_divergence/max": 3.844499588012695e-06, "sentence_fisher_kl_divergence/median": 5.140900611877441e-07, "sentence_fisher_kl_divergence/min": 1.7280399333685637e-10, "sentence_fisher_kl_divergence/p25": 9.47329681366682e-09, "sentence_fisher_kl_divergence/p75": 1.1138617992401123e-06, "sentence_fisher_kl_divergence/p85": 1.5981495380401611e-06, "sentence_fisher_kl_divergence/p90": 1.817941665649414e-06, "sentence_fisher_kl_divergence/p95": 2.298504114151001e-06, "sentence_fisher_kl_divergence/p99": 3.504754204186611e-06, "sentence_fisher_kl_divergence/var": 7.193492221162223e-13, "sentence_full_gradient_variance/max_squared_error": 3207.95703125, "sentence_full_gradient_variance/metric": 3207.95703125, "sentence_full_gradient_variance/p75": 3207.95703125, "sentence_full_gradient_variance/p90": 3207.95703125, "sentence_full_gradient_variance/p95": 3207.95703125, "sentence_full_gradient_variance/p99": 3207.95703125, "sentence_full_update_term": 0.007030169479548931, "sentence_full_update_term/max": 0.038330078125, "sentence_full_update_term/median": 0.00506591796875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.01080322265625, "sentence_full_update_term/p85": 0.014556884765625, "sentence_full_update_term/p90": 0.017822265625, "sentence_full_update_term/p95": 0.025360107421875, "sentence_full_update_term/p99": 0.03276368975639343, "sentence_full_update_term/var": 7.182125409599394e-05, "sentence_hessian_coeff": 4345.33349609375, "sentence_hessian_coeff/max": 716800.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -311296.0, "sentence_hessian_coeff/p25": -90624.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 561152.5, "sentence_hessian_coeff/var": 21450240000.0, "sentence_hessian_coeff_abs": 83848.0, "sentence_hessian_coeff_abs/max": 716800.0, "sentence_hessian_coeff_abs/median": 40192.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 117632.0, "sentence_hessian_coeff_abs/p99": 561152.5, "sentence_hessian_coeff_abs/var": 14364826624.0, "step": 77, "token_fisher_curvature": 179612.71875, "token_fisher_curvature/max": 198180864.0, "token_fisher_curvature/median": 1.2874900798265365e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.927637630131607e-24, "token_fisher_curvature/p75": 7.105427357601002e-14, "token_fisher_curvature/p85": 4.615685611497611e-11, "token_fisher_curvature/p90": 9.022187441587448e-09, "token_fisher_curvature/p95": 0.0001293271780014038, "token_fisher_curvature/p99": 76288.0, "token_fisher_curvature/var": 15819010473984.0, "token_fisher_kl_divergence": 5.527805342353531e-07, "token_fisher_kl_divergence/max": 0.0006103515625, "token_fisher_kl_divergence/median": 3.9689564293932156e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.134697741093274e-35, "token_fisher_kl_divergence/p75": 2.1810426307550534e-25, "token_fisher_kl_divergence/p85": 1.4227506535912076e-22, "token_fisher_kl_divergence/p90": 2.7740329022578336e-20, "token_fisher_kl_divergence/p95": 3.9901350452897777e-16, "token_fisher_kl_divergence/p99": 2.3469328880310059e-07, "token_fisher_kl_divergence/var": 1.4986858976051565e-10, "token_full_update_term": 6.596493039978668e-05, "token_full_update_term/max": 0.035400390625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.0116567611694336e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.122502256758253e-17, "token_full_update_term/p85": 1.5765166949677223e-14, "token_full_update_term/p90": 3.2152058793144533e-13, "token_full_update_term/p95": 5.482014842073113e-11, "token_full_update_term/p99": 0.00015926361083984375, "token_full_update_term/var": 1.102187525248155e-06, "token_hessian_coeff": -13310.7685546875, "token_hessian_coeff/max": 195035136.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -26869760.0, "token_hessian_coeff/p25": -1.234002411365509e-08, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00201416015625, "token_hessian_coeff/var": 9870910685184.0, "token_hessian_coeff_abs": 165025.6875, "token_hessian_coeff_abs/max": 195035136.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.2724270820617676e-07, "token_hessian_coeff_abs/p99": 736640.0, "token_hessian_coeff_abs/var": 9843853230080.0 }, { "accuracy_reward": 0.8854166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.10252193361520767, "adam_stats/lm_head/lr_effective_max": 9.941762982634827e-06, "adam_stats/lm_head/lr_effective_mean": 4.190018557809205e-12, "adam_stats/lm_head/lr_effective_min": -1.0615819519443903e-05, "adam_stats/lm_head/lr_effective_std": 2.890762118568091e-07, "adam_stats/lr_effective_max": 1.1249891031184234e-05, "adam_stats/lr_effective_mean": 5.850948198160566e-12, "adam_stats/lr_effective_min": -1.1070844266214408e-05, "adam_stats/m_t_max": 0.0006275332416407764, "adam_stats/m_t_mean": -1.4036295997027493e-12, "adam_stats/m_t_min": -0.0005677641602233052, "adam_stats/v_t_max": 2.491468512744177e-05, "adam_stats/v_t_mean": 1.7259361257890293e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8854166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.10252193361520767, "all_logprobs": -0.007974243722856045, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.0, "all_logprobs/p1": -0.13917958736419678, "all_logprobs/p10": -7.152557373046875e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -5.817413330078125e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012237329967319965, "clip_ratio": 0.0, "completion_length": 552.7604370117188, "completion_length/correct": 532.4235229492188, "completion_length/correct/max": 954.0, "completion_length/correct/median": 503.0, "completion_length/correct/min": 261.0, "completion_length/correct/p25": 419.0, "completion_length/correct/p75": 634.0, "completion_length/correct/var": 36668.26953125, "completion_length/incorrect": 709.9091186523438, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 632.0, "completion_length/incorrect/min": 498.0, "completion_length/incorrect/p25": 580.5, "completion_length/incorrect/p75": 838.0, "completion_length/incorrect/var": 42940.0859375, "completion_length/max": 1024.0, "completion_length/median": 508.0, "completion_length/min": 261.0, "completion_length/p25": 424.75, "completion_length/p75": 645.25, "completion_length/var": 40172.0390625, "curvature_clip_ratio_token_fisher": 0.007556769996881485, "curvature_clip_ratio_token_hessian": 0.004145858809351921, "curvature_clip_ratio_total_fisher": 0.007556769996881485, "curvature_clip_ratio_total_full": 0.007556769996881485, "curvature_clip_ratio_total_hessian": 0.004145858809351921, "epoch": 0.1248, "feature_vector_variance/max_squared_error": 54701.390625, "feature_vector_variance/metric": 31161.33203125, "generated_tokens/total": 4342131.0, "global_fisher_curvature": 173056.0, "global_fisher_curvature/max": 173056.0, "global_fisher_curvature/median": 173056.0, "global_fisher_curvature/min": 173056.0, "global_fisher_curvature/p25": 173056.0, "global_fisher_curvature/p75": 173056.0, "global_fisher_curvature/p85": 173056.0, "global_fisher_curvature/p90": 173056.0, "global_fisher_curvature/p95": 173056.0, "global_fisher_curvature/p99": 173056.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.544854164123535e-07, "global_fisher_kl_divergence/max": 4.544854164123535e-07, "global_fisher_kl_divergence/median": 4.544854164123535e-07, "global_fisher_kl_divergence/min": 4.544854164123535e-07, "global_fisher_kl_divergence/p25": 4.544854164123535e-07, "global_fisher_kl_divergence/p75": 4.544854164123535e-07, "global_fisher_kl_divergence/p85": 4.544854164123535e-07, "global_fisher_kl_divergence/p90": 4.544854164123535e-07, "global_fisher_kl_divergence/p95": 4.544854164123535e-07, "global_fisher_kl_divergence/p99": 4.544854164123535e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.017578125, "global_full_update_term/max": 0.017578125, "global_full_update_term/median": 0.017578125, "global_full_update_term/min": 0.017578125, "global_full_update_term/p25": 0.017578125, "global_full_update_term/p75": 0.017578125, "global_full_update_term/p85": 0.017578125, "global_full_update_term/p90": 0.017578125, "global_full_update_term/p95": 0.017578125, "global_full_update_term/p99": 0.017578125, "global_full_update_term/var": NaN, "global_hessian_coeff": 51200.0, "global_hessian_coeff/max": 51200.0, "global_hessian_coeff/median": 51200.0, "global_hessian_coeff/min": 51200.0, "global_hessian_coeff/p25": 51200.0, "global_hessian_coeff/p75": 51200.0, "global_hessian_coeff/p99": 51200.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 51200.0, "global_hessian_coeff_abs/max": 51200.0, "global_hessian_coeff_abs/median": 51200.0, "global_hessian_coeff_abs/min": 51200.0, "global_hessian_coeff_abs/p25": 51200.0, "global_hessian_coeff_abs/p75": 51200.0, "global_hessian_coeff_abs/p99": 51200.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.12133652716875076, "learning_rate": 2.104951497460118e-06, "loss": -0.8854, "masked_global_fisher_curvature": 3104.0, "masked_global_fisher_curvature/max": 3104.0, "masked_global_fisher_curvature/median": 3104.0, "masked_global_fisher_curvature/min": 3104.0, "masked_global_fisher_curvature/p25": 3104.0, "masked_global_fisher_curvature/p75": 3104.0, "masked_global_fisher_curvature/p85": 3104.0, "masked_global_fisher_curvature/p90": 3104.0, "masked_global_fisher_curvature/p95": 3104.0, "masked_global_fisher_curvature/p99": 3104.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/max": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/median": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/min": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p25": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p75": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p85": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p90": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p95": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/p99": 8.149072527885437e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003387451171875, "masked_global_full_update_term/max": 0.003387451171875, "masked_global_full_update_term/median": 0.003387451171875, "masked_global_full_update_term/min": 0.003387451171875, "masked_global_full_update_term/p25": 0.003387451171875, "masked_global_full_update_term/p75": 0.003387451171875, "masked_global_full_update_term/p85": 0.003387451171875, "masked_global_full_update_term/p90": 0.003387451171875, "masked_global_full_update_term/p95": 0.003387451171875, "masked_global_full_update_term/p99": 0.003387451171875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -22144.0, "masked_global_hessian_coeff/max": -22144.0, "masked_global_hessian_coeff/median": -22144.0, "masked_global_hessian_coeff/min": -22144.0, "masked_global_hessian_coeff/p25": -22144.0, "masked_global_hessian_coeff/p75": -22144.0, "masked_global_hessian_coeff/p99": -22144.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 22144.0, "masked_global_hessian_coeff_abs/max": 22144.0, "masked_global_hessian_coeff_abs/median": 22144.0, "masked_global_hessian_coeff_abs/min": 22144.0, "masked_global_hessian_coeff_abs/p25": 22144.0, "masked_global_hessian_coeff_abs/p75": 22144.0, "masked_global_hessian_coeff_abs/p99": 22144.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 5.9609375, "masked_per_sentence_gradient_norm/max": 26.0, "masked_per_sentence_gradient_norm/median": 4.53125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 2.9375, "masked_per_sentence_gradient_norm/p75": 8.875, "masked_per_sentence_gradient_norm/var": 26.745798110961914, "masked_per_token_gradient_norm": 0.11709114909172058, "masked_per_token_gradient_norm/max": 36.5, "masked_per_token_gradient_norm/median": 1.9736035028472543e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 1.687538997430238e-14, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.4901161193847656e-07, "masked_per_token_gradient_norm/var": 1.803993821144104, "masked_sentence_fisher_curvature": 5286.55224609375, "masked_sentence_fisher_curvature/max": 23168.0, "masked_sentence_fisher_curvature/median": 3648.0, "masked_sentence_fisher_curvature/min": 32.5, "masked_sentence_fisher_curvature/p25": 1888.0, "masked_sentence_fisher_curvature/p75": 7512.0, "masked_sentence_fisher_curvature/p85": 9920.0, "masked_sentence_fisher_curvature/p90": 10976.0, "masked_sentence_fisher_curvature/p95": 14176.0, "masked_sentence_fisher_curvature/p99": 22560.001953125, "masked_sentence_fisher_curvature/var": 23407618.0, "masked_sentence_fisher_kl_divergence": 1.3861376935153658e-08, "masked_sentence_fisher_kl_divergence/max": 6.05359673500061e-08, "masked_sentence_fisher_kl_divergence/median": 9.546056389808655e-09, "masked_sentence_fisher_kl_divergence/min": 8.503775461576879e-11, "masked_sentence_fisher_kl_divergence/p25": 4.94765117764473e-09, "masked_sentence_fisher_kl_divergence/p75": 1.9703293219208717e-08, "masked_sentence_fisher_kl_divergence/p85": 2.601882442831993e-08, "masked_sentence_fisher_kl_divergence/p90": 2.8812792152166367e-08, "masked_sentence_fisher_kl_divergence/p95": 3.719469532370567e-08, "masked_sentence_fisher_kl_divergence/p99": 5.920883694443546e-08, "masked_sentence_fisher_kl_divergence/var": 1.6093129689464175e-16, "masked_sentence_full_gradient_variance/max_squared_error": 59.718719482421875, "masked_sentence_full_gradient_variance/metric": 59.718719482421875, "masked_sentence_full_gradient_variance/p75": 59.718719482421875, "masked_sentence_full_gradient_variance/p90": 59.718719482421875, "masked_sentence_full_gradient_variance/p95": 59.718719482421875, "masked_sentence_full_gradient_variance/p99": 59.718719482421875, "masked_sentence_full_update_term": 0.0010965144028887153, "masked_sentence_full_update_term/max": 0.005096435546875, "masked_sentence_full_update_term/median": 0.00080108642578125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0004482269287109375, "masked_sentence_full_update_term/p75": 0.001499176025390625, "masked_sentence_full_update_term/p85": 0.00206756591796875, "masked_sentence_full_update_term/p90": 0.00247955322265625, "masked_sentence_full_update_term/p95": 0.003025054931640625, "masked_sentence_full_update_term/p99": 0.0038787880912423134, "masked_sentence_full_update_term/var": 9.38192158628226e-07, "masked_sentence_hessian_coeff": -62949.16796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -64000.0, "masked_sentence_hessian_coeff/min": -223232.0, "masked_sentence_hessian_coeff/p25": -84224.0, "masked_sentence_hessian_coeff/p75": -28032.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 2320643840.0, "masked_sentence_hessian_coeff_abs": 62949.16796875, "masked_sentence_hessian_coeff_abs/max": 223232.0, "masked_sentence_hessian_coeff_abs/median": 63232.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 28032.0, "masked_sentence_hessian_coeff_abs/p75": 84224.0, "masked_sentence_hessian_coeff_abs/p99": 198912.078125, "masked_sentence_hessian_coeff_abs/var": 2320643840.0, "masked_token_fisher_curvature": 5717.380859375, "masked_token_fisher_curvature/max": 3719168.0, "masked_token_fisher_curvature/median": 6.471331717022855e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.4928025117167921e-24, "masked_token_fisher_curvature/p75": 7.438494264988549e-14, "masked_token_fisher_curvature/p85": 5.161382432561368e-11, "masked_token_fisher_curvature/p90": 1.4551915228366852e-08, "masked_token_fisher_curvature/p95": 0.00012302398681640625, "masked_token_fisher_curvature/p99": 27264.0, "masked_token_fisher_curvature/var": 9127190528.0, "masked_token_fisher_kl_divergence": 1.499607726884733e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 1.7009813268828067e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.92615113174644e-36, "masked_token_fisher_kl_divergence/p75": 1.9528409480927191e-25, "masked_token_fisher_kl_divergence/p85": 1.3565762045869654e-22, "masked_token_fisher_kl_divergence/p90": 3.8116482626443515e-20, "masked_token_fisher_kl_divergence/p95": 3.226585665316861e-16, "masked_token_fisher_kl_divergence/p99": 7.171183824539185e-08, "masked_token_fisher_kl_divergence/var": 6.278688642136557e-14, "masked_token_full_update_term": 1.4020960406924132e-05, "masked_token_full_update_term/max": 0.004241943359375, "masked_token_full_update_term/median": 2.0413494028828638e-19, "masked_token_full_update_term/min": -1.9669532775878906e-06, "masked_token_full_update_term/p25": -5.1543691919710537e-23, "masked_token_full_update_term/p75": 1.5987211554602254e-14, "masked_token_full_update_term/p85": 8.029132914089132e-13, "masked_token_full_update_term/p90": 1.830358087318018e-11, "masked_token_full_update_term/p95": 3.434251993894577e-09, "masked_token_full_update_term/p99": 0.0002593994140625, "masked_token_full_update_term/var": 2.640149787680457e-08, "masked_token_hessian_coeff": -75193.34375, "masked_token_hessian_coeff/max": 5760.0, "masked_token_hessian_coeff/median": -1.0186340659856796e-10, "masked_token_hessian_coeff/min": -19922944.0, "masked_token_hessian_coeff/p25": -4.112720489501953e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.06591796875, "masked_token_hessian_coeff/var": 694902325248.0, "masked_token_hessian_coeff_abs": 75195.5390625, "masked_token_hessian_coeff_abs/max": 19922944.0, "masked_token_hessian_coeff_abs/median": 3.166496753692627e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 2.5295321393059567e-12, "masked_token_hessian_coeff_abs/p75": 3.74913215637207e-05, "masked_token_hessian_coeff_abs/p99": 1654784.0, "masked_token_hessian_coeff_abs/var": 694901997568.0, "mean_logprobs": -0.008544921875, "mean_logprobs/var": 4.5299530029296875e-05, "num_completions/total": 7488, "per_sentence_gradient_norm": 49.32487106323242, "per_sentence_gradient_norm/max": 298.0, "per_sentence_gradient_norm/median": 31.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 13.625, "per_sentence_gradient_norm/p75": 78.625, "per_sentence_gradient_norm/var": 2624.98779296875, "per_token_feature_norm": 190.56423950195312, "per_token_feature_norm/max": 242.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 107.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 148.2781524658203, "per_token_gradient_norm": 0.9726887941360474, "per_token_gradient_norm/max": 304.0, "per_token_gradient_norm/median": 2.1464074961841106e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 1.8096635301390052e-14, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.7601996660232544e-07, "per_token_gradient_norm/var": 130.90333557128906, "per_token_policy_error_norm": 0.004702884703874588, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00460317125543952, "policy_entropy": 0.008343495428562164, "policy_entropy/max": 1.7421875, "policy_entropy/median": 4.0381564758718014e-10, "policy_entropy/min": 1.5775988642611344e-20, "policy_entropy/p25": 7.389644451905042e-13, "policy_entropy/p75": 8.568167686462402e-08, "policy_entropy/var": 0.0043138545006513596, "policy_loss": -0.8854166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.10252193361520767, "policy_sharpness": 9.747294425964355, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6509712934494019, "reward": 0.8854166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.10252193361520767, "rewards/accuracy_reward": 0.8854166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.10252193361520767, "sentence_fisher_curvature": 402056.0625, "sentence_fisher_curvature/max": 1679360.0, "sentence_fisher_curvature/median": 352256.0, "sentence_fisher_curvature/min": 828.0, "sentence_fisher_curvature/p25": 76800.0, "sentence_fisher_curvature/p75": 585728.0, "sentence_fisher_curvature/p85": 676864.0, "sentence_fisher_curvature/p90": 948224.0, "sentence_fisher_curvature/p95": 1091584.0, "sentence_fisher_curvature/p99": 1469235.875, "sentence_fisher_curvature/var": 135297826816.0, "sentence_fisher_kl_divergence": 1.0540272796788486e-06, "sentence_fisher_kl_divergence/max": 4.410743713378906e-06, "sentence_fisher_kl_divergence/median": 9.238719940185547e-07, "sentence_fisher_kl_divergence/min": 2.168235369026661e-09, "sentence_fisher_kl_divergence/p25": 2.0116567611694336e-07, "sentence_fisher_kl_divergence/p75": 1.5348196029663086e-06, "sentence_fisher_kl_divergence/p85": 1.773238182067871e-06, "sentence_fisher_kl_divergence/p90": 2.4884939193725586e-06, "sentence_fisher_kl_divergence/p95": 2.864748239517212e-06, "sentence_fisher_kl_divergence/p99": 3.844501407002099e-06, "sentence_fisher_kl_divergence/var": 9.296070857534033e-13, "sentence_full_gradient_variance/max_squared_error": 4962.0869140625, "sentence_full_gradient_variance/metric": 4962.0869140625, "sentence_full_gradient_variance/p75": 4962.0869140625, "sentence_full_gradient_variance/p90": 4962.0869140625, "sentence_full_gradient_variance/p95": 4962.0869140625, "sentence_full_gradient_variance/p99": 4962.0869140625, "sentence_full_update_term": 0.009404420852661133, "sentence_full_update_term/max": 0.058349609375, "sentence_full_update_term/median": 0.006439208984375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0028533935546875, "sentence_full_update_term/p75": 0.0152435302734375, "sentence_full_update_term/p85": 0.019622802734375, "sentence_full_update_term/p90": 0.0225830078125, "sentence_full_update_term/p95": 0.024169921875, "sentence_full_update_term/p99": 0.0284302718937397, "sentence_full_update_term/var": 8.783055818639696e-05, "sentence_hessian_coeff": 47460.91796875, "sentence_hessian_coeff/max": 1171456.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -309248.0, "sentence_hessian_coeff/p25": -104960.0, "sentence_hessian_coeff/p75": 129664.0, "sentence_hessian_coeff/p99": 883508.125, "sentence_hessian_coeff/var": 65735766016.0, "sentence_hessian_coeff_abs": 172635.59375, "sentence_hessian_coeff_abs/max": 1171456.0, "sentence_hessian_coeff_abs/median": 113152.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 52480.0, "sentence_hessian_coeff_abs/p75": 232448.0, "sentence_hessian_coeff_abs/p99": 883508.125, "sentence_hessian_coeff_abs/var": 37895245824.0, "step": 78, "token_fisher_curvature": 365298.40625, "token_fisher_curvature/max": 200278016.0, "token_fisher_curvature/median": 7.386127300057499e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.6543612251060553e-24, "token_fisher_curvature/p75": 9.769962616701378e-14, "token_fisher_curvature/p85": 9.686118573881686e-11, "token_fisher_curvature/p90": 3.864988684654236e-08, "token_fisher_curvature/p95": 0.0019683837890625, "token_fisher_curvature/p99": 712704.0, "token_fisher_curvature/var": 36648112881664.0, "token_fisher_kl_divergence": 9.58066038947436e-07, "token_fisher_kl_divergence/max": 0.00052642822265625, "token_fisher_kl_divergence/median": 1.9351744081202946e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.349329098042464e-36, "token_fisher_kl_divergence/p75": 2.568783542889285e-25, "token_fisher_kl_divergence/p85": 2.5477162866633252e-22, "token_fisher_kl_divergence/p90": 1.0122043719688889e-19, "token_fisher_kl_divergence/p95": 5.162537064506978e-15, "token_fisher_kl_divergence/p99": 1.8700957298278809e-06, "token_fisher_kl_divergence/var": 2.5206703391233987e-10, "token_full_update_term": 0.00011761149653466418, "token_full_update_term/max": 0.032958984375, "token_full_update_term/median": 2.676624113323589e-19, "token_full_update_term/min": -1.9669532775878906e-06, "token_full_update_term/p25": -3.143286327701505e-23, "token_full_update_term/p75": 1.8540724511240114e-14, "token_full_update_term/p85": 1.0871303857129533e-12, "token_full_update_term/p90": 3.092281986027956e-11, "token_full_update_term/p95": 1.2398231774568558e-08, "token_full_update_term/p99": 0.001793503761291504, "token_full_update_term/var": 1.9200499536964344e-06, "token_hessian_coeff": 4346.58984375, "token_hessian_coeff/max": 193986560.0, "token_hessian_coeff/median": -1.1323209037072957e-10, "token_hessian_coeff/min": -27525120.0, "token_hessian_coeff/p25": -4.649162292480469e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.1650390625, "token_hessian_coeff/var": 24437619425280.0, "token_hessian_coeff_abs": 366772.21875, "token_hessian_coeff_abs/max": 193986560.0, "token_hessian_coeff_abs/median": 3.4691765904426575e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 2.8137492336099967e-12, "token_hessian_coeff_abs/p75": 4.38690185546875e-05, "token_hessian_coeff_abs/p99": 9133056.0, "token_hessian_coeff_abs/var": 24303114387456.0 }, { "accuracy_reward": 0.8125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1539473831653595, "adam_stats/lm_head/lr_effective_max": 9.495853191765491e-06, "adam_stats/lm_head/lr_effective_mean": 8.122371698837672e-12, "adam_stats/lm_head/lr_effective_min": -9.797890015761368e-06, "adam_stats/lm_head/lr_effective_std": 2.811330261920375e-07, "adam_stats/lr_effective_max": 1.0555881999607664e-05, "adam_stats/lr_effective_mean": -2.1107044063939373e-12, "adam_stats/lr_effective_min": -1.0391720934421755e-05, "adam_stats/m_t_max": 0.0005853792536072433, "adam_stats/m_t_mean": 1.9531171385062107e-12, "adam_stats/m_t_min": -0.0007993789040483534, "adam_stats/v_t_max": 2.4895880414987914e-05, "adam_stats/v_t_mean": 1.72793116620662e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.1539473831653595, "all_logprobs": -0.00766310840845108, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.13863277435302734, "all_logprobs/p10": -9.5367431640625e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.486343383789062e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.009964832104742527, "clip_ratio": 0.0, "completion_length": 551.5833740234375, "completion_length/correct": 518.4102783203125, "completion_length/correct/max": 1023.0, "completion_length/correct/median": 408.0, "completion_length/correct/min": 225.0, "completion_length/correct/p25": 332.0, "completion_length/correct/p75": 745.75, "completion_length/correct/var": 55205.390625, "completion_length/incorrect": 695.3333129882812, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 846.0, "completion_length/incorrect/min": 287.0, "completion_length/incorrect/p25": 287.0, "completion_length/incorrect/p75": 1023.5, "completion_length/incorrect/var": 113936.234375, "completion_length/max": 1024.0, "completion_length/median": 439.0, "completion_length/min": 225.0, "completion_length/p25": 332.0, "completion_length/p75": 756.0, "completion_length/var": 69952.84375, "curvature_clip_ratio_token_fisher": 0.006723070051521063, "curvature_clip_ratio_token_hessian": 0.003418190171942115, "curvature_clip_ratio_total_fisher": 0.006723070051521063, "curvature_clip_ratio_total_full": 0.006723070051521063, "curvature_clip_ratio_total_hessian": 0.003418190171942115, "epoch": 0.1264, "feature_vector_variance/max_squared_error": 55591.9921875, "feature_vector_variance/metric": 31287.97265625, "generated_tokens/total": 4395083.0, "global_fisher_curvature": 123392.0, "global_fisher_curvature/max": 123392.0, "global_fisher_curvature/median": 123392.0, "global_fisher_curvature/min": 123392.0, "global_fisher_curvature/p25": 123392.0, "global_fisher_curvature/p75": 123392.0, "global_fisher_curvature/p85": 123392.0, "global_fisher_curvature/p90": 123392.0, "global_fisher_curvature/p95": 123392.0, "global_fisher_curvature/p99": 123392.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.738088369369507e-07, "global_fisher_kl_divergence/max": 2.738088369369507e-07, "global_fisher_kl_divergence/median": 2.738088369369507e-07, "global_fisher_kl_divergence/min": 2.738088369369507e-07, "global_fisher_kl_divergence/p25": 2.738088369369507e-07, "global_fisher_kl_divergence/p75": 2.738088369369507e-07, "global_fisher_kl_divergence/p85": 2.738088369369507e-07, "global_fisher_kl_divergence/p90": 2.738088369369507e-07, "global_fisher_kl_divergence/p95": 2.738088369369507e-07, "global_fisher_kl_divergence/p99": 2.738088369369507e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.01385498046875, "global_full_update_term/max": 0.01385498046875, "global_full_update_term/median": 0.01385498046875, "global_full_update_term/min": 0.01385498046875, "global_full_update_term/p25": 0.01385498046875, "global_full_update_term/p75": 0.01385498046875, "global_full_update_term/p85": 0.01385498046875, "global_full_update_term/p90": 0.01385498046875, "global_full_update_term/p95": 0.01385498046875, "global_full_update_term/p99": 0.01385498046875, "global_full_update_term/var": NaN, "global_hessian_coeff": -10176.0, "global_hessian_coeff/max": -10176.0, "global_hessian_coeff/median": -10176.0, "global_hessian_coeff/min": -10176.0, "global_hessian_coeff/p25": -10176.0, "global_hessian_coeff/p75": -10176.0, "global_hessian_coeff/p99": -10176.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 10176.0, "global_hessian_coeff_abs/max": 10176.0, "global_hessian_coeff_abs/median": 10176.0, "global_hessian_coeff_abs/min": 10176.0, "global_hessian_coeff_abs/p25": 10176.0, "global_hessian_coeff_abs/p75": 10176.0, "global_hessian_coeff_abs/p99": 10176.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.1993715465068817, "learning_rate": 1.9264138089195424e-06, "loss": -0.8125, "masked_global_fisher_curvature": 6208.0, "masked_global_fisher_curvature/max": 6208.0, "masked_global_fisher_curvature/median": 6208.0, "masked_global_fisher_curvature/min": 6208.0, "masked_global_fisher_curvature/p25": 6208.0, "masked_global_fisher_curvature/p75": 6208.0, "masked_global_fisher_curvature/p85": 6208.0, "masked_global_fisher_curvature/p90": 6208.0, "masked_global_fisher_curvature/p95": 6208.0, "masked_global_fisher_curvature/p99": 6208.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/max": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/median": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/min": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/p25": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/p75": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/p85": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/p90": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/p95": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/p99": 1.3737007975578308e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.003204345703125, "masked_global_full_update_term/max": 0.003204345703125, "masked_global_full_update_term/median": 0.003204345703125, "masked_global_full_update_term/min": 0.003204345703125, "masked_global_full_update_term/p25": 0.003204345703125, "masked_global_full_update_term/p75": 0.003204345703125, "masked_global_full_update_term/p85": 0.003204345703125, "masked_global_full_update_term/p90": 0.003204345703125, "masked_global_full_update_term/p95": 0.003204345703125, "masked_global_full_update_term/p99": 0.003204345703125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -33024.0, "masked_global_hessian_coeff/max": -33024.0, "masked_global_hessian_coeff/median": -33024.0, "masked_global_hessian_coeff/min": -33024.0, "masked_global_hessian_coeff/p25": -33024.0, "masked_global_hessian_coeff/p75": -33024.0, "masked_global_hessian_coeff/p99": -33024.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 33024.0, "masked_global_hessian_coeff_abs/max": 33024.0, "masked_global_hessian_coeff_abs/median": 33024.0, "masked_global_hessian_coeff_abs/min": 33024.0, "masked_global_hessian_coeff_abs/p25": 33024.0, "masked_global_hessian_coeff_abs/p75": 33024.0, "masked_global_hessian_coeff_abs/p99": 33024.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 7.81781005859375, "masked_per_sentence_gradient_norm/max": 23.875, "masked_per_sentence_gradient_norm/median": 5.5, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.042724609375, "masked_per_sentence_gradient_norm/p75": 14.078125, "masked_per_sentence_gradient_norm/var": 59.449954986572266, "masked_per_token_gradient_norm": 0.1583470106124878, "masked_per_token_gradient_norm/max": 45.75, "masked_per_token_gradient_norm/median": 2.97859514830634e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 7.047314121155779e-18, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.592622518539429e-08, "masked_per_token_gradient_norm/var": 3.2889416217803955, "masked_sentence_fisher_curvature": 8942.755859375, "masked_sentence_fisher_curvature/max": 29312.0, "masked_sentence_fisher_curvature/median": 7104.0, "masked_sentence_fisher_curvature/min": 0.1279296875, "masked_sentence_fisher_curvature/p25": 2404.0, "masked_sentence_fisher_curvature/p75": 12112.0, "masked_sentence_fisher_curvature/p85": 17536.0, "masked_sentence_fisher_curvature/p90": 17920.0, "masked_sentence_fisher_curvature/p95": 28928.0, "masked_sentence_fisher_curvature/p99": 29312.0, "masked_sentence_fisher_curvature/var": 63325708.0, "masked_sentence_fisher_kl_divergence": 1.9805105111458943e-08, "masked_sentence_fisher_kl_divergence/max": 6.472691893577576e-08, "masked_sentence_fisher_kl_divergence/median": 1.57160684466362e-08, "masked_sentence_fisher_kl_divergence/min": 2.8421709430404007e-13, "masked_sentence_fisher_kl_divergence/p25": 5.333276931196451e-09, "masked_sentence_fisher_kl_divergence/p75": 2.6833731681108475e-08, "masked_sentence_fisher_kl_divergence/p85": 3.888271749019623e-08, "masked_sentence_fisher_kl_divergence/p90": 3.9814040064811707e-08, "masked_sentence_fisher_kl_divergence/p95": 6.402842700481415e-08, "masked_sentence_fisher_kl_divergence/p99": 6.472691893577576e-08, "masked_sentence_fisher_kl_divergence/var": 3.099886741764359e-16, "masked_sentence_full_gradient_variance/max_squared_error": 113.35508728027344, "masked_sentence_full_gradient_variance/metric": 113.35508728027344, "masked_sentence_full_gradient_variance/p75": 113.35508728027344, "masked_sentence_full_gradient_variance/p90": 113.35508728027344, "masked_sentence_full_gradient_variance/p95": 113.35508728027344, "masked_sentence_full_gradient_variance/p99": 113.35508728027344, "masked_sentence_full_update_term": 0.0014130936469882727, "masked_sentence_full_update_term/max": 0.004180908203125, "masked_sentence_full_update_term/median": 0.00109100341796875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 7.040798664093018e-07, "masked_sentence_full_update_term/p75": 0.002655029296875, "masked_sentence_full_update_term/p85": 0.00344085693359375, "masked_sentence_full_update_term/p90": 0.003692626953125, "masked_sentence_full_update_term/p95": 0.003875732421875, "masked_sentence_full_update_term/p99": 0.0038909921422600746, "masked_sentence_full_update_term/var": 1.88987235105742e-06, "masked_sentence_hessian_coeff": -82165.796875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -85504.0, "masked_sentence_hessian_coeff/min": -268288.0, "masked_sentence_hessian_coeff/p25": -116224.0, "masked_sentence_hessian_coeff/p75": -105.5, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 4890052608.0, "masked_sentence_hessian_coeff_abs": 82165.796875, "masked_sentence_hessian_coeff_abs/max": 268288.0, "masked_sentence_hessian_coeff_abs/median": 83968.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 105.5, "masked_sentence_hessian_coeff_abs/p75": 116224.0, "masked_sentence_hessian_coeff_abs/p99": 268288.0, "masked_sentence_hessian_coeff_abs/var": 4890052608.0, "masked_token_fisher_curvature": 10125.052734375, "masked_token_fisher_curvature/max": 4456448.0, "masked_token_fisher_curvature/median": 5.72594272343907e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 7.02780403243295e-25, "masked_token_fisher_curvature/p75": 1.5054624213917123e-13, "masked_token_fisher_curvature/p85": 1.000444171950221e-10, "masked_token_fisher_curvature/p90": 2.130400389432907e-08, "masked_token_fisher_curvature/p95": 0.000568389892578125, "masked_token_fisher_curvature/p99": 45312.0, "masked_token_fisher_curvature/var": 22620180480.0, "masked_token_fisher_kl_divergence": 2.2437168567535082e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.2695730193400659e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.5604687507165867e-36, "masked_token_fisher_kl_divergence/p75": 3.3361874314882854e-25, "masked_token_fisher_kl_divergence/p85": 2.216844041642114e-22, "masked_token_fisher_kl_divergence/p90": 4.7222086809427244e-20, "masked_token_fisher_kl_divergence/p95": 1.2559397966072083e-15, "masked_token_fisher_kl_divergence/p99": 1.0058283805847168e-07, "masked_token_fisher_kl_divergence/var": 1.11061496371051e-13, "masked_token_full_update_term": 1.7511914848000742e-05, "masked_token_full_update_term/max": 0.004425048828125, "masked_token_full_update_term/median": 3.908428394313056e-23, "masked_token_full_update_term/min": -1.9669532775878906e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.774758283725532e-15, "masked_token_full_update_term/p85": 5.577760475716786e-13, "masked_token_full_update_term/p90": 1.0317080523236655e-11, "masked_token_full_update_term/p95": 3.4415279515087605e-09, "masked_token_full_update_term/p99": 0.00025177001953125, "masked_token_full_update_term/var": 4.133842068654303e-08, "masked_token_hessian_coeff": -99697.734375, "masked_token_hessian_coeff/max": 532.0, "masked_token_hessian_coeff/median": -1.3358203432289883e-12, "masked_token_hessian_coeff/min": -20840448.0, "masked_token_hessian_coeff/p25": -2.7604401111602783e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.014892578125, "masked_token_hessian_coeff/var": 1241250398208.0, "masked_token_hessian_coeff_abs": 99697.9375, "masked_token_hessian_coeff_abs/max": 20840448.0, "masked_token_hessian_coeff_abs/median": 4.307366907596588e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 1.177877240188252e-15, "masked_token_hessian_coeff_abs/p75": 3.170967102050781e-05, "masked_token_hessian_coeff_abs/p99": 1728512.0, "masked_token_hessian_coeff_abs/var": 1241250398208.0, "mean_logprobs": -0.006622314453125, "mean_logprobs/var": 2.47955322265625e-05, "num_completions/total": 7584, "per_sentence_gradient_norm": 34.889976501464844, "per_sentence_gradient_norm/max": 155.0, "per_sentence_gradient_norm/median": 23.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 2.0, "per_sentence_gradient_norm/p75": 53.5625, "per_sentence_gradient_norm/var": 1259.98291015625, "per_token_feature_norm": 190.7575225830078, "per_token_feature_norm/max": 254.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 107.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 148.20391845703125, "per_token_gradient_norm": 0.8963007926940918, "per_token_gradient_norm/max": 268.0, "per_token_gradient_norm/median": 3.319655661471188e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 9.703609443745265e-18, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.123407855629921e-07, "per_token_gradient_norm/var": 108.56722259521484, "per_token_policy_error_norm": 0.004382764920592308, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0040017045103013515, "policy_entropy": 0.009177190251648426, "policy_entropy/max": 1.515625, "policy_entropy/median": 3.765308065339923e-10, "policy_entropy/min": 1.2123159057577174e-20, "policy_entropy/p25": 5.222489107836736e-13, "policy_entropy/p75": 9.499490261077881e-08, "policy_entropy/var": 0.004610444884747267, "policy_loss": -0.8125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.1539473831653595, "policy_sharpness": 9.739677429199219, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6999478340148926, "reward": 0.8125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.1539473831653595, "rewards/accuracy_reward": 0.8125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1539473831653595, "sentence_fisher_curvature": 292162.75, "sentence_fisher_curvature/max": 1449984.0, "sentence_fisher_curvature/median": 208896.0, "sentence_fisher_curvature/min": 210.0, "sentence_fisher_curvature/p25": 31616.0, "sentence_fisher_curvature/p75": 453120.0, "sentence_fisher_curvature/p85": 606208.0, "sentence_fisher_curvature/p90": 737280.0, "sentence_fisher_curvature/p95": 846848.0, "sentence_fisher_curvature/p99": 944129.625, "sentence_fisher_curvature/var": 86154952704.0, "sentence_fisher_kl_divergence": 6.472433824455948e-07, "sentence_fisher_kl_divergence/max": 3.2186508178710938e-06, "sentence_fisher_kl_divergence/median": 4.6193599700927734e-07, "sentence_fisher_kl_divergence/min": 4.656612873077393e-10, "sentence_fisher_kl_divergence/p25": 6.984919309616089e-08, "sentence_fisher_kl_divergence/p75": 1.0058283805847168e-06, "sentence_fisher_kl_divergence/p85": 1.341104507446289e-06, "sentence_fisher_kl_divergence/p90": 1.6316771507263184e-06, "sentence_fisher_kl_divergence/p95": 1.8775463104248047e-06, "sentence_fisher_kl_divergence/p99": 2.086166205117479e-06, "sentence_fisher_kl_divergence/var": 4.231156711890366e-13, "sentence_full_gradient_variance/max_squared_error": 2411.66845703125, "sentence_full_gradient_variance/metric": 2411.66845703125, "sentence_full_gradient_variance/p75": 2411.66845703125, "sentence_full_gradient_variance/p90": 2411.66845703125, "sentence_full_gradient_variance/p95": 2411.66845703125, "sentence_full_gradient_variance/p99": 2411.66845703125, "sentence_full_update_term": 0.00680088996887207, "sentence_full_update_term/max": 0.024658203125, "sentence_full_update_term/median": 0.00408935546875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00037384033203125, "sentence_full_update_term/p75": 0.010955810546875, "sentence_full_update_term/p85": 0.0152435302734375, "sentence_full_update_term/p90": 0.01611328125, "sentence_full_update_term/p95": 0.018890380859375, "sentence_full_update_term/p99": 0.02454223670065403, "sentence_full_update_term/var": 4.520117727224715e-05, "sentence_hessian_coeff": -43173.0, "sentence_hessian_coeff/max": 675840.0, "sentence_hessian_coeff/median": -103424.0, "sentence_hessian_coeff/min": -288768.0, "sentence_hessian_coeff/p25": -158720.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 660275.25, "sentence_hessian_coeff/var": 39928201216.0, "sentence_hessian_coeff_abs": 158051.0, "sentence_hessian_coeff_abs/max": 675840.0, "sentence_hessian_coeff_abs/median": 158720.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 69248.0, "sentence_hessian_coeff_abs/p75": 224256.0, "sentence_hessian_coeff_abs/p99": 660275.25, "sentence_hessian_coeff_abs/var": 16568665088.0, "step": 79, "token_fisher_curvature": 317332.46875, "token_fisher_curvature/max": 217055232.0, "token_fisher_curvature/median": 6.708500942254059e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.997156312768529e-25, "token_fisher_curvature/p75": 1.9184653865522705e-13, "token_fisher_curvature/p85": 1.5552359400317073e-10, "token_fisher_curvature/p90": 5.2386894822120667e-08, "token_fisher_curvature/p95": 0.003814697265625, "token_fisher_curvature/p99": 1146880.0, "token_fisher_curvature/var": 30543609593856.0, "token_fisher_kl_divergence": 7.030672009022965e-07, "token_fisher_kl_divergence/max": 0.00048065185546875, "token_fisher_kl_divergence/median": 1.4852771731114363e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.7720577338645984e-36, "token_fisher_kl_divergence/p75": 4.265150033476549e-25, "token_fisher_kl_divergence/p85": 3.441071348220595e-22, "token_fisher_kl_divergence/p90": 1.1604351377383915e-19, "token_fisher_kl_divergence/p95": 8.43769498715119e-15, "token_fisher_kl_divergence/p99": 2.5480985641479492e-06, "token_fisher_kl_divergence/var": 1.4998459418880117e-10, "token_full_update_term": 0.00010110428411280736, "token_full_update_term/max": 0.031494140625, "token_full_update_term/median": 7.610061635487855e-23, "token_full_update_term/min": -1.9669532775878906e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.6629367034256575e-15, "token_full_update_term/p85": 7.318590178329032e-13, "token_full_update_term/p90": 1.7268853014229535e-11, "token_full_update_term/p95": 1.2540795069071464e-08, "token_full_update_term/p99": 0.0021970272064208984, "token_full_update_term/var": 1.4026511507836403e-06, "token_hessian_coeff": -54892.890625, "token_hessian_coeff/max": 214958080.0, "token_hessian_coeff/median": -1.4992451724538114e-12, "token_hessian_coeff/min": -27525120.0, "token_hessian_coeff/p25": -3.1739473342895508e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.025144577026367188, "token_hessian_coeff/var": 21443125444608.0, "token_hessian_coeff_abs": 340146.5, "token_hessian_coeff_abs/max": 214958080.0, "token_hessian_coeff_abs/median": 4.918547347187996e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.4137996329210978e-15, "token_hessian_coeff_abs/p75": 3.7670135498046875e-05, "token_hessian_coeff_abs/p99": 10812416.0, "token_hessian_coeff_abs/var": 21330439176192.0 }, { "accuracy_reward": 0.8541666865348816, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.12587718665599823, "adam_stats/lm_head/lr_effective_max": 8.835225344228093e-06, "adam_stats/lm_head/lr_effective_mean": 9.148766813571463e-12, "adam_stats/lm_head/lr_effective_min": -9.318284355686046e-06, "adam_stats/lm_head/lr_effective_std": 2.6417390586175316e-07, "adam_stats/lr_effective_max": 9.646073522162624e-06, "adam_stats/lr_effective_mean": 1.6661625204528185e-11, "adam_stats/lr_effective_min": -9.867910193861462e-06, "adam_stats/m_t_max": 0.0006064785411581397, "adam_stats/m_t_mean": 4.122353500224385e-12, "adam_stats/m_t_min": -0.0006851087091490626, "adam_stats/v_t_max": 2.4871031200746074e-05, "adam_stats/v_t_mean": 1.729330545950647e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8541666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.12587718665599823, "all_logprobs": -0.007626364938914776, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.10221678018569946, "all_logprobs/p10": -5.960464477539062e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -5.817413330078125e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.010745121166110039, "clip_ratio": 0.0, "completion_length": 539.4166870117188, "completion_length/correct": 463.2073059082031, "completion_length/correct/max": 952.0, "completion_length/correct/median": 468.0, "completion_length/correct/min": 215.0, "completion_length/correct/p25": 347.0, "completion_length/correct/p75": 526.0, "completion_length/correct/var": 27356.392578125, "completion_length/incorrect": 985.7857666015625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 1024.0, "completion_length/incorrect/min": 654.0, "completion_length/incorrect/p25": 1024.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 10047.10546875, "completion_length/max": 1024.0, "completion_length/median": 488.0, "completion_length/min": 215.0, "completion_length/p25": 347.0, "completion_length/p75": 641.5, "completion_length/var": 59075.36328125, "curvature_clip_ratio_token_fisher": 0.005310520529747009, "curvature_clip_ratio_token_hessian": 0.0029738915618509054, "curvature_clip_ratio_total_fisher": 0.005310520529747009, "curvature_clip_ratio_total_full": 0.005310520529747009, "curvature_clip_ratio_total_hessian": 0.0029738915618509054, "epoch": 0.128, "feature_vector_variance/max_squared_error": 55883.421875, "feature_vector_variance/metric": 30998.419921875, "generated_tokens/total": 4446867.0, "global_fisher_curvature": 172032.0, "global_fisher_curvature/max": 172032.0, "global_fisher_curvature/median": 172032.0, "global_fisher_curvature/min": 172032.0, "global_fisher_curvature/p25": 172032.0, "global_fisher_curvature/p75": 172032.0, "global_fisher_curvature/p85": 172032.0, "global_fisher_curvature/p90": 172032.0, "global_fisher_curvature/p95": 172032.0, "global_fisher_curvature/p99": 172032.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.1851232051849365e-07, "global_fisher_kl_divergence/max": 3.1851232051849365e-07, "global_fisher_kl_divergence/median": 3.1851232051849365e-07, "global_fisher_kl_divergence/min": 3.1851232051849365e-07, "global_fisher_kl_divergence/p25": 3.1851232051849365e-07, "global_fisher_kl_divergence/p75": 3.1851232051849365e-07, "global_fisher_kl_divergence/p85": 3.1851232051849365e-07, "global_fisher_kl_divergence/p90": 3.1851232051849365e-07, "global_fisher_kl_divergence/p95": 3.1851232051849365e-07, "global_fisher_kl_divergence/p99": 3.1851232051849365e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.016845703125, "global_full_update_term/max": 0.016845703125, "global_full_update_term/median": 0.016845703125, "global_full_update_term/min": 0.016845703125, "global_full_update_term/p25": 0.016845703125, "global_full_update_term/p75": 0.016845703125, "global_full_update_term/p85": 0.016845703125, "global_full_update_term/p90": 0.016845703125, "global_full_update_term/p95": 0.016845703125, "global_full_update_term/p99": 0.016845703125, "global_full_update_term/var": NaN, "global_hessian_coeff": 46592.0, "global_hessian_coeff/max": 46592.0, "global_hessian_coeff/median": 46592.0, "global_hessian_coeff/min": 46592.0, "global_hessian_coeff/p25": 46592.0, "global_hessian_coeff/p75": 46592.0, "global_hessian_coeff/p99": 46592.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 46592.0, "global_hessian_coeff_abs/max": 46592.0, "global_hessian_coeff_abs/median": 46592.0, "global_hessian_coeff_abs/min": 46592.0, "global_hessian_coeff_abs/p25": 46592.0, "global_hessian_coeff_abs/p75": 46592.0, "global_hessian_coeff_abs/p99": 46592.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.15785469114780426, "learning_rate": 1.7546666766076658e-06, "loss": -0.8542, "masked_global_fisher_curvature": 5536.0, "masked_global_fisher_curvature/max": 5536.0, "masked_global_fisher_curvature/median": 5536.0, "masked_global_fisher_curvature/min": 5536.0, "masked_global_fisher_curvature/p25": 5536.0, "masked_global_fisher_curvature/p75": 5536.0, "masked_global_fisher_curvature/p85": 5536.0, "masked_global_fisher_curvature/p90": 5536.0, "masked_global_fisher_curvature/p95": 5536.0, "masked_global_fisher_curvature/p99": 5536.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/max": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/median": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/min": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/p25": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/p75": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/p85": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/p90": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/p95": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/p99": 1.0244548320770264e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00445556640625, "masked_global_full_update_term/max": 0.00445556640625, "masked_global_full_update_term/median": 0.00445556640625, "masked_global_full_update_term/min": 0.00445556640625, "masked_global_full_update_term/p25": 0.00445556640625, "masked_global_full_update_term/p75": 0.00445556640625, "masked_global_full_update_term/p85": 0.00445556640625, "masked_global_full_update_term/p90": 0.00445556640625, "masked_global_full_update_term/p95": 0.00445556640625, "masked_global_full_update_term/p99": 0.00445556640625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -38400.0, "masked_global_hessian_coeff/max": -38400.0, "masked_global_hessian_coeff/median": -38400.0, "masked_global_hessian_coeff/min": -38400.0, "masked_global_hessian_coeff/p25": -38400.0, "masked_global_hessian_coeff/p75": -38400.0, "masked_global_hessian_coeff/p99": -38400.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 38400.0, "masked_global_hessian_coeff_abs/max": 38400.0, "masked_global_hessian_coeff_abs/median": 38400.0, "masked_global_hessian_coeff_abs/min": 38400.0, "masked_global_hessian_coeff_abs/p25": 38400.0, "masked_global_hessian_coeff_abs/p75": 38400.0, "masked_global_hessian_coeff_abs/p99": 38400.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 8.012990951538086, "masked_per_sentence_gradient_norm/max": 38.25, "masked_per_sentence_gradient_norm/median": 3.1875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 1.7109375, "masked_per_sentence_gradient_norm/p75": 13.40625, "masked_per_sentence_gradient_norm/var": 82.71466827392578, "masked_per_token_gradient_norm": 0.1558743715286255, "masked_per_token_gradient_norm/max": 42.5, "masked_per_token_gradient_norm/median": 3.183231456205249e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.938353180885315e-08, "masked_per_token_gradient_norm/var": 3.4248859882354736, "masked_sentence_fisher_curvature": 9385.912109375, "masked_sentence_fisher_curvature/max": 34560.0, "masked_sentence_fisher_curvature/median": 6912.0, "masked_sentence_fisher_curvature/min": 0.007659912109375, "masked_sentence_fisher_curvature/p25": 2496.0, "masked_sentence_fisher_curvature/p75": 12096.0, "masked_sentence_fisher_curvature/p85": 20640.0, "masked_sentence_fisher_curvature/p90": 22272.0, "masked_sentence_fisher_curvature/p95": 24608.0, "masked_sentence_fisher_curvature/p99": 32857.60546875, "masked_sentence_fisher_curvature/var": 73552776.0, "masked_sentence_fisher_kl_divergence": 1.7420074271967678e-08, "masked_sentence_fisher_kl_divergence/max": 6.426125764846802e-08, "masked_sentence_fisher_kl_divergence/median": 1.280568540096283e-08, "masked_sentence_fisher_kl_divergence/min": 1.4210854715202004e-14, "masked_sentence_fisher_kl_divergence/p25": 4.627509042620659e-09, "masked_sentence_fisher_kl_divergence/p75": 2.246815711259842e-08, "masked_sentence_fisher_kl_divergence/p85": 3.8242433220148087e-08, "masked_sentence_fisher_kl_divergence/p90": 4.132743924856186e-08, "masked_sentence_fisher_kl_divergence/p95": 4.5693013817071915e-08, "masked_sentence_fisher_kl_divergence/p99": 6.116462003546985e-08, "masked_sentence_fisher_kl_divergence/var": 2.5346981843574147e-16, "masked_sentence_full_gradient_variance/max_squared_error": 139.4048309326172, "masked_sentence_full_gradient_variance/metric": 139.4048309326172, "masked_sentence_full_gradient_variance/p75": 139.4048309326172, "masked_sentence_full_gradient_variance/p90": 139.4048309326172, "masked_sentence_full_gradient_variance/p95": 139.4048309326172, "masked_sentence_full_gradient_variance/p99": 139.4048309326172, "masked_sentence_full_update_term": 0.0014280150644481182, "masked_sentence_full_update_term/max": 0.0081787109375, "masked_sentence_full_update_term/median": 0.000453948974609375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0001976490020751953, "masked_sentence_full_update_term/p75": 0.0020904541015625, "masked_sentence_full_update_term/p85": 0.002471923828125, "masked_sentence_full_update_term/p90": 0.00370025634765625, "masked_sentence_full_update_term/p95": 0.0063018798828125, "masked_sentence_full_update_term/p99": 0.007540895603597164, "masked_sentence_full_update_term/var": 3.603125833251397e-06, "masked_sentence_hessian_coeff": -82695.7734375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -54528.0, "masked_sentence_hessian_coeff/min": -309248.0, "masked_sentence_hessian_coeff/p25": -142592.0, "masked_sentence_hessian_coeff/p75": -30496.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 5391710720.0, "masked_sentence_hessian_coeff_abs": 82695.7734375, "masked_sentence_hessian_coeff_abs/max": 309248.0, "masked_sentence_hessian_coeff_abs/median": 54528.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 30496.0, "masked_sentence_hessian_coeff_abs/p75": 142592.0, "masked_sentence_hessian_coeff_abs/p99": 291737.65625, "masked_sentence_hessian_coeff_abs/var": 5391710720.0, "masked_token_fisher_curvature": 10672.8974609375, "masked_token_fisher_curvature/max": 5079040.0, "masked_token_fisher_curvature/median": 8.063753657860939e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.1067256225959924e-24, "masked_token_fisher_curvature/p75": 9.015010959956271e-14, "masked_token_fisher_curvature/p85": 1.07775122160092e-10, "masked_token_fisher_curvature/p90": 2.135038812411949e-08, "masked_token_fisher_curvature/p95": 0.000270843505859375, "masked_token_fisher_curvature/p99": 49408.0, "masked_token_fisher_curvature/var": 29228261376.0, "masked_token_fisher_kl_divergence": 1.9819527352638033e-08, "masked_token_fisher_kl_divergence/max": 9.417533874511719e-06, "masked_token_fisher_kl_divergence/median": 1.4976031247555146e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.9026412447299945e-36, "masked_token_fisher_kl_divergence/p75": 1.6721326835788743e-25, "masked_token_fisher_kl_divergence/p85": 2.001777082378327e-22, "masked_token_fisher_kl_divergence/p90": 3.968316270661895e-20, "masked_token_fisher_kl_divergence/p95": 5.030698080332741e-16, "masked_token_fisher_kl_divergence/p99": 9.173527359962463e-08, "masked_token_fisher_kl_divergence/var": 1.0079549770791035e-13, "masked_token_full_update_term": 1.5812702258699574e-05, "masked_token_full_update_term/max": 0.004302978515625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -5.029141902923584e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.163336342344337e-15, "masked_token_full_update_term/p85": 4.369837824924616e-13, "masked_token_full_update_term/p90": 1.0174971976084635e-11, "masked_token_full_update_term/p95": 2.08092387765646e-09, "masked_token_full_update_term/p99": 0.00022411346435546875, "masked_token_full_update_term/var": 3.572023743458885e-08, "masked_token_hessian_coeff": -97707.78125, "masked_token_hessian_coeff/max": 16384.0, "masked_token_hessian_coeff/median": -1.2967404927621828e-13, "masked_token_hessian_coeff/min": -23330816.0, "masked_token_hessian_coeff/p25": -3.129243850708008e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.032958984375, "masked_token_hessian_coeff/var": 1229852901376.0, "masked_token_hessian_coeff_abs": 97713.8203125, "masked_token_hessian_coeff_abs/max": 23330816.0, "masked_token_hessian_coeff_abs/median": 8.440110832452774e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.314018249511719e-05, "masked_token_hessian_coeff_abs/p99": 1703936.0, "masked_token_hessian_coeff_abs/var": 1229851721728.0, "mean_logprobs": -0.007110595703125, "mean_logprobs/var": 2.9206275939941406e-05, "num_completions/total": 7680, "per_sentence_gradient_norm": 39.378013610839844, "per_sentence_gradient_norm/max": 217.0, "per_sentence_gradient_norm/median": 19.375, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 2.265625, "per_sentence_gradient_norm/p75": 54.8125, "per_sentence_gradient_norm/var": 2409.26708984375, "per_token_feature_norm": 190.1101531982422, "per_token_feature_norm/max": 247.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 108.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 135.9130859375, "per_token_gradient_norm": 0.8454335331916809, "per_token_gradient_norm/max": 274.0, "per_token_gradient_norm/median": 3.410605131648481e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 7.741618901491165e-08, "per_token_gradient_norm/var": 118.20201873779297, "per_token_policy_error_norm": 0.0046349382027983665, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004742478486150503, "policy_entropy": 0.007826907560229301, "policy_entropy/max": 1.75, "policy_entropy/median": 3.474269760772586e-10, "policy_entropy/min": 2.355810384551023e-21, "policy_entropy/p25": 7.425171588693047e-13, "policy_entropy/p75": 6.658956408500671e-08, "policy_entropy/var": 0.0038295278791338205, "policy_loss": -0.8541666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.12587718665599823, "policy_sharpness": 9.758454322814941, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5270044803619385, "reward": 0.8541666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.12587718665599823, "rewards/accuracy_reward": 0.8541666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.12587718665599823, "sentence_fisher_curvature": 361611.5, "sentence_fisher_curvature/max": 1531904.0, "sentence_fisher_curvature/median": 248832.0, "sentence_fisher_curvature/min": 344.0, "sentence_fisher_curvature/p25": 9088.0, "sentence_fisher_curvature/p75": 564224.0, "sentence_fisher_curvature/p85": 831488.0, "sentence_fisher_curvature/p90": 880640.0, "sentence_fisher_curvature/p95": 1075200.0, "sentence_fisher_curvature/p99": 1158350.0, "sentence_fisher_curvature/var": 133707857920.0, "sentence_fisher_kl_divergence": 6.712866138514073e-07, "sentence_fisher_kl_divergence/max": 2.8461217880249023e-06, "sentence_fisher_kl_divergence/median": 4.6193599700927734e-07, "sentence_fisher_kl_divergence/min": 6.366462912410498e-10, "sentence_fisher_kl_divergence/p25": 1.6880221664905548e-08, "sentence_fisher_kl_divergence/p75": 1.0486692190170288e-06, "sentence_fisher_kl_divergence/p85": 1.5422701835632324e-06, "sentence_fisher_kl_divergence/p90": 1.6316771507263184e-06, "sentence_fisher_kl_divergence/p95": 2.000480890274048e-06, "sentence_fisher_kl_divergence/p99": 2.1524749627133133e-06, "sentence_fisher_kl_divergence/var": 4.61023038321462e-13, "sentence_full_gradient_variance/max_squared_error": 3864.29833984375, "sentence_full_gradient_variance/metric": 3864.29833984375, "sentence_full_gradient_variance/p75": 3864.29833984375, "sentence_full_gradient_variance/p90": 3864.29833984375, "sentence_full_gradient_variance/p95": 3864.29833984375, "sentence_full_gradient_variance/p99": 3864.29833984375, "sentence_full_update_term": 0.007265707012265921, "sentence_full_update_term/max": 0.040283203125, "sentence_full_update_term/median": 0.0027313232421875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00028228759765625, "sentence_full_update_term/p75": 0.009521484375, "sentence_full_update_term/p85": 0.016143798828125, "sentence_full_update_term/p90": 0.018798828125, "sentence_full_update_term/p95": 0.027618408203125, "sentence_full_update_term/p99": 0.035644546151161194, "sentence_full_update_term/var": 8.569083001930267e-05, "sentence_hessian_coeff": 51098.3359375, "sentence_hessian_coeff/max": 856064.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -385024.0, "sentence_hessian_coeff/p25": -56320.0, "sentence_hessian_coeff/p75": 175104.0, "sentence_hessian_coeff/p99": 762675.5, "sentence_hessian_coeff/var": 62811398144.0, "sentence_hessian_coeff_abs": 176980.34375, "sentence_hessian_coeff_abs/max": 856064.0, "sentence_hessian_coeff_abs/median": 112128.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 49152.0, "sentence_hessian_coeff_abs/p75": 287744.0, "sentence_hessian_coeff_abs/p99": 762675.5, "sentence_hessian_coeff_abs/var": 33798172672.0, "step": 80, "token_fisher_curvature": 342506.125, "token_fisher_curvature/max": 214958080.0, "token_fisher_curvature/median": 8.944667923005412e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.44276774644566e-24, "token_fisher_curvature/p75": 1.1901590823981678e-13, "token_fisher_curvature/p85": 1.4915713109076023e-10, "token_fisher_curvature/p90": 3.748573362827301e-08, "token_fisher_curvature/p95": 0.0016632080078125, "token_fisher_curvature/p99": 485376.0, "token_fisher_curvature/var": 38964853473280.0, "token_fisher_kl_divergence": 6.356942208185501e-07, "token_fisher_kl_divergence/max": 0.0003986358642578125, "token_fisher_kl_divergence/median": 1.6640034719505718e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.53740819417403e-36, "token_fisher_kl_divergence/p75": 2.213354373432906e-25, "token_fisher_kl_divergence/p85": 2.7627832459271124e-22, "token_fisher_kl_divergence/p90": 6.945670167485263e-20, "token_fisher_kl_divergence/p95": 3.0808688933348094e-15, "token_fisher_kl_divergence/p99": 9.015202522277832e-07, "token_fisher_kl_divergence/var": 1.342710109764056e-10, "token_full_update_term": 8.73098470037803e-05, "token_full_update_term/max": 0.028564453125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -5.029141902923584e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.884981308350689e-15, "token_full_update_term/p85": 5.435651928564766e-13, "token_full_update_term/p90": 1.4438228390645236e-11, "token_full_update_term/p95": 4.773028194904327e-09, "token_full_update_term/p99": 0.0010859966278076172, "token_full_update_term/var": 1.270992356694478e-06, "token_hessian_coeff": 33328.7890625, "token_hessian_coeff/max": 213909504.0, "token_hessian_coeff/median": -1.2967404927621828e-13, "token_hessian_coeff/min": -27656192.0, "token_hessian_coeff/p25": -3.3676624298095703e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0830078125, "token_hessian_coeff/var": 27346306334720.0, "token_hessian_coeff_abs": 352137.875, "token_hessian_coeff_abs/max": 213909504.0, "token_hessian_coeff_abs/median": 9.255018085241318e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.743171691894531e-05, "token_hessian_coeff_abs/p99": 6422528.0, "token_hessian_coeff_abs/var": 27223415324672.0 }, { "accuracy_reward": 0.6458333730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2311403602361679, "adam_stats/lm_head/lr_effective_max": 8.43981160869589e-06, "adam_stats/lm_head/lr_effective_mean": 3.3337664226418262e-12, "adam_stats/lm_head/lr_effective_min": -9.053533176484052e-06, "adam_stats/lm_head/lr_effective_std": 2.3813350935597555e-07, "adam_stats/lr_effective_max": 9.14029897103319e-06, "adam_stats/lr_effective_mean": 1.0962613629372786e-11, "adam_stats/lr_effective_min": -9.053533176484052e-06, "adam_stats/m_t_max": 0.0006543947383761406, "adam_stats/m_t_mean": 4.478156560849822e-12, "adam_stats/m_t_min": -0.0008525471785105765, "adam_stats/v_t_max": 2.4860250050551258e-05, "adam_stats/v_t_mean": 1.7302323853177204e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6458333730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2311403602361679, "all_logprobs": -0.008636696264147758, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.75, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -9.5367431640625e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -8.535385131835938e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.014853156171739101, "clip_ratio": 0.0, "completion_length": 575.2083740234375, "completion_length/correct": 485.6128845214844, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 447.0, "completion_length/correct/min": 253.0, "completion_length/correct/p25": 387.0, "completion_length/correct/p75": 574.5, "completion_length/correct/var": 34956.6015625, "completion_length/incorrect": 738.5882568359375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 901.0, "completion_length/incorrect/min": 324.0, "completion_length/incorrect/p25": 459.25, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 87535.34375, "completion_length/max": 1024.0, "completion_length/median": 467.0, "completion_length/min": 253.0, "completion_length/p25": 387.0, "completion_length/p75": 752.5, "completion_length/var": 67645.0078125, "curvature_clip_ratio_token_fisher": 0.004346251487731934, "curvature_clip_ratio_token_hessian": 0.0024085475597530603, "curvature_clip_ratio_total_fisher": 0.004346251487731934, "curvature_clip_ratio_total_full": 0.004346251487731934, "curvature_clip_ratio_total_hessian": 0.0024085475597530603, "epoch": 0.1296, "feature_vector_variance/max_squared_error": 75732.9453125, "feature_vector_variance/metric": 30905.337890625, "generated_tokens/total": 4502087.0, "global_fisher_curvature": 104448.0, "global_fisher_curvature/max": 104448.0, "global_fisher_curvature/median": 104448.0, "global_fisher_curvature/min": 104448.0, "global_fisher_curvature/p25": 104448.0, "global_fisher_curvature/p75": 104448.0, "global_fisher_curvature/p85": 104448.0, "global_fisher_curvature/p90": 104448.0, "global_fisher_curvature/p95": 104448.0, "global_fisher_curvature/p99": 104448.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.6111880540847778e-07, "global_fisher_kl_divergence/max": 1.6111880540847778e-07, "global_fisher_kl_divergence/median": 1.6111880540847778e-07, "global_fisher_kl_divergence/min": 1.6111880540847778e-07, "global_fisher_kl_divergence/p25": 1.6111880540847778e-07, "global_fisher_kl_divergence/p75": 1.6111880540847778e-07, "global_fisher_kl_divergence/p85": 1.6111880540847778e-07, "global_fisher_kl_divergence/p90": 1.6111880540847778e-07, "global_fisher_kl_divergence/p95": 1.6111880540847778e-07, "global_fisher_kl_divergence/p99": 1.6111880540847778e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.010986328125, "global_full_update_term/max": 0.010986328125, "global_full_update_term/median": 0.010986328125, "global_full_update_term/min": 0.010986328125, "global_full_update_term/p25": 0.010986328125, "global_full_update_term/p75": 0.010986328125, "global_full_update_term/p85": 0.010986328125, "global_full_update_term/p90": 0.010986328125, "global_full_update_term/p95": 0.010986328125, "global_full_update_term/p99": 0.010986328125, "global_full_update_term/var": NaN, "global_hessian_coeff": 22144.0, "global_hessian_coeff/max": 22144.0, "global_hessian_coeff/median": 22144.0, "global_hessian_coeff/min": 22144.0, "global_hessian_coeff/p25": 22144.0, "global_hessian_coeff/p75": 22144.0, "global_hessian_coeff/p99": 22144.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 22144.0, "global_hessian_coeff_abs/max": 22144.0, "global_hessian_coeff_abs/median": 22144.0, "global_hessian_coeff_abs/min": 22144.0, "global_hessian_coeff_abs/p25": 22144.0, "global_hessian_coeff_abs/p75": 22144.0, "global_hessian_coeff_abs/p99": 22144.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.1516626924276352, "learning_rate": 1.5899193479495858e-06, "loss": -0.6458, "masked_global_fisher_curvature": 6368.0, "masked_global_fisher_curvature/max": 6368.0, "masked_global_fisher_curvature/median": 6368.0, "masked_global_fisher_curvature/min": 6368.0, "masked_global_fisher_curvature/p25": 6368.0, "masked_global_fisher_curvature/p75": 6368.0, "masked_global_fisher_curvature/p85": 6368.0, "masked_global_fisher_curvature/p90": 6368.0, "masked_global_fisher_curvature/p95": 6368.0, "masked_global_fisher_curvature/p99": 6368.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/max": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/median": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/min": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/p25": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/p75": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/p85": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/p90": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/p95": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/p99": 9.778887033462524e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0037689208984375, "masked_global_full_update_term/max": 0.0037689208984375, "masked_global_full_update_term/median": 0.0037689208984375, "masked_global_full_update_term/min": 0.0037689208984375, "masked_global_full_update_term/p25": 0.0037689208984375, "masked_global_full_update_term/p75": 0.0037689208984375, "masked_global_full_update_term/p85": 0.0037689208984375, "masked_global_full_update_term/p90": 0.0037689208984375, "masked_global_full_update_term/p95": 0.0037689208984375, "masked_global_full_update_term/p99": 0.0037689208984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -25728.0, "masked_global_hessian_coeff/max": -25728.0, "masked_global_hessian_coeff/median": -25728.0, "masked_global_hessian_coeff/min": -25728.0, "masked_global_hessian_coeff/p25": -25728.0, "masked_global_hessian_coeff/p75": -25728.0, "masked_global_hessian_coeff/p99": -25728.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 25728.0, "masked_global_hessian_coeff_abs/max": 25728.0, "masked_global_hessian_coeff_abs/median": 25728.0, "masked_global_hessian_coeff_abs/min": 25728.0, "masked_global_hessian_coeff_abs/p25": 25728.0, "masked_global_hessian_coeff_abs/p75": 25728.0, "masked_global_hessian_coeff_abs/p99": 25728.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 7.7885589599609375, "masked_per_sentence_gradient_norm/max": 41.5, "masked_per_sentence_gradient_norm/median": 1.890625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 12.703125, "masked_per_sentence_gradient_norm/var": 115.86408996582031, "masked_per_token_gradient_norm": 0.13242630660533905, "masked_per_token_gradient_norm/max": 48.5, "masked_per_token_gradient_norm/median": 2.3314683517128287e-15, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 6.664777174592018e-09, "masked_per_token_gradient_norm/var": 3.1629512310028076, "masked_sentence_fisher_curvature": 9886.314453125, "masked_sentence_fisher_curvature/max": 51456.0, "masked_sentence_fisher_curvature/median": 4544.0, "masked_sentence_fisher_curvature/min": 0.302734375, "masked_sentence_fisher_curvature/p25": 708.5, "masked_sentence_fisher_curvature/p75": 17824.0, "masked_sentence_fisher_curvature/p85": 22784.0, "masked_sentence_fisher_curvature/p90": 27392.0, "masked_sentence_fisher_curvature/p95": 30688.0, "masked_sentence_fisher_curvature/p99": 40512.03515625, "masked_sentence_fisher_curvature/var": 130989080.0, "masked_sentence_fisher_kl_divergence": 1.5221697680090074e-08, "masked_sentence_fisher_kl_divergence/max": 7.916241884231567e-08, "masked_sentence_fisher_kl_divergence/median": 6.984919309616089e-09, "masked_sentence_fisher_kl_divergence/min": 4.654054919228656e-13, "masked_sentence_fisher_kl_divergence/p25": 1.0913936421275139e-09, "masked_sentence_fisher_kl_divergence/p75": 2.741580829024315e-08, "masked_sentence_fisher_kl_divergence/p85": 3.5157427191734314e-08, "masked_sentence_fisher_kl_divergence/p90": 4.21423465013504e-08, "masked_sentence_fisher_kl_divergence/p95": 4.720641300082207e-08, "masked_sentence_fisher_kl_divergence/p99": 6.23521003717542e-08, "masked_sentence_fisher_kl_divergence/var": 3.105860176927074e-16, "masked_sentence_full_gradient_variance/max_squared_error": 168.41258239746094, "masked_sentence_full_gradient_variance/metric": 168.41258239746094, "masked_sentence_full_gradient_variance/p75": 168.41258239746094, "masked_sentence_full_gradient_variance/p90": 168.41258239746094, "masked_sentence_full_gradient_variance/p95": 168.41258239746094, "masked_sentence_full_gradient_variance/p99": 168.41258239746094, "masked_sentence_full_update_term": 0.0012762300902977586, "masked_sentence_full_update_term/max": 0.00653076171875, "masked_sentence_full_update_term/median": 0.000179290771484375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0021209716796875, "masked_sentence_full_update_term/p85": 0.003231048583984375, "masked_sentence_full_update_term/p90": 0.004119873046875, "masked_sentence_full_update_term/p95": 0.00576019287109375, "masked_sentence_full_update_term/p99": 0.006211854051798582, "masked_sentence_full_update_term/var": 3.3050496313080657e-06, "masked_sentence_hessian_coeff": -72523.703125, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -7264.0, "masked_sentence_hessian_coeff/min": -335872.0, "masked_sentence_hessian_coeff/p25": -128512.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 8363154432.0, "masked_sentence_hessian_coeff_abs": 72523.703125, "masked_sentence_hessian_coeff_abs/max": 335872.0, "masked_sentence_hessian_coeff_abs/median": 7264.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 128512.0, "masked_sentence_hessian_coeff_abs/p99": 331980.8125, "masked_sentence_hessian_coeff_abs/var": 8363154432.0, "masked_token_fisher_curvature": 10565.84765625, "masked_token_fisher_curvature/max": 6324224.0, "masked_token_fisher_curvature/median": 2.425902360936316e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.669143688708786e-24, "masked_token_fisher_curvature/p75": 2.0516921495072893e-13, "masked_token_fisher_curvature/p85": 1.014086592476815e-10, "masked_token_fisher_curvature/p90": 2.21530171984341e-08, "masked_token_fisher_curvature/p95": 0.000446319580078125, "masked_token_fisher_curvature/p99": 32000.0, "masked_token_fisher_curvature/var": 31085844480.0, "masked_token_fisher_kl_divergence": 1.626436585411284e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 3.7224373965116495e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.0250310739170347e-35, "masked_token_fisher_kl_divergence/p75": 3.1503949110906327e-25, "masked_token_fisher_kl_divergence/p85": 1.5633713577252223e-22, "masked_token_fisher_kl_divergence/p90": 3.413443515761324e-20, "masked_token_fisher_kl_divergence/p95": 6.869504964868156e-16, "masked_token_fisher_kl_divergence/p99": 4.936009645462036e-08, "masked_token_fisher_kl_divergence/var": 7.364995522089399e-14, "masked_token_full_update_term": 1.217667795572197e-05, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.9674341678619385e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.561957144850638e-17, "masked_token_full_update_term/p85": 2.6756374893466273e-14, "masked_token_full_update_term/p90": 5.755396159656812e-13, "masked_token_full_update_term/p95": 8.278711050024867e-11, "masked_token_full_update_term/p99": 9.97595489025116e-05, "masked_token_full_update_term/var": 2.7267009983233947e-08, "masked_token_hessian_coeff": -81095.546875, "masked_token_hessian_coeff/max": 2128.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -24903680.0, "masked_token_hessian_coeff/p25": -1.5925616025924683e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0031890869140625, "masked_token_hessian_coeff/var": 1089303609344.0, "masked_token_hessian_coeff_abs": 81095.75, "masked_token_hessian_coeff_abs/max": 24903680.0, "masked_token_hessian_coeff_abs/median": 6.536993168992922e-13, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.771615982055664e-06, "masked_token_hessian_coeff_abs/p99": 794624.0, "masked_token_hessian_coeff_abs/var": 1089303609344.0, "mean_logprobs": -0.0078125, "mean_logprobs/var": 3.910064697265625e-05, "num_completions/total": 7776, "per_sentence_gradient_norm": 35.64518356323242, "per_sentence_gradient_norm/max": 255.0, "per_sentence_gradient_norm/median": 14.625, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 60.125, "per_sentence_gradient_norm/var": 2238.443359375, "per_token_feature_norm": 189.35214233398438, "per_token_feature_norm/max": 290.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 101.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 158.1912078857422, "per_token_gradient_norm": 0.7101194262504578, "per_token_gradient_norm/max": 282.0, "per_token_gradient_norm/median": 3.0253577421035516e-15, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 7.62520357966423e-09, "per_token_gradient_norm/var": 99.4989242553711, "per_token_policy_error_norm": 0.004910082556307316, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00498813483864069, "policy_entropy": 0.009005706757307053, "policy_entropy/max": 1.65625, "policy_entropy/median": 5.820766091346741e-10, "policy_entropy/min": 3.917527381051139e-21, "policy_entropy/p25": 1.3784529073745944e-12, "policy_entropy/p75": 9.73232090473175e-08, "policy_entropy/var": 0.004581850953400135, "policy_loss": -0.6458333730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2311403602361679, "policy_sharpness": 9.735048294067383, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6918845176696777, "reward": 0.6458333730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2311403602361679, "rewards/accuracy_reward": 0.6458333730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2311403602361679, "sentence_fisher_curvature": 307730.5625, "sentence_fisher_curvature/max": 1613824.0, "sentence_fisher_curvature/median": 147456.0, "sentence_fisher_curvature/min": 78.5, "sentence_fisher_curvature/p25": 1744.0, "sentence_fisher_curvature/p75": 518144.0, "sentence_fisher_curvature/p85": 720896.0, "sentence_fisher_curvature/p90": 843776.0, "sentence_fisher_curvature/p95": 940032.0, "sentence_fisher_curvature/p99": 1263617.125, "sentence_fisher_curvature/var": 132740669440.0, "sentence_fisher_kl_divergence": 4.7378620138260885e-07, "sentence_fisher_kl_divergence/max": 2.4884939193725586e-06, "sentence_fisher_kl_divergence/median": 2.2724270820617676e-07, "sentence_fisher_kl_divergence/min": 1.2096279533579946e-10, "sentence_fisher_kl_divergence/p25": 2.6775524020195007e-09, "sentence_fisher_kl_divergence/p75": 7.972121238708496e-07, "sentence_fisher_kl_divergence/p85": 1.1101365089416504e-06, "sentence_fisher_kl_divergence/p90": 1.296401023864746e-06, "sentence_fisher_kl_divergence/p95": 1.4454126358032227e-06, "sentence_fisher_kl_divergence/p99": 1.9505637283145916e-06, "sentence_fisher_kl_divergence/var": 3.1487703069932316e-13, "sentence_full_gradient_variance/max_squared_error": 3442.205078125, "sentence_full_gradient_variance/metric": 3442.205078125, "sentence_full_gradient_variance/p75": 3442.205078125, "sentence_full_gradient_variance/p90": 3442.205078125, "sentence_full_gradient_variance/p95": 3442.205078125, "sentence_full_gradient_variance/p99": 3442.205078125, "sentence_full_update_term": 0.005590518470853567, "sentence_full_update_term/max": 0.036865234375, "sentence_full_update_term/median": 0.0027008056640625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.00921630859375, "sentence_full_update_term/p85": 0.01318359375, "sentence_full_update_term/p90": 0.015106201171875, "sentence_full_update_term/p95": 0.017852783203125, "sentence_full_update_term/p99": 0.03060304746031761, "sentence_full_update_term/var": 5.2501040045171976e-05, "sentence_hessian_coeff": 47748.4609375, "sentence_hessian_coeff/max": 1105920.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -362496.0, "sentence_hessian_coeff/p25": -7264.0, "sentence_hessian_coeff/p75": 44352.0, "sentence_hessian_coeff/p99": 821863.3125, "sentence_hessian_coeff/var": 38670802944.0, "sentence_hessian_coeff_abs": 100163.546875, "sentence_hessian_coeff_abs/max": 1105920.0, "sentence_hessian_coeff_abs/median": 9600.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 154624.0, "sentence_hessian_coeff_abs/p99": 821863.3125, "sentence_hessian_coeff_abs/var": 30836379648.0, "step": 81, "token_fisher_curvature": 285898.125, "token_fisher_curvature/max": 217055232.0, "token_fisher_curvature/median": 2.656295322589486e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.134432783269864e-24, "token_fisher_curvature/p75": 2.4158453015843406e-13, "token_fisher_curvature/p85": 1.339728328275669e-10, "token_fisher_curvature/p90": 3.562308847904205e-08, "token_fisher_curvature/p95": 0.0017852783203125, "token_fisher_curvature/p99": 260096.0, "token_fisher_curvature/var": 32037694078976.0, "token_fisher_kl_divergence": 4.4004940491504385e-07, "token_fisher_kl_divergence/max": 0.0003337860107421875, "token_fisher_kl_divergence/median": 4.092215945833999e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.1002627123696611e-35, "token_fisher_kl_divergence/p75": 3.715850407953054e-25, "token_fisher_kl_divergence/p85": 2.0634601991503477e-22, "token_fisher_kl_divergence/p90": 5.463362509790237e-20, "token_fisher_kl_divergence/p95": 2.7478019859472624e-15, "token_fisher_kl_divergence/p99": 4.0046870708465576e-07, "token_fisher_kl_divergence/var": 7.590366529752757e-11, "token_full_update_term": 6.662708619842306e-05, "token_full_update_term/max": 0.0262451171875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.9674341678619385e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 7.24247051220317e-17, "token_full_update_term/p85": 3.3562562451461275e-14, "token_full_update_term/p90": 7.531752999057062e-13, "token_full_update_term/p95": 1.737596733164537e-10, "token_full_update_term/p99": 0.0004825592041015625, "token_full_update_term/var": 8.800138857623097e-07, "token_hessian_coeff": 24178.595703125, "token_hessian_coeff/max": 211812352.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -28835840.0, "token_hessian_coeff/p25": -1.7136335372924805e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.00823974609375, "token_hessian_coeff/var": 22442258989056.0, "token_hessian_coeff_abs": 285514.21875, "token_hessian_coeff_abs/max": 211812352.0, "token_hessian_coeff_abs/median": 8.064660050877137e-13, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.296881914138794e-06, "token_hessian_coeff_abs/p99": 3660608.0, "token_hessian_coeff_abs/var": 22361323601920.0 }, { "accuracy_reward": 0.7291666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.19956141710281372, "adam_stats/lm_head/lr_effective_max": 7.5827588261745404e-06, "adam_stats/lm_head/lr_effective_mean": 9.87423424275713e-12, "adam_stats/lm_head/lr_effective_min": -7.931584150355775e-06, "adam_stats/lm_head/lr_effective_std": 2.2855125791920727e-07, "adam_stats/lr_effective_max": 8.375438483199105e-06, "adam_stats/lr_effective_mean": 1.4957002650306528e-11, "adam_stats/lr_effective_min": -8.156797775882296e-06, "adam_stats/m_t_max": 0.0006636746693402529, "adam_stats/m_t_mean": 1.0375784433025448e-11, "adam_stats/m_t_min": -0.0006658214842900634, "adam_stats/v_t_max": 2.4837052478687838e-05, "adam_stats/v_t_mean": 1.731080665097473e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.7291666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.19956141710281372, "all_logprobs": -0.01004598755389452, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.0, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -2.2649765014648438e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00015335110947489738, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.018276561051607132, "clip_ratio": 0.0, "completion_length": 519.0104370117188, "completion_length/correct": 438.68572998046875, "completion_length/correct/max": 917.0, "completion_length/correct/median": 363.0, "completion_length/correct/min": 200.0, "completion_length/correct/p25": 353.0, "completion_length/correct/p75": 599.0, "completion_length/correct/var": 31870.16015625, "completion_length/incorrect": 735.269287109375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 778.0, "completion_length/incorrect/min": 356.0, "completion_length/incorrect/p25": 562.5, "completion_length/incorrect/p75": 896.25, "completion_length/incorrect/var": 47586.046875, "completion_length/max": 1024.0, "completion_length/median": 415.0, "completion_length/min": 200.0, "completion_length/p25": 360.0, "completion_length/p75": 674.0, "completion_length/var": 53224.22265625, "curvature_clip_ratio_token_fisher": 0.00449573528021574, "curvature_clip_ratio_token_hessian": 0.002348218811675906, "curvature_clip_ratio_total_fisher": 0.00449573528021574, "curvature_clip_ratio_total_full": 0.00449573528021574, "curvature_clip_ratio_total_hessian": 0.002348218811675906, "epoch": 0.1312, "feature_vector_variance/max_squared_error": 64322.63671875, "feature_vector_variance/metric": 31424.107421875, "generated_tokens/total": 4551912.0, "global_fisher_curvature": 216064.0, "global_fisher_curvature/max": 216064.0, "global_fisher_curvature/median": 216064.0, "global_fisher_curvature/min": 216064.0, "global_fisher_curvature/p25": 216064.0, "global_fisher_curvature/p75": 216064.0, "global_fisher_curvature/p85": 216064.0, "global_fisher_curvature/p90": 216064.0, "global_fisher_curvature/p95": 216064.0, "global_fisher_curvature/p99": 216064.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.738088369369507e-07, "global_fisher_kl_divergence/max": 2.738088369369507e-07, "global_fisher_kl_divergence/median": 2.738088369369507e-07, "global_fisher_kl_divergence/min": 2.738088369369507e-07, "global_fisher_kl_divergence/p25": 2.738088369369507e-07, "global_fisher_kl_divergence/p75": 2.738088369369507e-07, "global_fisher_kl_divergence/p85": 2.738088369369507e-07, "global_fisher_kl_divergence/p90": 2.738088369369507e-07, "global_fisher_kl_divergence/p95": 2.738088369369507e-07, "global_fisher_kl_divergence/p99": 2.738088369369507e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.01556396484375, "global_full_update_term/max": 0.01556396484375, "global_full_update_term/median": 0.01556396484375, "global_full_update_term/min": 0.01556396484375, "global_full_update_term/p25": 0.01556396484375, "global_full_update_term/p75": 0.01556396484375, "global_full_update_term/p85": 0.01556396484375, "global_full_update_term/p90": 0.01556396484375, "global_full_update_term/p95": 0.01556396484375, "global_full_update_term/p99": 0.01556396484375, "global_full_update_term/var": NaN, "global_hessian_coeff": 78336.0, "global_hessian_coeff/max": 78336.0, "global_hessian_coeff/median": 78336.0, "global_hessian_coeff/min": 78336.0, "global_hessian_coeff/p25": 78336.0, "global_hessian_coeff/p75": 78336.0, "global_hessian_coeff/p99": 78336.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 78336.0, "global_hessian_coeff_abs/max": 78336.0, "global_hessian_coeff_abs/median": 78336.0, "global_hessian_coeff_abs/min": 78336.0, "global_hessian_coeff_abs/p25": 78336.0, "global_hessian_coeff_abs/p75": 78336.0, "global_hessian_coeff_abs/p99": 78336.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.19259484112262726, "learning_rate": 1.432372542187895e-06, "loss": -0.7292, "masked_global_fisher_curvature": 10432.0, "masked_global_fisher_curvature/max": 10432.0, "masked_global_fisher_curvature/median": 10432.0, "masked_global_fisher_curvature/min": 10432.0, "masked_global_fisher_curvature/p25": 10432.0, "masked_global_fisher_curvature/p75": 10432.0, "masked_global_fisher_curvature/p85": 10432.0, "masked_global_fisher_curvature/p90": 10432.0, "masked_global_fisher_curvature/p95": 10432.0, "masked_global_fisher_curvature/p99": 10432.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/max": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/median": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/min": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/p25": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/p75": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/p85": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/p90": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/p95": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/p99": 1.3213139027357101e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002349853515625, "masked_global_full_update_term/max": 0.002349853515625, "masked_global_full_update_term/median": 0.002349853515625, "masked_global_full_update_term/min": 0.002349853515625, "masked_global_full_update_term/p25": 0.002349853515625, "masked_global_full_update_term/p75": 0.002349853515625, "masked_global_full_update_term/p85": 0.002349853515625, "masked_global_full_update_term/p90": 0.002349853515625, "masked_global_full_update_term/p95": 0.002349853515625, "masked_global_full_update_term/p99": 0.002349853515625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -29440.0, "masked_global_hessian_coeff/max": -29440.0, "masked_global_hessian_coeff/median": -29440.0, "masked_global_hessian_coeff/min": -29440.0, "masked_global_hessian_coeff/p25": -29440.0, "masked_global_hessian_coeff/p75": -29440.0, "masked_global_hessian_coeff/p99": -29440.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 29440.0, "masked_global_hessian_coeff_abs/max": 29440.0, "masked_global_hessian_coeff_abs/median": 29440.0, "masked_global_hessian_coeff_abs/min": 29440.0, "masked_global_hessian_coeff_abs/p25": 29440.0, "masked_global_hessian_coeff_abs/p75": 29440.0, "masked_global_hessian_coeff_abs/p99": 29440.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 7.309398651123047, "masked_per_sentence_gradient_norm/max": 49.25, "masked_per_sentence_gradient_norm/median": 3.328125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 10.3125, "masked_per_sentence_gradient_norm/var": 103.75482177734375, "masked_per_token_gradient_norm": 0.14611761271953583, "masked_per_token_gradient_norm/max": 59.25, "masked_per_token_gradient_norm/median": 2.7853275241795927e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.0978193283081055e-08, "masked_per_token_gradient_norm/var": 3.976914644241333, "masked_sentence_fisher_curvature": 12324.130859375, "masked_sentence_fisher_curvature/max": 47104.0, "masked_sentence_fisher_curvature/median": 3840.0, "masked_sentence_fisher_curvature/min": 0.0194091796875, "masked_sentence_fisher_curvature/p25": 1360.0, "masked_sentence_fisher_curvature/p75": 21248.0, "masked_sentence_fisher_curvature/p85": 28416.0, "masked_sentence_fisher_curvature/p90": 37632.0, "masked_sentence_fisher_curvature/p95": 39552.0, "masked_sentence_fisher_curvature/p99": 46131.203125, "masked_sentence_fisher_curvature/var": 200076448.0, "masked_sentence_fisher_kl_divergence": 1.5576192780031306e-08, "masked_sentence_fisher_kl_divergence/max": 5.960464477539063e-08, "masked_sentence_fisher_kl_divergence/median": 4.8603396862745285e-09, "masked_sentence_fisher_kl_divergence/min": 2.453592884421596e-14, "masked_sentence_fisher_kl_divergence/p25": 1.7171259969472885e-09, "masked_sentence_fisher_kl_divergence/p75": 2.6891939342021942e-08, "masked_sentence_fisher_kl_divergence/p85": 3.591412678360939e-08, "masked_sentence_fisher_kl_divergence/p90": 4.7497451305389404e-08, "masked_sentence_fisher_kl_divergence/p95": 5.005858838558197e-08, "masked_sentence_fisher_kl_divergence/p99": 5.827751436981998e-08, "masked_sentence_fisher_kl_divergence/var": 3.1952652986031534e-16, "masked_sentence_full_gradient_variance/max_squared_error": 150.4138641357422, "masked_sentence_full_gradient_variance/metric": 150.4138641357422, "masked_sentence_full_gradient_variance/p75": 150.4138641357422, "masked_sentence_full_gradient_variance/p90": 150.4138641357422, "masked_sentence_full_gradient_variance/p95": 150.4138641357422, "masked_sentence_full_gradient_variance/p99": 150.4138641357422, "masked_sentence_full_update_term": 0.0009223639499396086, "masked_sentence_full_update_term/max": 0.005645751953125, "masked_sentence_full_update_term/median": 0.0004291534423828125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0014476776123046875, "masked_sentence_full_update_term/p85": 0.002696990966796875, "masked_sentence_full_update_term/p90": 0.0030364990234375, "masked_sentence_full_update_term/p95": 0.003387451171875, "masked_sentence_full_update_term/p99": 0.0041671800427138805, "masked_sentence_full_update_term/var": 1.6339424746547593e-06, "masked_sentence_hessian_coeff": -76837.03125, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -53760.0, "masked_sentence_hessian_coeff/min": -274432.0, "masked_sentence_hessian_coeff/p25": -139008.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 6882376192.0, "masked_sentence_hessian_coeff_abs": 76837.03125, "masked_sentence_hessian_coeff_abs/max": 274432.0, "masked_sentence_hessian_coeff_abs/median": 53760.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 139008.0, "masked_sentence_hessian_coeff_abs/p99": 274432.0, "masked_sentence_hessian_coeff_abs/var": 6882376192.0, "masked_token_fisher_curvature": 13133.853515625, "masked_token_fisher_curvature/max": 7831552.0, "masked_token_fisher_curvature/median": 1.496198998029996e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 8.602678370551488e-23, "masked_token_fisher_curvature/p75": 8.384404281969182e-13, "masked_token_fisher_curvature/p85": 6.948539521545172e-10, "masked_token_fisher_curvature/p90": 1.555308699607849e-07, "masked_token_fisher_curvature/p95": 0.0020599365234375, "masked_token_fisher_curvature/p99": 39680.0, "masked_token_fisher_curvature/var": 52516343808.0, "masked_token_fisher_kl_divergence": 1.6607453190431443e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 1.8932661725304283e-29, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.0908587575630828e-34, "masked_token_fisher_kl_divergence/p75": 1.0598251598335667e-24, "masked_token_fisher_kl_divergence/p85": 8.801201717564214e-22, "masked_token_fisher_kl_divergence/p90": 1.9651164376299768e-19, "masked_token_fisher_kl_divergence/p95": 2.609024107869118e-15, "masked_token_fisher_kl_divergence/p99": 5.005858838558197e-08, "masked_token_fisher_kl_divergence/var": 8.399500577493599e-14, "masked_token_full_update_term": 1.2301079550525174e-05, "masked_token_full_update_term/max": 0.004425048828125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.933906555175781e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.1102230246251565e-15, "masked_token_full_update_term/p85": 1.8562928971732617e-13, "masked_token_full_update_term/p90": 4.746425474877469e-12, "masked_token_full_update_term/p95": 1.4915713109076023e-09, "masked_token_full_update_term/p99": 9.870529174804688e-05, "masked_token_full_update_term/var": 2.8485240832765157e-08, "masked_token_hessian_coeff": -88694.3984375, "masked_token_hessian_coeff/max": 21632.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -26214400.0, "masked_token_hessian_coeff/p25": -2.0116567611694336e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0107421875, "masked_token_hessian_coeff/var": 1300317863936.0, "masked_token_hessian_coeff_abs": 88696.296875, "masked_token_hessian_coeff_abs/max": 26214400.0, "masked_token_hessian_coeff_abs/median": 7.8580342233181e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.7881393432617188e-05, "masked_token_hessian_coeff_abs/p99": 905216.0, "masked_token_hessian_coeff_abs/var": 1300317601792.0, "mean_logprobs": -0.00982666015625, "mean_logprobs/var": 7.534027099609375e-05, "num_completions/total": 7872, "per_sentence_gradient_norm": 43.521484375, "per_sentence_gradient_norm/max": 270.0, "per_sentence_gradient_norm/median": 19.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 64.8125, "per_sentence_gradient_norm/var": 3213.107666015625, "per_token_feature_norm": 190.6735382080078, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 105.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 148.88055419921875, "per_token_gradient_norm": 0.7526106834411621, "per_token_gradient_norm/max": 294.0, "per_token_gradient_norm/median": 3.197442310920451e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.377216100692749e-08, "per_token_gradient_norm/var": 103.52178955078125, "per_token_policy_error_norm": 0.005781132262200117, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006001966539770365, "policy_entropy": 0.009555967524647713, "policy_entropy/max": 1.5078125, "policy_entropy/median": 1.2878444977104664e-09, "policy_entropy/min": 1.8131799027162367e-21, "policy_entropy/p25": 4.774847184307873e-12, "policy_entropy/p75": 1.8067657947540283e-07, "policy_entropy/var": 0.005125014111399651, "policy_loss": -0.7291666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.19956141710281372, "policy_sharpness": 9.727133750915527, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7332279682159424, "reward": 0.7291666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.19956141710281372, "rewards/accuracy_reward": 0.7291666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.19956141710281372, "sentence_fisher_curvature": 353354.34375, "sentence_fisher_curvature/max": 2670592.0, "sentence_fisher_curvature/median": 200704.0, "sentence_fisher_curvature/min": 174.0, "sentence_fisher_curvature/p25": 20480.0, "sentence_fisher_curvature/p75": 508416.0, "sentence_fisher_curvature/p85": 822272.0, "sentence_fisher_curvature/p90": 872448.0, "sentence_fisher_curvature/p95": 1232896.0, "sentence_fisher_curvature/p99": 1542147.625, "sentence_fisher_curvature/var": 203371020288.0, "sentence_fisher_kl_divergence": 4.4668806253866933e-07, "sentence_fisher_kl_divergence/max": 3.382563591003418e-06, "sentence_fisher_kl_divergence/median": 2.5331974029541016e-07, "sentence_fisher_kl_divergence/min": 2.2009771782904863e-10, "sentence_fisher_kl_divergence/p25": 2.584420144557953e-08, "sentence_fisher_kl_divergence/p75": 6.416812539100647e-07, "sentence_fisher_kl_divergence/p85": 1.037493348121643e-06, "sentence_fisher_kl_divergence/p90": 1.1026859283447266e-06, "sentence_fisher_kl_divergence/p95": 1.55717134475708e-06, "sentence_fisher_kl_divergence/p99": 1.9528017674019793e-06, "sentence_fisher_kl_divergence/var": 3.253470355537441e-13, "sentence_full_gradient_variance/max_squared_error": 4976.75732421875, "sentence_full_gradient_variance/metric": 4976.75732421875, "sentence_full_gradient_variance/p75": 4976.75732421875, "sentence_full_gradient_variance/p90": 4976.75732421875, "sentence_full_gradient_variance/p95": 4976.75732421875, "sentence_full_gradient_variance/p99": 4976.75732421875, "sentence_full_update_term": 0.00529054831713438, "sentence_full_update_term/max": 0.03369140625, "sentence_full_update_term/median": 0.0030364990234375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.00754547119140625, "sentence_full_update_term/p85": 0.010833740234375, "sentence_full_update_term/p90": 0.014434814453125, "sentence_full_update_term/p95": 0.0191650390625, "sentence_full_update_term/p99": 0.029516614973545074, "sentence_full_update_term/var": 4.9196165491594e-05, "sentence_hessian_coeff": 72319.3359375, "sentence_hessian_coeff/max": 1941504.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -327680.0, "sentence_hessian_coeff/p25": -64512.0, "sentence_hessian_coeff/p75": 92160.0, "sentence_hessian_coeff/p99": 1069878.0, "sentence_hessian_coeff/var": 101524824064.0, "sentence_hessian_coeff_abs": 171810.0, "sentence_hessian_coeff_abs/max": 1941504.0, "sentence_hessian_coeff_abs/median": 64512.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 208896.0, "sentence_hessian_coeff_abs/p99": 1069878.0, "sentence_hessian_coeff_abs/var": 76980559872.0, "step": 82, "token_fisher_curvature": 310411.59375, "token_fisher_curvature/max": 218103808.0, "token_fisher_curvature/median": 1.713039432527097e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 9.016268676828002e-23, "token_fisher_curvature/p75": 1.0587086762825493e-12, "token_fisher_curvature/p85": 9.74978320300579e-10, "token_fisher_curvature/p90": 3.0547380447387695e-07, "token_fisher_curvature/p95": 0.009521484375, "token_fisher_curvature/p99": 232056.0, "token_fisher_curvature/var": 34644045070336.0, "token_fisher_kl_divergence": 3.923094311630848e-07, "token_fisher_kl_divergence/max": 0.0002765655517578125, "token_fisher_kl_divergence/median": 2.1693674893577825e-29, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.1359977406346586e-34, "token_fisher_kl_divergence/p75": 1.3377061468630994e-24, "token_fisher_kl_divergence/p85": 1.2308447514789052e-21, "token_fisher_kl_divergence/p90": 3.8624702394796095e-19, "token_fisher_kl_divergence/p95": 1.2045919817182948e-14, "token_fisher_kl_divergence/p99": 2.9305374482646585e-07, "token_fisher_kl_divergence/var": 5.5321979636202556e-11, "token_full_update_term": 6.504715565824881e-05, "token_full_update_term/max": 0.0238037109375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.933906555175781e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.304512053934559e-15, "token_full_update_term/p85": 2.2382096176443156e-13, "token_full_update_term/p90": 6.536993168992922e-12, "token_full_update_term/p95": 3.3760443329811096e-09, "token_full_update_term/p99": 0.000518798828125, "token_full_update_term/var": 7.815414164724643e-07, "token_hessian_coeff": 21097.19140625, "token_hessian_coeff/max": 217055232.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -28704768.0, "token_hessian_coeff/p25": -2.130866050720215e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.02047872543334961, "token_hessian_coeff/var": 24604846325760.0, "token_hessian_coeff_abs": 301232.65625, "token_hessian_coeff_abs/max": 217055232.0, "token_hessian_coeff_abs/median": 8.694769348949194e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.990795135498047e-05, "token_hessian_coeff_abs/p99": 4249664.0, "token_hessian_coeff_abs/var": 24514547154944.0 }, { "accuracy_reward": 0.875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.11052632331848145, "adam_stats/lm_head/lr_effective_max": 7.320005352084991e-06, "adam_stats/lm_head/lr_effective_mean": 1.2433614034190743e-11, "adam_stats/lm_head/lr_effective_min": -7.507298050768441e-06, "adam_stats/lm_head/lr_effective_std": 2.08763864861794e-07, "adam_stats/lr_effective_max": 7.364882094407221e-06, "adam_stats/lr_effective_mean": 1.3588596393943053e-11, "adam_stats/lr_effective_min": -7.648119208170101e-06, "adam_stats/m_t_max": 0.0009754102793522179, "adam_stats/m_t_mean": 7.691147198285453e-12, "adam_stats/m_t_min": -0.0009157879976555705, "adam_stats/v_t_max": 2.4900564312702045e-05, "adam_stats/v_t_mean": 1.7462669765072603e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.11052632331848145, "all_logprobs": -0.007265000604093075, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.75, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -4.76837158203125e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -2.1457672119140625e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01059720478951931, "clip_ratio": 0.0, "completion_length": 424.0, "completion_length/correct": 397.0476379394531, "completion_length/correct/max": 1019.0, "completion_length/correct/median": 363.0, "completion_length/correct/min": 250.0, "completion_length/correct/p25": 292.0, "completion_length/correct/p75": 462.0, "completion_length/correct/var": 20729.87890625, "completion_length/incorrect": 612.6666870117188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 344.0, "completion_length/incorrect/min": 334.0, "completion_length/incorrect/p25": 338.75, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 115344.421875, "completion_length/max": 1024.0, "completion_length/median": 363.0, "completion_length/min": 250.0, "completion_length/p25": 306.75, "completion_length/p75": 477.0, "completion_length/var": 36605.578125, "curvature_clip_ratio_token_fisher": 0.005994496867060661, "curvature_clip_ratio_token_hessian": 0.0027024371083825827, "curvature_clip_ratio_total_fisher": 0.005994496867060661, "curvature_clip_ratio_total_full": 0.005994496867060661, "curvature_clip_ratio_total_hessian": 0.0027024371083825827, "epoch": 0.1328, "feature_vector_variance/max_squared_error": 63225.55078125, "feature_vector_variance/metric": 31162.51171875, "generated_tokens/total": 4592616.0, "global_fisher_curvature": 195584.0, "global_fisher_curvature/max": 195584.0, "global_fisher_curvature/median": 195584.0, "global_fisher_curvature/min": 195584.0, "global_fisher_curvature/p25": 195584.0, "global_fisher_curvature/p75": 195584.0, "global_fisher_curvature/p85": 195584.0, "global_fisher_curvature/p90": 195584.0, "global_fisher_curvature/p95": 195584.0, "global_fisher_curvature/p99": 195584.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.0023435354232788e-07, "global_fisher_kl_divergence/max": 2.0023435354232788e-07, "global_fisher_kl_divergence/median": 2.0023435354232788e-07, "global_fisher_kl_divergence/min": 2.0023435354232788e-07, "global_fisher_kl_divergence/p25": 2.0023435354232788e-07, "global_fisher_kl_divergence/p75": 2.0023435354232788e-07, "global_fisher_kl_divergence/p85": 2.0023435354232788e-07, "global_fisher_kl_divergence/p90": 2.0023435354232788e-07, "global_fisher_kl_divergence/p95": 2.0023435354232788e-07, "global_fisher_kl_divergence/p99": 2.0023435354232788e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.01055908203125, "global_full_update_term/max": 0.01055908203125, "global_full_update_term/median": 0.01055908203125, "global_full_update_term/min": 0.01055908203125, "global_full_update_term/p25": 0.01055908203125, "global_full_update_term/p75": 0.01055908203125, "global_full_update_term/p85": 0.01055908203125, "global_full_update_term/p90": 0.01055908203125, "global_full_update_term/p95": 0.01055908203125, "global_full_update_term/p99": 0.01055908203125, "global_full_update_term/var": NaN, "global_hessian_coeff": 51968.0, "global_hessian_coeff/max": 51968.0, "global_hessian_coeff/median": 51968.0, "global_hessian_coeff/min": 51968.0, "global_hessian_coeff/p25": 51968.0, "global_hessian_coeff/p75": 51968.0, "global_hessian_coeff/p99": 51968.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 51968.0, "global_hessian_coeff_abs/max": 51968.0, "global_hessian_coeff_abs/median": 51968.0, "global_hessian_coeff_abs/min": 51968.0, "global_hessian_coeff_abs/p25": 51968.0, "global_hessian_coeff_abs/p75": 51968.0, "global_hessian_coeff_abs/p99": 51968.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.31103718280792236, "learning_rate": 1.282218205837188e-06, "loss": -0.875, "masked_global_fisher_curvature": 10304.0, "masked_global_fisher_curvature/max": 10304.0, "masked_global_fisher_curvature/median": 10304.0, "masked_global_fisher_curvature/min": 10304.0, "masked_global_fisher_curvature/p25": 10304.0, "masked_global_fisher_curvature/p75": 10304.0, "masked_global_fisher_curvature/p85": 10304.0, "masked_global_fisher_curvature/p90": 10304.0, "masked_global_fisher_curvature/p95": 10304.0, "masked_global_fisher_curvature/p99": 10304.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/max": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/median": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/min": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/p25": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/p75": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/p85": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/p90": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/p95": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/p99": 1.0593794286251068e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00177764892578125, "masked_global_full_update_term/max": 0.00177764892578125, "masked_global_full_update_term/median": 0.00177764892578125, "masked_global_full_update_term/min": 0.00177764892578125, "masked_global_full_update_term/p25": 0.00177764892578125, "masked_global_full_update_term/p75": 0.00177764892578125, "masked_global_full_update_term/p85": 0.00177764892578125, "masked_global_full_update_term/p90": 0.00177764892578125, "masked_global_full_update_term/p95": 0.00177764892578125, "masked_global_full_update_term/p99": 0.00177764892578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -40192.0, "masked_global_hessian_coeff/max": -40192.0, "masked_global_hessian_coeff/median": -40192.0, "masked_global_hessian_coeff/min": -40192.0, "masked_global_hessian_coeff/p25": -40192.0, "masked_global_hessian_coeff/p75": -40192.0, "masked_global_hessian_coeff/p99": -40192.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 40192.0, "masked_global_hessian_coeff_abs/max": 40192.0, "masked_global_hessian_coeff_abs/median": 40192.0, "masked_global_hessian_coeff_abs/min": 40192.0, "masked_global_hessian_coeff_abs/p25": 40192.0, "masked_global_hessian_coeff_abs/p75": 40192.0, "masked_global_hessian_coeff_abs/p99": 40192.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 7.6317548751831055, "masked_per_sentence_gradient_norm/max": 46.25, "masked_per_sentence_gradient_norm/median": 5.0625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 1.15625, "masked_per_sentence_gradient_norm/p75": 10.046875, "masked_per_sentence_gradient_norm/var": 77.40657043457031, "masked_per_token_gradient_norm": 0.19785763323307037, "masked_per_token_gradient_norm/max": 66.5, "masked_per_token_gradient_norm/median": 1.3460521586239338e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 1.9567680809018384e-15, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.0849907994270325e-07, "masked_per_token_gradient_norm/var": 6.415334701538086, "masked_sentence_fisher_curvature": 18795.8984375, "masked_sentence_fisher_curvature/max": 59904.0, "masked_sentence_fisher_curvature/median": 13312.0, "masked_sentence_fisher_curvature/min": 6.78125, "masked_sentence_fisher_curvature/p25": 3060.0, "masked_sentence_fisher_curvature/p75": 28032.0, "masked_sentence_fisher_curvature/p85": 42496.0, "masked_sentence_fisher_curvature/p90": 46720.0, "masked_sentence_fisher_curvature/p95": 56064.0, "masked_sentence_fisher_curvature/p99": 59904.0, "masked_sentence_fisher_curvature/var": 318438080.0, "masked_sentence_fisher_kl_divergence": 1.928161275088769e-08, "masked_sentence_fisher_kl_divergence/max": 6.146728992462158e-08, "masked_sentence_fisher_kl_divergence/median": 1.367880031466484e-08, "masked_sentence_fisher_kl_divergence/min": 6.963318810448982e-12, "masked_sentence_fisher_kl_divergence/p25": 3.1395757105201483e-09, "masked_sentence_fisher_kl_divergence/p75": 2.8783688321709633e-08, "masked_sentence_fisher_kl_divergence/p85": 4.353933036327362e-08, "masked_sentence_fisher_kl_divergence/p90": 4.784669727087021e-08, "masked_sentence_fisher_kl_divergence/p95": 5.75091689825058e-08, "masked_sentence_fisher_kl_divergence/p99": 6.146728992462158e-08, "masked_sentence_fisher_kl_divergence/var": 3.35032950267463e-16, "masked_sentence_full_gradient_variance/max_squared_error": 129.56268310546875, "masked_sentence_full_gradient_variance/metric": 129.56268310546875, "masked_sentence_full_gradient_variance/p75": 129.56268310546875, "masked_sentence_full_gradient_variance/p90": 129.56268310546875, "masked_sentence_full_gradient_variance/p95": 129.56268310546875, "masked_sentence_full_gradient_variance/p99": 129.56268310546875, "masked_sentence_full_update_term": 0.0008352994918823242, "masked_sentence_full_update_term/max": 0.00567626953125, "masked_sentence_full_update_term/median": 0.00055694580078125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00011539459228515625, "masked_sentence_full_update_term/p75": 0.0013427734375, "masked_sentence_full_update_term/p85": 0.001556396484375, "masked_sentence_full_update_term/p90": 0.0017852783203125, "masked_sentence_full_update_term/p95": 0.002262115478515625, "masked_sentence_full_update_term/p99": 0.00567626953125, "masked_sentence_full_update_term/var": 9.775512808118947e-07, "masked_sentence_hessian_coeff": -104324.671875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -93184.0, "masked_sentence_hessian_coeff/min": -309248.0, "masked_sentence_hessian_coeff/p25": -151552.0, "masked_sentence_hessian_coeff/p75": -43904.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 7287380480.0, "masked_sentence_hessian_coeff_abs": 104324.671875, "masked_sentence_hessian_coeff_abs/max": 309248.0, "masked_sentence_hessian_coeff_abs/median": 88576.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 43904.0, "masked_sentence_hessian_coeff_abs/p75": 151552.0, "masked_sentence_hessian_coeff_abs/p99": 295628.84375, "masked_sentence_hessian_coeff_abs/var": 7287380480.0, "masked_token_fisher_curvature": 19431.712890625, "masked_token_fisher_curvature/max": 9568256.0, "masked_token_fisher_curvature/median": 1.5449880957918438e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.730439128037627e-24, "masked_token_fisher_curvature/p75": 1.4477308241112041e-13, "masked_token_fisher_curvature/p85": 6.866684998385608e-11, "masked_token_fisher_curvature/p90": 1.1303654900984839e-08, "masked_token_fisher_curvature/p95": 3.552436828613281e-05, "masked_token_fisher_curvature/p99": 28701.5, "masked_token_fisher_curvature/var": 107419574272.0, "masked_token_fisher_kl_divergence": 1.994105858216244e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 1.590047762086102e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.8430367253878245e-36, "masked_token_fisher_kl_divergence/p75": 1.4863401631812216e-25, "masked_token_fisher_kl_divergence/p85": 7.031035206700735e-23, "masked_token_fisher_kl_divergence/p90": 1.1604103223200149e-20, "masked_token_fisher_kl_divergence/p95": 3.642919299551295e-17, "masked_token_fisher_kl_divergence/p99": 2.947990651591681e-08, "masked_token_fisher_kl_divergence/var": 1.1311979998540611e-13, "masked_token_full_update_term": 1.5088422514963895e-05, "masked_token_full_update_term/max": 0.00439453125, "masked_token_full_update_term/median": 2.022291161569642e-20, "masked_token_full_update_term/min": -8.493661880493164e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.191091917959966e-15, "masked_token_full_update_term/p85": 2.9665159217984183e-13, "masked_token_full_update_term/p90": 5.4569682106375694e-12, "masked_token_full_update_term/p95": 9.240466170012951e-10, "masked_token_full_update_term/p99": 0.00015931203961372375, "masked_token_full_update_term/var": 3.765426725976795e-08, "masked_token_hessian_coeff": -116472.8203125, "masked_token_hessian_coeff/max": 26624.0, "masked_token_hessian_coeff/median": -4.383764462545514e-10, "masked_token_hessian_coeff/min": -27787264.0, "masked_token_hessian_coeff/p25": -9.715557098388672e-06, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.068359375, "masked_token_hessian_coeff/var": 1923377790976.0, "masked_token_hessian_coeff_abs": 116474.8046875, "masked_token_hessian_coeff_abs/max": 27787264.0, "masked_token_hessian_coeff_abs/median": 4.563480615615845e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 6.075140390748857e-13, "masked_token_hessian_coeff_abs/p75": 5.91278076171875e-05, "masked_token_hessian_coeff_abs/p99": 1608992.0, "masked_token_hessian_coeff_abs/var": 1923377397760.0, "mean_logprobs": -0.007049560546875, "mean_logprobs/var": 2.7179718017578125e-05, "num_completions/total": 7968, "per_sentence_gradient_norm": 44.82958984375, "per_sentence_gradient_norm/max": 231.0, "per_sentence_gradient_norm/median": 25.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 1.15625, "per_sentence_gradient_norm/p75": 74.0, "per_sentence_gradient_norm/var": 2493.17236328125, "per_token_feature_norm": 190.79605102539062, "per_token_feature_norm/max": 268.0, "per_token_feature_norm/median": 191.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 144.34751892089844, "per_token_gradient_norm": 0.9749918580055237, "per_token_gradient_norm/max": 282.0, "per_token_gradient_norm/median": 1.482476363889873e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 2.1788126858268697e-15, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.1688098311424255e-07, "per_token_gradient_norm/var": 130.37342834472656, "per_token_policy_error_norm": 0.004327210132032633, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004138545598834753, "policy_entropy": 0.007612745277583599, "policy_entropy/max": 0.91015625, "policy_entropy/median": 3.80168785341084e-10, "policy_entropy/min": 1.5881867761018131e-22, "policy_entropy/p25": 1.2505552149377763e-12, "policy_entropy/p75": 8.021015673875809e-08, "policy_entropy/var": 0.003688955679535866, "policy_loss": -0.875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.11052632331848145, "policy_sharpness": 9.784806251525879, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.411865234375, "reward": 0.875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.11052632331848145, "rewards/accuracy_reward": 0.875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.11052632331848145, "sentence_fisher_curvature": 390834.25, "sentence_fisher_curvature/max": 1310720.0, "sentence_fisher_curvature/median": 301056.0, "sentence_fisher_curvature/min": 6.78125, "sentence_fisher_curvature/p25": 4672.0, "sentence_fisher_curvature/p75": 643072.0, "sentence_fisher_curvature/p85": 775168.0, "sentence_fisher_curvature/p90": 911360.0, "sentence_fisher_curvature/p95": 1083392.0, "sentence_fisher_curvature/p99": 1310720.0, "sentence_fisher_curvature/var": 131941810176.0, "sentence_fisher_kl_divergence": 4.0103890341924853e-07, "sentence_fisher_kl_divergence/max": 1.341104507446289e-06, "sentence_fisher_kl_divergence/median": 3.0919909477233887e-07, "sentence_fisher_kl_divergence/min": 6.963318810448982e-12, "sentence_fisher_kl_divergence/p25": 4.794856067746878e-09, "sentence_fisher_kl_divergence/p75": 6.593763828277588e-07, "sentence_fisher_kl_divergence/p85": 7.962808012962341e-07, "sentence_fisher_kl_divergence/p90": 9.350478649139404e-07, "sentence_fisher_kl_divergence/p95": 1.1119991540908813e-06, "sentence_fisher_kl_divergence/p99": 1.341104507446289e-06, "sentence_fisher_kl_divergence/var": 1.3875078657635959e-13, "sentence_full_gradient_variance/max_squared_error": 4362.89404296875, "sentence_full_gradient_variance/metric": 4362.89404296875, "sentence_full_gradient_variance/p75": 4362.89404296875, "sentence_full_gradient_variance/p90": 4362.89404296875, "sentence_full_gradient_variance/p95": 4362.89404296875, "sentence_full_gradient_variance/p99": 4362.89404296875, "sentence_full_update_term": 0.005041231866925955, "sentence_full_update_term/max": 0.0269775390625, "sentence_full_update_term/median": 0.00341796875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00011539459228515625, "sentence_full_update_term/p75": 0.0089874267578125, "sentence_full_update_term/p85": 0.01043701171875, "sentence_full_update_term/p90": 0.011871337890625, "sentence_full_update_term/p95": 0.0142822265625, "sentence_full_update_term/p99": 0.017236359417438507, "sentence_full_update_term/var": 2.735385533014778e-05, "sentence_hessian_coeff": 24357.375, "sentence_hessian_coeff/max": 647168.0, "sentence_hessian_coeff/median": -1848.0, "sentence_hessian_coeff/min": -452608.0, "sentence_hessian_coeff/p25": -53952.0, "sentence_hessian_coeff/p75": 88064.0, "sentence_hessian_coeff/p99": 616038.5, "sentence_hessian_coeff/var": 51308257280.0, "sentence_hessian_coeff_abs": 154588.046875, "sentence_hessian_coeff_abs/max": 647168.0, "sentence_hessian_coeff_abs/median": 76288.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 5672.0, "sentence_hessian_coeff_abs/p75": 243712.0, "sentence_hessian_coeff_abs/p99": 616038.5, "sentence_hessian_coeff_abs/var": 27758761984.0, "step": 83, "token_fisher_curvature": 389713.40625, "token_fisher_curvature/max": 222298112.0, "token_fisher_curvature/median": 1.748276003132876e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.169878828456423e-24, "token_fisher_curvature/p75": 1.758593271006248e-13, "token_fisher_curvature/p85": 1.0368239600211382e-10, "token_fisher_curvature/p90": 2.088472683681175e-08, "token_fisher_curvature/p95": 0.00016726553440093994, "token_fisher_curvature/p99": 945664.0, "token_fisher_curvature/var": 42191267299328.0, "token_fisher_kl_divergence": 3.997367059582757e-07, "token_fisher_kl_divergence/max": 0.00022792816162109375, "token_fisher_kl_divergence/median": 1.7995889400354332e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.3132344657167395e-36, "token_fisher_kl_divergence/p75": 1.8013796542902849e-25, "token_fisher_kl_divergence/p85": 1.0670629901934057e-22, "token_fisher_kl_divergence/p90": 2.1429768129411288e-20, "token_fisher_kl_divergence/p95": 1.7194430016083395e-16, "token_fisher_kl_divergence/p99": 9.6810981631279e-07, "token_fisher_kl_divergence/var": 4.439144191126232e-11, "token_full_update_term": 7.593040936626494e-05, "token_full_update_term/max": 0.0216064453125, "token_full_update_term/median": 2.6046263128069735e-20, "token_full_update_term/min": -8.493661880493164e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.9682480351975755e-15, "token_full_update_term/p85": 3.9257486150745535e-13, "token_full_update_term/p90": 7.958078640513122e-12, "token_full_update_term/p95": 2.1973391994833946e-09, "token_full_update_term/p99": 0.0012581348419189453, "token_full_update_term/var": 7.973915217007743e-07, "token_hessian_coeff": 1975.730224609375, "token_hessian_coeff/max": 221249536.0, "token_hessian_coeff/median": -4.420144250616431e-10, "token_hessian_coeff/min": -29097984.0, "token_hessian_coeff/p25": -1.0132789611816406e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.158203125, "token_hessian_coeff/var": 29108295696384.0, "token_hessian_coeff_abs": 384737.5625, "token_hessian_coeff_abs/max": 221249536.0, "token_hessian_coeff_abs/median": 4.912726581096649e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 6.608047442568932e-13, "token_hessian_coeff_abs/p75": 6.914138793945312e-05, "token_hessian_coeff_abs/p99": 9306112.0, "token_hessian_coeff_abs/var": 28960270319616.0 }, { "accuracy_reward": 0.6770833730697632, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.22094300389289856, "adam_stats/lm_head/lr_effective_max": 6.345657766360091e-06, "adam_stats/lm_head/lr_effective_mean": -1.7348540139183743e-12, "adam_stats/lm_head/lr_effective_min": -6.560980637004832e-06, "adam_stats/lm_head/lr_effective_std": 1.8817354430211708e-07, "adam_stats/lr_effective_max": 6.7391583797871135e-06, "adam_stats/lr_effective_mean": 1.0002321020052829e-11, "adam_stats/lr_effective_min": -6.823128387623001e-06, "adam_stats/m_t_max": 0.0012548449449241161, "adam_stats/m_t_mean": 4.9083397936355855e-12, "adam_stats/m_t_min": -0.001556631177663803, "adam_stats/v_t_max": 2.4876408133422956e-05, "adam_stats/v_t_mean": 1.7504170855831003e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6770833730697632, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.22094300389289856, "all_logprobs": -0.007957919500768185, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.75, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -1.0728836059570312e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -4.5299530029296875e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012265777215361595, "clip_ratio": 0.0, "completion_length": 442.46875, "completion_length/correct": 387.03076171875, "completion_length/correct/max": 789.0, "completion_length/correct/median": 344.0, "completion_length/correct/min": 254.0, "completion_length/correct/p25": 267.0, "completion_length/correct/p75": 519.0, "completion_length/correct/var": 22182.216796875, "completion_length/incorrect": 558.7096557617188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 430.0, "completion_length/incorrect/min": 280.0, "completion_length/incorrect/p25": 284.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 99120.3515625, "completion_length/max": 1024.0, "completion_length/median": 362.0, "completion_length/min": 254.0, "completion_length/p25": 278.5, "completion_length/p75": 528.75, "completion_length/var": 52756.96875, "curvature_clip_ratio_token_fisher": 0.004237587563693523, "curvature_clip_ratio_token_hessian": 0.0019539985805749893, "curvature_clip_ratio_total_fisher": 0.004237587563693523, "curvature_clip_ratio_total_full": 0.004237587563693523, "curvature_clip_ratio_total_hessian": 0.0019539985805749893, "epoch": 0.1344, "feature_vector_variance/max_squared_error": 75652.984375, "feature_vector_variance/metric": 31157.001953125, "generated_tokens/total": 4635093.0, "global_fisher_curvature": 154624.0, "global_fisher_curvature/max": 154624.0, "global_fisher_curvature/median": 154624.0, "global_fisher_curvature/min": 154624.0, "global_fisher_curvature/p25": 154624.0, "global_fisher_curvature/p75": 154624.0, "global_fisher_curvature/p85": 154624.0, "global_fisher_curvature/p90": 154624.0, "global_fisher_curvature/p95": 154624.0, "global_fisher_curvature/p99": 154624.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.2665987014770508e-07, "global_fisher_kl_divergence/max": 1.2665987014770508e-07, "global_fisher_kl_divergence/median": 1.2665987014770508e-07, "global_fisher_kl_divergence/min": 1.2665987014770508e-07, "global_fisher_kl_divergence/p25": 1.2665987014770508e-07, "global_fisher_kl_divergence/p75": 1.2665987014770508e-07, "global_fisher_kl_divergence/p85": 1.2665987014770508e-07, "global_fisher_kl_divergence/p90": 1.2665987014770508e-07, "global_fisher_kl_divergence/p95": 1.2665987014770508e-07, "global_fisher_kl_divergence/p99": 1.2665987014770508e-07, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.027099609375, "global_full_update_term/max": 0.027099609375, "global_full_update_term/median": 0.027099609375, "global_full_update_term/min": 0.027099609375, "global_full_update_term/p25": 0.027099609375, "global_full_update_term/p75": 0.027099609375, "global_full_update_term/p85": 0.027099609375, "global_full_update_term/p90": 0.027099609375, "global_full_update_term/p95": 0.027099609375, "global_full_update_term/p99": 0.027099609375, "global_full_update_term/var": NaN, "global_hessian_coeff": 51712.0, "global_hessian_coeff/max": 51712.0, "global_hessian_coeff/median": 51712.0, "global_hessian_coeff/min": 51712.0, "global_hessian_coeff/p25": 51712.0, "global_hessian_coeff/p75": 51712.0, "global_hessian_coeff/p99": 51712.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 51712.0, "global_hessian_coeff_abs/max": 51712.0, "global_hessian_coeff_abs/median": 51712.0, "global_hessian_coeff_abs/min": 51712.0, "global_hessian_coeff_abs/p25": 51712.0, "global_hessian_coeff_abs/p75": 51712.0, "global_hessian_coeff_abs/p99": 51712.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.24554981291294098, "learning_rate": 1.1396392788268054e-06, "loss": -0.6771, "masked_global_fisher_curvature": 11328.0, "masked_global_fisher_curvature/max": 11328.0, "masked_global_fisher_curvature/median": 11328.0, "masked_global_fisher_curvature/min": 11328.0, "masked_global_fisher_curvature/p25": 11328.0, "masked_global_fisher_curvature/p75": 11328.0, "masked_global_fisher_curvature/p85": 11328.0, "masked_global_fisher_curvature/p90": 11328.0, "masked_global_fisher_curvature/p95": 11328.0, "masked_global_fisher_curvature/p99": 11328.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/max": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/median": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/min": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/p25": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/p75": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/p85": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/p90": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/p95": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/p99": 9.313225746154785e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.005615234375, "masked_global_full_update_term/max": 0.005615234375, "masked_global_full_update_term/median": 0.005615234375, "masked_global_full_update_term/min": 0.005615234375, "masked_global_full_update_term/p25": 0.005615234375, "masked_global_full_update_term/p75": 0.005615234375, "masked_global_full_update_term/p85": 0.005615234375, "masked_global_full_update_term/p90": 0.005615234375, "masked_global_full_update_term/p95": 0.005615234375, "masked_global_full_update_term/p99": 0.005615234375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -28928.0, "masked_global_hessian_coeff/max": -28928.0, "masked_global_hessian_coeff/median": -28928.0, "masked_global_hessian_coeff/min": -28928.0, "masked_global_hessian_coeff/p25": -28928.0, "masked_global_hessian_coeff/p75": -28928.0, "masked_global_hessian_coeff/p99": -28928.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 28928.0, "masked_global_hessian_coeff_abs/max": 28928.0, "masked_global_hessian_coeff_abs/median": 28928.0, "masked_global_hessian_coeff_abs/min": 28928.0, "masked_global_hessian_coeff_abs/p25": 28928.0, "masked_global_hessian_coeff_abs/p75": 28928.0, "masked_global_hessian_coeff_abs/p99": 28928.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 8.806559562683105, "masked_per_sentence_gradient_norm/max": 50.0, "masked_per_sentence_gradient_norm/median": 3.96875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 15.8125, "masked_per_sentence_gradient_norm/var": 139.49783325195312, "masked_per_token_gradient_norm": 0.1641460359096527, "masked_per_token_gradient_norm/max": 63.75, "masked_per_token_gradient_norm/median": 5.115907697472721e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.584420144557953e-08, "masked_per_token_gradient_norm/var": 5.285000801086426, "masked_sentence_fisher_curvature": 15928.9951171875, "masked_sentence_fisher_curvature/max": 125952.0, "masked_sentence_fisher_curvature/median": 7456.0, "masked_sentence_fisher_curvature/min": 61.5, "masked_sentence_fisher_curvature/p25": 627.5, "masked_sentence_fisher_curvature/p75": 20480.0, "masked_sentence_fisher_curvature/p85": 39424.0, "masked_sentence_fisher_curvature/p90": 46336.0, "masked_sentence_fisher_curvature/p95": 61376.0, "masked_sentence_fisher_curvature/p99": 97740.890625, "masked_sentence_fisher_curvature/var": 534889600.0, "masked_sentence_fisher_kl_divergence": 1.3089589856463135e-08, "masked_sentence_fisher_kl_divergence/max": 1.0337680578231812e-07, "masked_sentence_fisher_kl_divergence/median": 6.1409082263708115e-09, "masked_sentence_fisher_kl_divergence/min": 5.047695594839752e-11, "masked_sentence_fisher_kl_divergence/p25": 5.165929906070232e-10, "masked_sentence_fisher_kl_divergence/p75": 1.6851117834448814e-08, "masked_sentence_fisher_kl_divergence/p85": 3.247987478971481e-08, "masked_sentence_fisher_kl_divergence/p90": 3.8067810237407684e-08, "masked_sentence_fisher_kl_divergence/p95": 5.0407834351062775e-08, "masked_sentence_fisher_kl_divergence/p99": 8.037321208576031e-08, "masked_sentence_fisher_kl_divergence/var": 3.608775401447474e-16, "masked_sentence_full_gradient_variance/max_squared_error": 207.9127197265625, "masked_sentence_full_gradient_variance/metric": 207.9127197265625, "masked_sentence_full_gradient_variance/p75": 207.9127197265625, "masked_sentence_full_gradient_variance/p90": 207.9127197265625, "masked_sentence_full_gradient_variance/p95": 207.9127197265625, "masked_sentence_full_gradient_variance/p99": 207.9127197265625, "masked_sentence_full_update_term": 0.000931084156036377, "masked_sentence_full_update_term/max": 0.007354736328125, "masked_sentence_full_update_term/median": 0.000209808349609375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0015506744384765625, "masked_sentence_full_update_term/p85": 0.002048492431640625, "masked_sentence_full_update_term/p90": 0.00254058837890625, "masked_sentence_full_update_term/p95": 0.003704071044921875, "masked_sentence_full_update_term/p99": 0.00564423156902194, "masked_sentence_full_update_term/var": 1.8515790998208104e-06, "masked_sentence_hessian_coeff": -86924.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -38912.0, "masked_sentence_hessian_coeff/min": -460800.0, "masked_sentence_hessian_coeff/p25": -144640.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 11123812352.0, "masked_sentence_hessian_coeff_abs": 86924.0, "masked_sentence_hessian_coeff_abs/max": 460800.0, "masked_sentence_hessian_coeff_abs/median": 21120.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 144640.0, "masked_sentence_hessian_coeff_abs/p99": 412160.15625, "masked_sentence_hessian_coeff_abs/var": 11123812352.0, "masked_token_fisher_curvature": 18066.263671875, "masked_token_fisher_curvature/max": 11730944.0, "masked_token_fisher_curvature/median": 4.824699667560495e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.019209235818551e-23, "masked_token_fisher_curvature/p75": 5.293543381412746e-13, "masked_token_fisher_curvature/p85": 2.8194335754960775e-10, "masked_token_fisher_curvature/p90": 3.608874976634979e-08, "masked_token_fisher_curvature/p95": 0.00020885467529296875, "masked_token_fisher_curvature/p99": 26649.0, "masked_token_fisher_curvature/var": 115342458880.0, "masked_token_fisher_kl_divergence": 1.4842438389450763e-08, "masked_token_fisher_kl_divergence/max": 9.655952453613281e-06, "masked_token_fisher_kl_divergence/median": 3.9689564293932156e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.482644068936671e-35, "masked_token_fisher_kl_divergence/p75": 4.362085261510107e-25, "masked_token_fisher_kl_divergence/p85": 2.3161057151484775e-22, "masked_token_fisher_kl_divergence/p90": 2.964615315390051e-20, "masked_token_fisher_kl_divergence/p95": 1.717376241217039e-16, "masked_token_fisher_kl_divergence/p99": 2.1908817871008068e-08, "masked_token_fisher_kl_divergence/var": 7.780399452960926e-14, "masked_token_full_update_term": 1.1584624189708848e-05, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.4421682357788086e-06, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 2.6194324487249787e-16, "masked_token_full_update_term/p85": 5.3734794391857577e-14, "masked_token_full_update_term/p90": 1.7266188478970435e-12, "masked_token_full_update_term/p95": 2.473825588822365e-10, "masked_token_full_update_term/p99": 6.914138793945312e-05, "masked_token_full_update_term/var": 2.684870459290778e-08, "masked_token_hessian_coeff": -100267.0078125, "masked_token_hessian_coeff/max": 1920.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -29097984.0, "masked_token_hessian_coeff/p25": -6.742775440216064e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.04154205322265625, "masked_token_hessian_coeff/var": 1672034910208.0, "masked_token_hessian_coeff_abs": 100267.6484375, "masked_token_hessian_coeff_abs/max": 29097984.0, "masked_token_hessian_coeff_abs/median": 1.800799509510398e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.2516975402832031e-05, "masked_token_hessian_coeff_abs/p99": 725312.0, "masked_token_hessian_coeff_abs/var": 1672034910208.0, "mean_logprobs": -0.00738525390625, "mean_logprobs/var": 4.363059997558594e-05, "num_completions/total": 8064, "per_sentence_gradient_norm": 41.13671875, "per_sentence_gradient_norm/max": 278.0, "per_sentence_gradient_norm/median": 13.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 56.5, "per_sentence_gradient_norm/var": 3709.35009765625, "per_token_feature_norm": 190.1078643798828, "per_token_feature_norm/max": 286.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 102.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 130.56967163085938, "per_token_gradient_norm": 0.761073887348175, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 5.684341886080801e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.9685907065868378e-08, "per_token_gradient_norm/var": 103.90804290771484, "per_token_policy_error_norm": 0.004655828233808279, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004537784960120916, "policy_entropy": 0.008344891481101513, "policy_entropy/max": 2.75, "policy_entropy/median": 7.130438461899757e-10, "policy_entropy/min": 5.241016361135983e-21, "policy_entropy/p25": 2.2026824808563106e-12, "policy_entropy/p75": 1.2852251529693604e-07, "policy_entropy/var": 0.004624331835657358, "policy_loss": -0.6770833730697632, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.22094300389289856, "policy_sharpness": 9.764900207519531, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5252375602722168, "reward": 0.6770833730697632, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.22094300389289856, "rewards/accuracy_reward": 0.6770833730697632, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.22094300389289856, "sentence_fisher_curvature": 334762.125, "sentence_fisher_curvature/max": 2146304.0, "sentence_fisher_curvature/median": 116736.0, "sentence_fisher_curvature/min": 61.5, "sentence_fisher_curvature/p25": 978.0, "sentence_fisher_curvature/p75": 527872.0, "sentence_fisher_curvature/p85": 692224.0, "sentence_fisher_curvature/p90": 917504.0, "sentence_fisher_curvature/p95": 1112064.0, "sentence_fisher_curvature/p99": 1648232.0, "sentence_fisher_curvature/var": 179288244224.0, "sentence_fisher_kl_divergence": 2.7527858037501574e-07, "sentence_fisher_kl_divergence/max": 1.7657876014709473e-06, "sentence_fisher_kl_divergence/median": 9.592622518539429e-08, "sentence_fisher_kl_divergence/min": 5.047695594839752e-11, "sentence_fisher_kl_divergence/p25": 8.039933163672686e-10, "sentence_fisher_kl_divergence/p75": 4.330649971961975e-07, "sentence_fisher_kl_divergence/p85": 5.699694156646729e-07, "sentence_fisher_kl_divergence/p90": 7.543712854385376e-07, "sentence_fisher_kl_divergence/p95": 9.154900908470154e-07, "sentence_fisher_kl_divergence/p99": 1.355261929347762e-06, "sentence_fisher_kl_divergence/var": 1.213045641582436e-13, "sentence_full_gradient_variance/max_squared_error": 5287.9404296875, "sentence_full_gradient_variance/metric": 5287.9404296875, "sentence_full_gradient_variance/p75": 5287.9404296875, "sentence_full_gradient_variance/p90": 5287.9404296875, "sentence_full_gradient_variance/p95": 5287.9404296875, "sentence_full_gradient_variance/p99": 5287.9404296875, "sentence_full_update_term": 0.0046114129945635796, "sentence_full_update_term/max": 0.04736328125, "sentence_full_update_term/median": 0.0015869140625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.006988525390625, "sentence_full_update_term/p85": 0.0089263916015625, "sentence_full_update_term/p90": 0.01116943359375, "sentence_full_update_term/p95": 0.013824462890625, "sentence_full_update_term/p99": 0.03391117602586746, "sentence_full_update_term/var": 5.095990854897536e-05, "sentence_hessian_coeff": 43825.0, "sentence_hessian_coeff/max": 1318912.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -460800.0, "sentence_hessian_coeff/p25": -23808.0, "sentence_hessian_coeff/p75": 40448.0, "sentence_hessian_coeff/p99": 867534.25, "sentence_hessian_coeff/var": 63434989568.0, "sentence_hessian_coeff_abs": 133430.34375, "sentence_hessian_coeff_abs/max": 1318912.0, "sentence_hessian_coeff_abs/median": 32256.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 178688.0, "sentence_hessian_coeff_abs/p99": 867534.25, "sentence_hessian_coeff_abs/var": 47384772608.0, "step": 84, "token_fisher_curvature": 332647.96875, "token_fisher_curvature/max": 229638144.0, "token_fisher_curvature/median": 5.285485590866834e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 3.288042934898285e-23, "token_fisher_curvature/p75": 6.430411758628907e-13, "token_fisher_curvature/p85": 3.728928277269006e-10, "token_fisher_curvature/p90": 5.587935447692871e-08, "token_fisher_curvature/p95": 0.000583648681640625, "token_fisher_curvature/p99": 282624.0, "token_fisher_curvature/var": 37376155975680.0, "token_fisher_kl_divergence": 2.733596033976937e-07, "token_fisher_kl_divergence/max": 0.0001888275146484375, "token_fisher_kl_divergence/median": 4.338734978715565e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.7083389842945504e-35, "token_fisher_kl_divergence/p75": 5.299125799167834e-25, "token_fisher_kl_divergence/p85": 3.0605682664462024e-22, "token_fisher_kl_divergence/p90": 4.5951537388545793e-20, "token_fisher_kl_divergence/p95": 4.787836793695988e-16, "token_fisher_kl_divergence/p99": 2.3189932107925415e-07, "token_fisher_kl_divergence/var": 2.5244068987961832e-11, "token_full_update_term": 5.465831054607406e-05, "token_full_update_term/max": 0.0196533203125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.4421682357788086e-06, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.0531133177191805e-16, "token_full_update_term/p85": 6.439293542825908e-14, "token_full_update_term/p90": 2.364664020149121e-12, "token_full_update_term/p95": 4.427391786521184e-10, "token_full_update_term/p99": 0.00055694580078125, "token_full_update_term/var": 5.421900937108148e-07, "token_hessian_coeff": 29076.0546875, "token_hessian_coeff/max": 228589568.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -29097984.0, "token_hessian_coeff/p25": -7.040798664093018e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.07543182373046875, "token_hessian_coeff/var": 24809773727744.0, "token_hessian_coeff_abs": 315599.1875, "token_hessian_coeff_abs/max": 228589568.0, "token_hessian_coeff_abs/median": 2.1373125491663814e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 1.33514404296875e-05, "token_hessian_coeff_abs/p99": 4988672.0, "token_hessian_coeff_abs/var": 24711014645760.0 }, { "accuracy_reward": 0.4270833432674408, "accuracy_reward/correct": 0.9999999403953552, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 0.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24725879728794098, "adam_stats/lm_head/lr_effective_max": 5.832745955558494e-06, "adam_stats/lm_head/lr_effective_mean": 5.489135621733476e-12, "adam_stats/lm_head/lr_effective_min": -5.874216640222585e-06, "adam_stats/lm_head/lr_effective_std": 1.5991361124179093e-07, "adam_stats/lr_effective_max": 6.010304787196219e-06, "adam_stats/lr_effective_mean": 2.1333077665497413e-11, "adam_stats/lr_effective_min": -5.958133897365769e-06, "adam_stats/m_t_max": 0.0013002589112147689, "adam_stats/m_t_mean": 4.871726719951619e-12, "adam_stats/m_t_min": -0.0013513769954442978, "adam_stats/v_t_max": 2.485270488250535e-05, "adam_stats/v_t_mean": 1.7535532487872318e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.4270833432674408, "advantages/max": 1.0, "advantages/median": 0.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24725879728794098, "all_logprobs": -0.010669424198567867, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.03125, "all_logprobs/p1": -0.251953125, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00010728836059570312, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015806984156370163, "clip_ratio": 0.0, "completion_length": 507.66668701171875, "completion_length/correct": 432.2682800292969, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 292.0, "completion_length/correct/min": 218.0, "completion_length/correct/p25": 267.0, "completion_length/correct/p75": 493.0, "completion_length/correct/var": 60278.30078125, "completion_length/incorrect": 563.8726806640625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 408.0, "completion_length/incorrect/min": 246.0, "completion_length/incorrect/p25": 347.0, "completion_length/incorrect/p75": 847.5, "completion_length/incorrect/var": 80802.265625, "completion_length/max": 1024.0, "completion_length/median": 378.0, "completion_length/min": 218.0, "completion_length/p25": 294.75, "completion_length/p75": 691.25, "completion_length/var": 75592.5, "curvature_clip_ratio_token_fisher": 0.0031393631361424923, "curvature_clip_ratio_token_hessian": 0.001066973083652556, "curvature_clip_ratio_total_fisher": 0.0031393631361424923, "curvature_clip_ratio_total_full": 0.0031393631361424923, "curvature_clip_ratio_total_hessian": 0.001066973083652556, "epoch": 0.136, "feature_vector_variance/max_squared_error": 60945.02734375, "feature_vector_variance/metric": 31150.1640625, "generated_tokens/total": 4683829.0, "global_fisher_curvature": 108544.0, "global_fisher_curvature/max": 108544.0, "global_fisher_curvature/median": 108544.0, "global_fisher_curvature/min": 108544.0, "global_fisher_curvature/p25": 108544.0, "global_fisher_curvature/p75": 108544.0, "global_fisher_curvature/p85": 108544.0, "global_fisher_curvature/p90": 108544.0, "global_fisher_curvature/p95": 108544.0, "global_fisher_curvature/p99": 108544.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.031485438346863e-08, "global_fisher_kl_divergence/max": 7.031485438346863e-08, "global_fisher_kl_divergence/median": 7.031485438346863e-08, "global_fisher_kl_divergence/min": 7.031485438346863e-08, "global_fisher_kl_divergence/p25": 7.031485438346863e-08, "global_fisher_kl_divergence/p75": 7.031485438346863e-08, "global_fisher_kl_divergence/p85": 7.031485438346863e-08, "global_fisher_kl_divergence/p90": 7.031485438346863e-08, "global_fisher_kl_divergence/p95": 7.031485438346863e-08, "global_fisher_kl_divergence/p99": 7.031485438346863e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0062255859375, "global_full_update_term/max": 0.0062255859375, "global_full_update_term/median": 0.0062255859375, "global_full_update_term/min": 0.0062255859375, "global_full_update_term/p25": 0.0062255859375, "global_full_update_term/p75": 0.0062255859375, "global_full_update_term/p85": 0.0062255859375, "global_full_update_term/p90": 0.0062255859375, "global_full_update_term/p95": 0.0062255859375, "global_full_update_term/p99": 0.0062255859375, "global_full_update_term/var": NaN, "global_hessian_coeff": 21760.0, "global_hessian_coeff/max": 21760.0, "global_hessian_coeff/median": 21760.0, "global_hessian_coeff/min": 21760.0, "global_hessian_coeff/p25": 21760.0, "global_hessian_coeff/p75": 21760.0, "global_hessian_coeff/p99": 21760.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 21760.0, "global_hessian_coeff_abs/max": 21760.0, "global_hessian_coeff_abs/median": 21760.0, "global_hessian_coeff_abs/min": 21760.0, "global_hessian_coeff_abs/p25": 21760.0, "global_hessian_coeff_abs/p75": 21760.0, "global_hessian_coeff_abs/p99": 21760.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.20875687897205353, "learning_rate": 1.0048094716167097e-06, "loss": -0.4271, "masked_global_fisher_curvature": 11456.0, "masked_global_fisher_curvature/max": 11456.0, "masked_global_fisher_curvature/median": 11456.0, "masked_global_fisher_curvature/min": 11456.0, "masked_global_fisher_curvature/p25": 11456.0, "masked_global_fisher_curvature/p75": 11456.0, "masked_global_fisher_curvature/p85": 11456.0, "masked_global_fisher_curvature/p90": 11456.0, "masked_global_fisher_curvature/p95": 11456.0, "masked_global_fisher_curvature/p99": 11456.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/max": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/median": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/min": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/p25": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/p75": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/p85": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/p90": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/p95": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/p99": 7.450580596923828e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00159454345703125, "masked_global_full_update_term/max": 0.00159454345703125, "masked_global_full_update_term/median": 0.00159454345703125, "masked_global_full_update_term/min": 0.00159454345703125, "masked_global_full_update_term/p25": 0.00159454345703125, "masked_global_full_update_term/p75": 0.00159454345703125, "masked_global_full_update_term/p85": 0.00159454345703125, "masked_global_full_update_term/p90": 0.00159454345703125, "masked_global_full_update_term/p95": 0.00159454345703125, "masked_global_full_update_term/p99": 0.00159454345703125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -24448.0, "masked_global_hessian_coeff/max": -24448.0, "masked_global_hessian_coeff/median": -24448.0, "masked_global_hessian_coeff/min": -24448.0, "masked_global_hessian_coeff/p25": -24448.0, "masked_global_hessian_coeff/p75": -24448.0, "masked_global_hessian_coeff/p99": -24448.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 24448.0, "masked_global_hessian_coeff_abs/max": 24448.0, "masked_global_hessian_coeff_abs/median": 24448.0, "masked_global_hessian_coeff_abs/min": 24448.0, "masked_global_hessian_coeff_abs/p25": 24448.0, "masked_global_hessian_coeff_abs/p75": 24448.0, "masked_global_hessian_coeff_abs/p99": 24448.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 6.5189619064331055, "masked_per_sentence_gradient_norm/max": 44.5, "masked_per_sentence_gradient_norm/median": 0.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 7.8828125, "masked_per_sentence_gradient_norm/var": 127.83647918701172, "masked_per_token_gradient_norm": 0.11171078681945801, "masked_per_token_gradient_norm/max": 73.5, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.4665602066088468e-11, "masked_per_token_gradient_norm/var": 4.173831939697266, "masked_sentence_fisher_curvature": 18752.29296875, "masked_sentence_fisher_curvature/max": 101376.0, "masked_sentence_fisher_curvature/median": 7008.0, "masked_sentence_fisher_curvature/min": 72.0, "masked_sentence_fisher_curvature/p25": 2140.0, "masked_sentence_fisher_curvature/p75": 29696.0, "masked_sentence_fisher_curvature/p85": 51136.0, "masked_sentence_fisher_curvature/p90": 56832.0, "masked_sentence_fisher_curvature/p95": 71680.0, "masked_sentence_fisher_curvature/p99": 93107.2265625, "masked_sentence_fisher_curvature/var": 595444288.0, "masked_sentence_fisher_kl_divergence": 1.2182283626316348e-08, "masked_sentence_fisher_kl_divergence/max": 6.565824151039124e-08, "masked_sentence_fisher_kl_divergence/median": 4.540197551250458e-09, "masked_sentence_fisher_kl_divergence/min": 4.6838977141305804e-11, "masked_sentence_fisher_kl_divergence/p25": 1.3915268937125802e-09, "masked_sentence_fisher_kl_divergence/p75": 1.932494342327118e-08, "masked_sentence_fisher_kl_divergence/p85": 3.317836672067642e-08, "masked_sentence_fisher_kl_divergence/p90": 3.702007234096527e-08, "masked_sentence_fisher_kl_divergence/p95": 4.6566128730773926e-08, "masked_sentence_fisher_kl_divergence/p99": 6.034971988810867e-08, "masked_sentence_fisher_kl_divergence/var": 2.512585330478157e-16, "masked_sentence_full_gradient_variance/max_squared_error": 161.47047424316406, "masked_sentence_full_gradient_variance/metric": 161.47047424316406, "masked_sentence_full_gradient_variance/p75": 161.47047424316406, "masked_sentence_full_gradient_variance/p90": 161.47047424316406, "masked_sentence_full_gradient_variance/p95": 161.47047424316406, "masked_sentence_full_gradient_variance/p99": 161.47047424316406, "masked_sentence_full_update_term": 0.0005584607715718448, "masked_sentence_full_update_term/max": 0.0031585693359375, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0007066726684570312, "masked_sentence_full_update_term/p85": 0.002044677734375, "masked_sentence_full_update_term/p90": 0.002410888671875, "masked_sentence_full_update_term/p95": 0.0024566650390625, "masked_sentence_full_update_term/p99": 0.0026367204263806343, "masked_sentence_full_update_term/var": 8.531342814421805e-07, "masked_sentence_hessian_coeff": -66101.3359375, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -327680.0, "masked_sentence_hessian_coeff/p25": -129152.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 9160463360.0, "masked_sentence_hessian_coeff_abs": 66101.3359375, "masked_sentence_hessian_coeff_abs/max": 327680.0, "masked_sentence_hessian_coeff_abs/median": 0.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 129152.0, "masked_sentence_hessian_coeff_abs/p99": 304332.875, "masked_sentence_hessian_coeff_abs/var": 9160463360.0, "masked_token_fisher_curvature": 16238.01953125, "masked_token_fisher_curvature/max": 15335424.0, "masked_token_fisher_curvature/median": 2.2632720350634905e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 6.1780052000054254e-24, "masked_token_fisher_curvature/p75": 2.753353101070388e-13, "masked_token_fisher_curvature/p85": 1.3005774235352874e-10, "masked_token_fisher_curvature/p90": 2.421438694000244e-08, "masked_token_fisher_curvature/p95": 0.0003490447998046875, "masked_token_fisher_curvature/p99": 24320.0, "masked_token_fisher_curvature/var": 115473891328.0, "masked_token_fisher_kl_divergence": 1.0543123707407176e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 1.4667882456453188e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 4.020190679812223e-36, "masked_token_fisher_kl_divergence/p75": 1.7852237829513586e-25, "masked_token_fisher_kl_divergence/p85": 8.437242248040882e-23, "masked_token_fisher_kl_divergence/p90": 1.5775988642611344e-20, "masked_token_fisher_kl_divergence/p95": 2.2724877535296173e-16, "masked_token_fisher_kl_divergence/p99": 1.5832483768463135e-08, "masked_token_fisher_kl_divergence/var": 4.865671328845543e-14, "masked_token_full_update_term": 7.016804829618195e-06, "masked_token_full_update_term/max": 0.00445556640625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.76837158203125e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 0.0, "masked_token_full_update_term/p85": 1.43982048506075e-16, "masked_token_full_update_term/p90": 9.2148511043888e-15, "masked_token_full_update_term/p95": 1.5063505998114124e-12, "masked_token_full_update_term/p99": 7.470662239938974e-07, "masked_token_full_update_term/var": 1.651191539053798e-08, "masked_token_hessian_coeff": -65090.24609375, "masked_token_hessian_coeff/max": 2880.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -29097984.0, "masked_token_hessian_coeff/p25": -5.275779813018744e-13, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.0021514892578125, "masked_token_hessian_coeff/var": 1196133449728.0, "masked_token_hessian_coeff_abs": 65090.4453125, "masked_token_hessian_coeff_abs/max": 29097984.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 6.00994098931551e-09, "masked_token_hessian_coeff_abs/p99": 3141.75, "masked_token_hessian_coeff_abs/var": 1196133449728.0, "mean_logprobs": -0.0108642578125, "mean_logprobs/var": 4.6253204345703125e-05, "num_completions/total": 8160, "per_sentence_gradient_norm": 26.637371063232422, "per_sentence_gradient_norm/max": 237.0, "per_sentence_gradient_norm/median": 0.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 33.25, "per_sentence_gradient_norm/var": 1923.13623046875, "per_token_feature_norm": 190.47308349609375, "per_token_feature_norm/max": 262.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 122.90128326416016, "per_token_gradient_norm": 0.5819568037986755, "per_token_gradient_norm/max": 300.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.693933882052079e-11, "per_token_gradient_norm/var": 84.64611053466797, "per_token_policy_error_norm": 0.00638769194483757, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.006041418295353651, "policy_entropy": 0.010723277926445007, "policy_entropy/max": 1.6875, "policy_entropy/median": 4.4929038267582655e-10, "policy_entropy/min": 8.84090638696676e-21, "policy_entropy/p25": 1.0658141036401503e-12, "policy_entropy/p75": 8.614733815193176e-08, "policy_entropy/var": 0.006227347534149885, "policy_loss": -0.4270833432674408, "policy_loss/max": 0.0, "policy_loss/median": 0.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24725879728794098, "policy_sharpness": 9.723280906677246, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8492076396942139, "reward": 0.4270833432674408, "reward/max": 1.0, "reward/median": 0.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24725879728794098, "rewards/accuracy_reward": 0.4270833432674408, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 0.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24725879728794098, "sentence_fisher_curvature": 284881.25, "sentence_fisher_curvature/max": 1744896.0, "sentence_fisher_curvature/median": 11328.0, "sentence_fisher_curvature/min": 276.0, "sentence_fisher_curvature/p25": 2760.0, "sentence_fisher_curvature/p75": 500224.0, "sentence_fisher_curvature/p85": 784384.0, "sentence_fisher_curvature/p90": 1001472.0, "sentence_fisher_curvature/p95": 1146880.0, "sentence_fisher_curvature/p99": 1674854.625, "sentence_fisher_curvature/var": 190057267200.0, "sentence_fisher_kl_divergence": 1.8500058729387092e-07, "sentence_fisher_kl_divergence/max": 1.1324882507324219e-06, "sentence_fisher_kl_divergence/median": 7.363269105553627e-09, "sentence_fisher_kl_divergence/min": 1.7917045624926686e-10, "sentence_fisher_kl_divergence/p25": 1.7917045624926686e-09, "sentence_fisher_kl_divergence/p75": 3.2456591725349426e-07, "sentence_fisher_kl_divergence/p85": 5.103647708892822e-07, "sentence_fisher_kl_divergence/p90": 6.50063157081604e-07, "sentence_fisher_kl_divergence/p95": 7.441267371177673e-07, "sentence_fisher_kl_divergence/p99": 1.0900200777541613e-06, "sentence_fisher_kl_divergence/var": 8.015941697307044e-14, "sentence_full_gradient_variance/max_squared_error": 2555.403076171875, "sentence_full_gradient_variance/metric": 2555.403076171875, "sentence_full_gradient_variance/p75": 2555.403076171875, "sentence_full_gradient_variance/p90": 2555.403076171875, "sentence_full_gradient_variance/p95": 2555.403076171875, "sentence_full_gradient_variance/p99": 2555.403076171875, "sentence_full_update_term": 0.0026101272087544203, "sentence_full_update_term/max": 0.0224609375, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.00341796875, "sentence_full_update_term/p85": 0.00653839111328125, "sentence_full_update_term/p90": 0.009765625, "sentence_full_update_term/p95": 0.0114898681640625, "sentence_full_update_term/p99": 0.016546649858355522, "sentence_full_update_term/var": 1.8947699572890997e-05, "sentence_hessian_coeff": 55792.0, "sentence_hessian_coeff/max": 1245184.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -407552.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 1159577.875, "sentence_hessian_coeff/var": 67789230080.0, "sentence_hessian_coeff_abs": 126208.0, "sentence_hessian_coeff_abs/max": 1245184.0, "sentence_hessian_coeff_abs/median": 0.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 193536.0, "sentence_hessian_coeff_abs/p99": 1159577.875, "sentence_hessian_coeff_abs/var": 54838607872.0, "step": 85, "token_fisher_curvature": 264424.0, "token_fisher_curvature/max": 228589568.0, "token_fisher_curvature/median": 2.3852447794681098e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.462348535570529e-24, "token_fisher_curvature/p75": 3.135269821541442e-13, "token_fisher_curvature/p85": 1.6007106751203537e-10, "token_fisher_curvature/p90": 3.702007234096527e-08, "token_fisher_curvature/p95": 0.0008449554443359375, "token_fisher_curvature/p99": 82588.0, "token_fisher_curvature/var": 30241271578624.0, "token_fisher_kl_divergence": 1.7176641620153532e-07, "token_fisher_kl_divergence/max": 0.000148773193359375, "token_fisher_kl_divergence/median": 1.5469069313318278e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 4.208269775943789e-36, "token_fisher_kl_divergence/p75": 2.0336203047873508e-25, "token_fisher_kl_divergence/p85": 1.0381116687540497e-22, "token_fisher_kl_divergence/p90": 2.4034559878340772e-20, "token_fisher_kl_divergence/p95": 5.490399801466594e-16, "token_fisher_kl_divergence/p99": 5.354104359867051e-08, "token_fisher_kl_divergence/var": 1.2764306972501416e-11, "token_full_update_term": 3.684171679196879e-05, "token_full_update_term/max": 0.0172119140625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.76837158203125e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 0.0, "token_full_update_term/p85": 1.8561541192951836e-16, "token_full_update_term/p90": 1.1823875212257917e-14, "token_full_update_term/p95": 2.1742607714259066e-12, "token_full_update_term/p99": 1.617101952433586e-05, "token_full_update_term/var": 3.3975950941567135e-07, "token_hessian_coeff": 42841.85546875, "token_hessian_coeff/max": 225443840.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -29884416.0, "token_hessian_coeff/p25": -5.835332217429823e-13, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.0044144392013549805, "token_hessian_coeff/var": 20614842679296.0, "token_hessian_coeff_abs": 225843.015625, "token_hessian_coeff_abs/max": 225443840.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 6.7229848355054855e-09, "token_hessian_coeff_abs/p99": 57656.0, "token_hessian_coeff_abs/var": 20565670756352.0 }, { "accuracy_reward": 0.5104166865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25252196192741394, "adam_stats/lm_head/lr_effective_max": 4.79411346532288e-06, "adam_stats/lm_head/lr_effective_mean": 3.490488714036344e-12, "adam_stats/lm_head/lr_effective_min": -5.12640781380469e-06, "adam_stats/lm_head/lr_effective_std": 1.3551805011502438e-07, "adam_stats/lr_effective_max": 5.218083970248699e-06, "adam_stats/lr_effective_mean": 3.1589798051706586e-12, "adam_stats/lr_effective_min": -5.21227229910437e-06, "adam_stats/m_t_max": 0.0017641633749008179, "adam_stats/m_t_mean": -2.5183917451432336e-12, "adam_stats/m_t_min": -0.0015824532601982355, "adam_stats/v_t_max": 2.49651784542948e-05, "adam_stats/v_t_mean": 1.7594659453348815e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5104166865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25252196192741394, "all_logprobs": -0.01033876370638609, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.71875, "all_logprobs/p1": -0.205078125, "all_logprobs/p10": -1.0490184649825096e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -9.961123578250408e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01510355994105339, "clip_ratio": 0.0, "completion_length": 447.32293701171875, "completion_length/correct": 405.89794921875, "completion_length/correct/max": 796.0, "completion_length/correct/median": 378.0, "completion_length/correct/min": 225.0, "completion_length/correct/p25": 303.0, "completion_length/correct/p75": 486.0, "completion_length/correct/var": 21154.927734375, "completion_length/incorrect": 490.5106201171875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 399.0, "completion_length/incorrect/min": 247.0, "completion_length/incorrect/p25": 369.5, "completion_length/incorrect/p75": 531.0, "completion_length/incorrect/var": 45025.77734375, "completion_length/max": 1024.0, "completion_length/median": 389.0, "completion_length/min": 225.0, "completion_length/p25": 321.0, "completion_length/p75": 524.0, "completion_length/var": 34298.640625, "curvature_clip_ratio_token_fisher": 0.0029341219924390316, "curvature_clip_ratio_token_hessian": 0.000954753952100873, "curvature_clip_ratio_total_fisher": 0.0029341219924390316, "curvature_clip_ratio_total_full": 0.0029341219924390316, "curvature_clip_ratio_total_hessian": 0.000954753952100873, "epoch": 0.1376, "feature_vector_variance/max_squared_error": 55808.58203125, "feature_vector_variance/metric": 30904.55859375, "generated_tokens/total": 4726772.0, "global_fisher_curvature": 142336.0, "global_fisher_curvature/max": 142336.0, "global_fisher_curvature/median": 142336.0, "global_fisher_curvature/min": 142336.0, "global_fisher_curvature/p25": 142336.0, "global_fisher_curvature/p75": 142336.0, "global_fisher_curvature/p85": 142336.0, "global_fisher_curvature/p90": 142336.0, "global_fisher_curvature/p95": 142336.0, "global_fisher_curvature/p99": 142336.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 7.171183824539185e-08, "global_fisher_kl_divergence/max": 7.171183824539185e-08, "global_fisher_kl_divergence/median": 7.171183824539185e-08, "global_fisher_kl_divergence/min": 7.171183824539185e-08, "global_fisher_kl_divergence/p25": 7.171183824539185e-08, "global_fisher_kl_divergence/p75": 7.171183824539185e-08, "global_fisher_kl_divergence/p85": 7.171183824539185e-08, "global_fisher_kl_divergence/p90": 7.171183824539185e-08, "global_fisher_kl_divergence/p95": 7.171183824539185e-08, "global_fisher_kl_divergence/p99": 7.171183824539185e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0084228515625, "global_full_update_term/max": 0.0084228515625, "global_full_update_term/median": 0.0084228515625, "global_full_update_term/min": 0.0084228515625, "global_full_update_term/p25": 0.0084228515625, "global_full_update_term/p75": 0.0084228515625, "global_full_update_term/p85": 0.0084228515625, "global_full_update_term/p90": 0.0084228515625, "global_full_update_term/p95": 0.0084228515625, "global_full_update_term/p99": 0.0084228515625, "global_full_update_term/var": NaN, "global_hessian_coeff": 44288.0, "global_hessian_coeff/max": 44288.0, "global_hessian_coeff/median": 44288.0, "global_hessian_coeff/min": 44288.0, "global_hessian_coeff/p25": 44288.0, "global_hessian_coeff/p75": 44288.0, "global_hessian_coeff/p99": 44288.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 44288.0, "global_hessian_coeff_abs/max": 44288.0, "global_hessian_coeff_abs/median": 44288.0, "global_hessian_coeff_abs/min": 44288.0, "global_hessian_coeff_abs/p25": 44288.0, "global_hessian_coeff_abs/p75": 44288.0, "global_hessian_coeff_abs/p99": 44288.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.2309853732585907, "learning_rate": 8.778930535580476e-07, "loss": -0.5104, "masked_global_fisher_curvature": 13440.0, "masked_global_fisher_curvature/max": 13440.0, "masked_global_fisher_curvature/median": 13440.0, "masked_global_fisher_curvature/min": 13440.0, "masked_global_fisher_curvature/p25": 13440.0, "masked_global_fisher_curvature/p75": 13440.0, "masked_global_fisher_curvature/p85": 13440.0, "masked_global_fisher_curvature/p90": 13440.0, "masked_global_fisher_curvature/p95": 13440.0, "masked_global_fisher_curvature/p99": 13440.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/max": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/median": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/min": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/p25": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/p75": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/p85": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/p90": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/p95": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/p99": 6.781192496418953e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00141143798828125, "masked_global_full_update_term/max": 0.00141143798828125, "masked_global_full_update_term/median": 0.00141143798828125, "masked_global_full_update_term/min": 0.00141143798828125, "masked_global_full_update_term/p25": 0.00141143798828125, "masked_global_full_update_term/p75": 0.00141143798828125, "masked_global_full_update_term/p85": 0.00141143798828125, "masked_global_full_update_term/p90": 0.00141143798828125, "masked_global_full_update_term/p95": 0.00141143798828125, "masked_global_full_update_term/p99": 0.00141143798828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -36096.0, "masked_global_hessian_coeff/max": -36096.0, "masked_global_hessian_coeff/median": -36096.0, "masked_global_hessian_coeff/min": -36096.0, "masked_global_hessian_coeff/p25": -36096.0, "masked_global_hessian_coeff/p75": -36096.0, "masked_global_hessian_coeff/p99": -36096.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 36096.0, "masked_global_hessian_coeff_abs/max": 36096.0, "masked_global_hessian_coeff_abs/median": 36096.0, "masked_global_hessian_coeff_abs/min": 36096.0, "masked_global_hessian_coeff_abs/p25": 36096.0, "masked_global_hessian_coeff_abs/p75": 36096.0, "masked_global_hessian_coeff_abs/p99": 36096.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 8.613119125366211, "masked_per_sentence_gradient_norm/max": 69.0, "masked_per_sentence_gradient_norm/median": 1.1484375, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 14.8125, "masked_per_sentence_gradient_norm/var": 178.45379638671875, "masked_per_token_gradient_norm": 0.1317969411611557, "masked_per_token_gradient_norm/max": 82.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 5.566107574850321e-10, "masked_per_token_gradient_norm/var": 4.840991497039795, "masked_sentence_fisher_curvature": 19889.302734375, "masked_sentence_fisher_curvature/max": 149504.0, "masked_sentence_fisher_curvature/median": 8512.0, "masked_sentence_fisher_curvature/min": 68.5, "masked_sentence_fisher_curvature/p25": 2268.0, "masked_sentence_fisher_curvature/p75": 20352.0, "masked_sentence_fisher_curvature/p85": 24320.0, "masked_sentence_fisher_curvature/p90": 43520.0, "masked_sentence_fisher_curvature/p95": 119296.0, "masked_sentence_fisher_curvature/p99": 134912.046875, "masked_sentence_fisher_curvature/var": 1087768576.0, "masked_sentence_fisher_kl_divergence": 1.0036226960608019e-08, "masked_sentence_fisher_kl_divergence/max": 7.543712854385376e-08, "masked_sentence_fisher_kl_divergence/median": 4.307366907596588e-09, "masked_sentence_fisher_kl_divergence/min": 3.456079866737127e-11, "masked_sentence_fisher_kl_divergence/p25": 1.1459633242338896e-09, "masked_sentence_fisher_kl_divergence/p75": 1.0302755981683731e-08, "masked_sentence_fisher_kl_divergence/p85": 1.2281816452741623e-08, "masked_sentence_fisher_kl_divergence/p90": 2.1944288164377213e-08, "masked_sentence_fisher_kl_divergence/p95": 6.007030606269836e-08, "masked_sentence_fisher_kl_divergence/p99": 6.791672291228679e-08, "masked_sentence_fisher_kl_divergence/var": 2.764195938064455e-16, "masked_sentence_full_gradient_variance/max_squared_error": 237.40573120117188, "masked_sentence_full_gradient_variance/metric": 237.40573120117188, "masked_sentence_full_gradient_variance/p75": 237.40573120117188, "masked_sentence_full_gradient_variance/p90": 237.40573120117188, "masked_sentence_full_gradient_variance/p95": 237.40573120117188, "masked_sentence_full_gradient_variance/p99": 237.40573120117188, "masked_sentence_full_update_term": 0.0006204943056218326, "masked_sentence_full_update_term/max": 0.004302978515625, "masked_sentence_full_update_term/median": 5.221366882324219e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0009002685546875, "masked_sentence_full_update_term/p85": 0.001140594482421875, "masked_sentence_full_update_term/p90": 0.00177001953125, "masked_sentence_full_update_term/p95": 0.0031890869140625, "masked_sentence_full_update_term/p99": 0.0037521379999816418, "masked_sentence_full_update_term/var": 9.84682742455334e-07, "masked_sentence_hessian_coeff": -78296.0, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -23936.0, "masked_sentence_hessian_coeff/min": -509952.0, "masked_sentence_hessian_coeff/p25": -118272.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 12610928640.0, "masked_sentence_hessian_coeff_abs": 78296.0, "masked_sentence_hessian_coeff_abs/max": 509952.0, "masked_sentence_hessian_coeff_abs/median": 23936.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 118272.0, "masked_sentence_hessian_coeff_abs/p99": 455475.375, "masked_sentence_hessian_coeff_abs/var": 12610928640.0, "masked_token_fisher_curvature": 18245.015625, "masked_token_fisher_curvature/max": 16777216.0, "masked_token_fisher_curvature/median": 5.827586677109586e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.169878828456423e-24, "masked_token_fisher_curvature/p75": 6.714628852932947e-13, "masked_token_fisher_curvature/p85": 2.4374458007514477e-10, "masked_token_fisher_curvature/p90": 3.804416337516159e-08, "masked_token_fisher_curvature/p95": 0.0004936158657073975, "masked_token_fisher_curvature/p99": 35072.0, "masked_token_fisher_curvature/var": 138843668480.0, "masked_token_fisher_kl_divergence": 9.211069240677716e-09, "masked_token_fisher_kl_divergence/max": 8.463859558105469e-06, "masked_token_fisher_kl_divergence/median": 2.945902442934716e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.6095974588254783e-36, "masked_token_fisher_kl_divergence/p75": 3.3927329811745276e-25, "masked_token_fisher_kl_divergence/p85": 1.2324991127040112e-22, "masked_token_fisher_kl_divergence/p90": 1.920630664286875e-20, "masked_token_fisher_kl_divergence/p95": 2.487566359496429e-16, "masked_token_fisher_kl_divergence/p99": 1.7695128917694092e-08, "masked_token_fisher_kl_divergence/var": 3.538982597099151e-14, "masked_token_full_update_term": 7.285566425707657e-06, "masked_token_full_update_term/max": 0.004119873046875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -9.685754776000977e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.8719428134320037e-19, "masked_token_full_update_term/p85": 9.645062526431047e-16, "masked_token_full_update_term/p90": 3.3084646133829665e-14, "masked_token_full_update_term/p95": 3.240074875066057e-12, "masked_token_full_update_term/p99": 3.1495001167058945e-06, "masked_token_full_update_term/var": 1.494080770214623e-08, "masked_token_hessian_coeff": -77962.3671875, "masked_token_hessian_coeff/max": 326.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -28835840.0, "masked_token_hessian_coeff/p25": -2.0372681319713593e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.00416254997253418, "masked_token_hessian_coeff/var": 1396781219840.0, "masked_token_hessian_coeff_abs": 77962.4375, "masked_token_hessian_coeff_abs/max": 28835840.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 1.909211277961731e-07, "masked_token_hessian_coeff_abs/p99": 18135.0, "masked_token_hessian_coeff_abs/var": 1396781219840.0, "mean_logprobs": -0.01043701171875, "mean_logprobs/var": 4.673004150390625e-05, "num_completions/total": 8256, "per_sentence_gradient_norm": 34.684898376464844, "per_sentence_gradient_norm/max": 242.0, "per_sentence_gradient_norm/median": 5.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 52.4375, "per_sentence_gradient_norm/var": 2961.853515625, "per_token_feature_norm": 189.6347198486328, "per_token_feature_norm/max": 249.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 108.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 143.6183624267578, "per_token_gradient_norm": 0.59444260597229, "per_token_gradient_norm/max": 264.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 6.111804395914078e-10, "per_token_gradient_norm/var": 85.86548614501953, "per_token_policy_error_norm": 0.006116574630141258, "per_token_policy_error_norm/max": 1.96875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005996165331453085, "policy_entropy": 0.010667245835065842, "policy_entropy/max": 1.4921875, "policy_entropy/median": 7.566995918750763e-10, "policy_entropy/min": 7.676236084492097e-21, "policy_entropy/p25": 9.734435479913373e-13, "policy_entropy/p75": 1.4621764421463013e-07, "policy_entropy/var": 0.005696767941117287, "policy_loss": -0.5104166865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25252196192741394, "policy_sharpness": 9.723489761352539, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8222606182098389, "reward": 0.5104166865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25252196192741394, "rewards/accuracy_reward": 0.5104166865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25252196192741394, "sentence_fisher_curvature": 298308.8125, "sentence_fisher_curvature/max": 1933312.0, "sentence_fisher_curvature/median": 14080.0, "sentence_fisher_curvature/min": 68.5, "sentence_fisher_curvature/p25": 2764.0, "sentence_fisher_curvature/p75": 491520.0, "sentence_fisher_curvature/p85": 757760.0, "sentence_fisher_curvature/p90": 1038336.0, "sentence_fisher_curvature/p95": 1200128.0, "sentence_fisher_curvature/p99": 1287374.875, "sentence_fisher_curvature/var": 183251845120.0, "sentence_fisher_kl_divergence": 1.5055543656217196e-07, "sentence_fisher_kl_divergence/max": 9.760260581970215e-07, "sentence_fisher_kl_divergence/median": 7.101334631443024e-09, "sentence_fisher_kl_divergence/min": 3.456079866737127e-11, "sentence_fisher_kl_divergence/p25": 1.395164872519672e-09, "sentence_fisher_kl_divergence/p75": 2.477318048477173e-07, "sentence_fisher_kl_divergence/p85": 3.818422555923462e-07, "sentence_fisher_kl_divergence/p90": 5.234032869338989e-07, "sentence_fisher_kl_divergence/p95": 6.05359673500061e-07, "sentence_fisher_kl_divergence/p99": 6.504367320303572e-07, "sentence_fisher_kl_divergence/var": 4.666268253413798e-14, "sentence_full_gradient_variance/max_squared_error": 4014.04296875, "sentence_full_gradient_variance/metric": 4014.04296875, "sentence_full_gradient_variance/p75": 4014.04296875, "sentence_full_gradient_variance/p90": 4014.04296875, "sentence_full_gradient_variance/p95": 4014.04296875, "sentence_full_gradient_variance/p99": 4014.04296875, "sentence_full_update_term": 0.002732237335294485, "sentence_full_update_term/max": 0.0162353515625, "sentence_full_update_term/median": 0.000347137451171875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.00537109375, "sentence_full_update_term/p85": 0.00750732421875, "sentence_full_update_term/p90": 0.007598876953125, "sentence_full_update_term/p95": 0.0100555419921875, "sentence_full_update_term/p99": 0.01600341871380806, "sentence_full_update_term/var": 1.646514101594221e-05, "sentence_hessian_coeff": 51114.66796875, "sentence_hessian_coeff/max": 770048.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -548864.0, "sentence_hessian_coeff/p25": -23936.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 641638.8125, "sentence_hessian_coeff/var": 48750010368.0, "sentence_hessian_coeff_abs": 122325.3359375, "sentence_hessian_coeff_abs/max": 770048.0, "sentence_hessian_coeff_abs/median": 18176.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 166912.0, "sentence_hessian_coeff_abs/p99": 641638.8125, "sentence_hessian_coeff_abs/var": 36269223936.0, "step": 86, "token_fisher_curvature": 268764.46875, "token_fisher_curvature/max": 224395264.0, "token_fisher_curvature/median": 6.342582709040201e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.247427010883269e-24, "token_fisher_curvature/p75": 7.602807272633072e-13, "token_fisher_curvature/p85": 2.9831426218152046e-10, "token_fisher_curvature/p90": 5.448237061500549e-08, "token_fisher_curvature/p95": 0.00115203857421875, "token_fisher_curvature/p99": 81920.0, "token_fisher_curvature/var": 30383341043712.0, "token_fisher_kl_divergence": 1.3571916213095392e-07, "token_fisher_kl_divergence/max": 0.00011348724365234375, "token_fisher_kl_divergence/median": 3.2047474274603605e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.644862289350147e-36, "token_fisher_kl_divergence/p75": 3.8450973786644646e-25, "token_fisher_kl_divergence/p85": 1.5054687148465104e-22, "token_fisher_kl_divergence/p90": 2.752857078576476e-20, "token_fisher_kl_divergence/p95": 5.828670879282072e-16, "token_fisher_kl_divergence/p99": 4.1443854570388794e-08, "token_fisher_kl_divergence/var": 7.750960463737133e-12, "token_full_update_term": 3.3230036933673546e-05, "token_full_update_term/max": 0.01507568359375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -9.685754776000977e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 2.4818065354551e-19, "token_full_update_term/p85": 1.1171619185290638e-15, "token_full_update_term/p90": 4.3032417906818665e-14, "token_full_update_term/p95": 5.132783087447024e-12, "token_full_update_term/p99": 5.936622619628906e-05, "token_full_update_term/var": 2.6826927523870836e-07, "token_hessian_coeff": 41062.30078125, "token_hessian_coeff/max": 217055232.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -28835840.0, "token_hessian_coeff/p25": -2.0954757928848267e-09, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.007499575614929199, "token_hessian_coeff/var": 20003447373824.0, "token_hessian_coeff_abs": 232845.046875, "token_hessian_coeff_abs/max": 217055232.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.0954757928848267e-07, "token_hessian_coeff_abs/p99": 494760.0, "token_hessian_coeff_abs/var": 19950917910528.0 }, { "accuracy_reward": 0.6875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.21710526943206787, "adam_stats/lm_head/lr_effective_max": 4.084263764525531e-06, "adam_stats/lm_head/lr_effective_mean": -3.0027894233619623e-12, "adam_stats/lm_head/lr_effective_min": -4.334903223934816e-06, "adam_stats/lm_head/lr_effective_std": 1.2614485456197144e-07, "adam_stats/lr_effective_max": 4.575028015096905e-06, "adam_stats/lr_effective_mean": -8.581447184796698e-12, "adam_stats/lr_effective_min": -4.6132759052852634e-06, "adam_stats/m_t_max": 0.0013729825150221586, "adam_stats/m_t_mean": -1.6582384337188172e-12, "adam_stats/m_t_min": -0.0010382166365161538, "adam_stats/v_t_max": 2.510763988539111e-05, "adam_stats/v_t_mean": 1.7707388290030823e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.21710526943206787, "all_logprobs": -0.008474953472614288, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.25, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -9.129045065492392e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.011787883937358856, "clip_ratio": 0.0, "completion_length": 735.3333740234375, "completion_length/correct": 704.3939819335938, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 684.0, "completion_length/correct/min": 287.0, "completion_length/correct/p25": 588.25, "completion_length/correct/p75": 920.25, "completion_length/correct/var": 47481.35546875, "completion_length/incorrect": 803.4000244140625, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 992.0, "completion_length/incorrect/min": 331.0, "completion_length/incorrect/p25": 618.25, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 75575.765625, "completion_length/max": 1024.0, "completion_length/median": 726.0, "completion_length/min": 287.0, "completion_length/p25": 588.75, "completion_length/p75": 952.0, "completion_length/var": 57685.84375, "curvature_clip_ratio_token_fisher": 0.0029748412780463696, "curvature_clip_ratio_token_hessian": 0.0007932910230010748, "curvature_clip_ratio_total_fisher": 0.0029748412780463696, "curvature_clip_ratio_total_full": 0.0029748412780463696, "curvature_clip_ratio_total_hessian": 0.0007932910230010748, "epoch": 0.1392, "feature_vector_variance/max_squared_error": 66663.640625, "feature_vector_variance/metric": 30735.34765625, "generated_tokens/total": 4797364.0, "global_fisher_curvature": 131072.0, "global_fisher_curvature/max": 131072.0, "global_fisher_curvature/median": 131072.0, "global_fisher_curvature/min": 131072.0, "global_fisher_curvature/p25": 131072.0, "global_fisher_curvature/p75": 131072.0, "global_fisher_curvature/p85": 131072.0, "global_fisher_curvature/p90": 131072.0, "global_fisher_curvature/p95": 131072.0, "global_fisher_curvature/p99": 131072.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.052424967288971e-08, "global_fisher_kl_divergence/max": 5.052424967288971e-08, "global_fisher_kl_divergence/median": 5.052424967288971e-08, "global_fisher_kl_divergence/min": 5.052424967288971e-08, "global_fisher_kl_divergence/p25": 5.052424967288971e-08, "global_fisher_kl_divergence/p75": 5.052424967288971e-08, "global_fisher_kl_divergence/p85": 5.052424967288971e-08, "global_fisher_kl_divergence/p90": 5.052424967288971e-08, "global_fisher_kl_divergence/p95": 5.052424967288971e-08, "global_fisher_kl_divergence/p99": 5.052424967288971e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.00885009765625, "global_full_update_term/max": 0.00885009765625, "global_full_update_term/median": 0.00885009765625, "global_full_update_term/min": 0.00885009765625, "global_full_update_term/p25": 0.00885009765625, "global_full_update_term/p75": 0.00885009765625, "global_full_update_term/p85": 0.00885009765625, "global_full_update_term/p90": 0.00885009765625, "global_full_update_term/p95": 0.00885009765625, "global_full_update_term/p99": 0.00885009765625, "global_full_update_term/var": NaN, "global_hessian_coeff": 32384.0, "global_hessian_coeff/max": 32384.0, "global_hessian_coeff/median": 32384.0, "global_hessian_coeff/min": 32384.0, "global_hessian_coeff/p25": 32384.0, "global_hessian_coeff/p75": 32384.0, "global_hessian_coeff/p99": 32384.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 32384.0, "global_hessian_coeff_abs/max": 32384.0, "global_hessian_coeff_abs/median": 32384.0, "global_hessian_coeff_abs/min": 32384.0, "global_hessian_coeff_abs/p25": 32384.0, "global_hessian_coeff_abs/p75": 32384.0, "global_hessian_coeff_abs/p99": 32384.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.2854626774787903, "learning_rate": 7.59044652756249e-07, "loss": -0.6875, "masked_global_fisher_curvature": 21888.0, "masked_global_fisher_curvature/max": 21888.0, "masked_global_fisher_curvature/median": 21888.0, "masked_global_fisher_curvature/min": 21888.0, "masked_global_fisher_curvature/p25": 21888.0, "masked_global_fisher_curvature/p75": 21888.0, "masked_global_fisher_curvature/p85": 21888.0, "masked_global_fisher_curvature/p90": 21888.0, "masked_global_fisher_curvature/p95": 21888.0, "masked_global_fisher_curvature/p99": 21888.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/max": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/median": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/min": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/p25": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/p75": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/p85": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/p90": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/p95": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/p99": 8.440110832452774e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0034332275390625, "masked_global_full_update_term/max": 0.0034332275390625, "masked_global_full_update_term/median": 0.0034332275390625, "masked_global_full_update_term/min": 0.0034332275390625, "masked_global_full_update_term/p25": 0.0034332275390625, "masked_global_full_update_term/p75": 0.0034332275390625, "masked_global_full_update_term/p85": 0.0034332275390625, "masked_global_full_update_term/p90": 0.0034332275390625, "masked_global_full_update_term/p95": 0.0034332275390625, "masked_global_full_update_term/p99": 0.0034332275390625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -48384.0, "masked_global_hessian_coeff/max": -48384.0, "masked_global_hessian_coeff/median": -48384.0, "masked_global_hessian_coeff/min": -48384.0, "masked_global_hessian_coeff/p25": -48384.0, "masked_global_hessian_coeff/p75": -48384.0, "masked_global_hessian_coeff/p99": -48384.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 48384.0, "masked_global_hessian_coeff_abs/max": 48384.0, "masked_global_hessian_coeff_abs/median": 48384.0, "masked_global_hessian_coeff_abs/min": 48384.0, "masked_global_hessian_coeff_abs/p25": 48384.0, "masked_global_hessian_coeff_abs/p75": 48384.0, "masked_global_hessian_coeff_abs/p99": 48384.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 14.499918937683105, "masked_per_sentence_gradient_norm/max": 80.0, "masked_per_sentence_gradient_norm/median": 8.8125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 22.0, "masked_per_sentence_gradient_norm/var": 255.86013793945312, "masked_per_token_gradient_norm": 0.2977258265018463, "masked_per_token_gradient_norm/max": 92.0, "masked_per_token_gradient_norm/median": 3.154809746774845e-12, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.9907020032405853e-08, "masked_per_token_gradient_norm/var": 13.8897705078125, "masked_sentence_fisher_curvature": 44916.375, "masked_sentence_fisher_curvature/max": 212992.0, "masked_sentence_fisher_curvature/median": 25600.0, "masked_sentence_fisher_curvature/min": 328.0, "masked_sentence_fisher_curvature/p25": 3352.0, "masked_sentence_fisher_curvature/p75": 57472.0, "masked_sentence_fisher_curvature/p85": 91136.0, "masked_sentence_fisher_curvature/p90": 122880.0, "masked_sentence_fisher_curvature/p95": 176384.0, "masked_sentence_fisher_curvature/p99": 208128.015625, "masked_sentence_fisher_curvature/var": 3009270272.0, "masked_sentence_fisher_kl_divergence": 1.7303573685012452e-08, "masked_sentence_fisher_kl_divergence/max": 8.195638656616211e-08, "masked_sentence_fisher_kl_divergence/median": 9.837094694375992e-09, "masked_sentence_fisher_kl_divergence/min": 1.2641976354643703e-10, "masked_sentence_fisher_kl_divergence/p25": 1.293301465921104e-09, "masked_sentence_fisher_kl_divergence/p75": 2.214801497757435e-08, "masked_sentence_fisher_kl_divergence/p85": 3.5099219530820847e-08, "masked_sentence_fisher_kl_divergence/p90": 4.738103598356247e-08, "masked_sentence_fisher_kl_divergence/p95": 6.810296326875687e-08, "masked_sentence_fisher_kl_divergence/p99": 8.018687935873459e-08, "masked_sentence_fisher_kl_divergence/var": 4.465205120460377e-16, "masked_sentence_full_gradient_variance/max_squared_error": 450.19256591796875, "masked_sentence_full_gradient_variance/metric": 450.19256591796875, "masked_sentence_full_gradient_variance/p75": 450.19256591796875, "masked_sentence_full_gradient_variance/p90": 450.19256591796875, "masked_sentence_full_gradient_variance/p95": 450.19256591796875, "masked_sentence_full_gradient_variance/p99": 450.19256591796875, "masked_sentence_full_update_term": 0.0013033052673563361, "masked_sentence_full_update_term/max": 0.00689697265625, "masked_sentence_full_update_term/median": 0.000865936279296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0017242431640625, "masked_sentence_full_update_term/p85": 0.002471923828125, "masked_sentence_full_update_term/p90": 0.00299835205078125, "masked_sentence_full_update_term/p95": 0.00513458251953125, "masked_sentence_full_update_term/p99": 0.005824283231049776, "masked_sentence_full_update_term/var": 2.366790795349516e-06, "masked_sentence_hessian_coeff": -142066.671875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -139264.0, "masked_sentence_hessian_coeff/min": -585728.0, "masked_sentence_hessian_coeff/p25": -191488.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 19419041792.0, "masked_sentence_hessian_coeff_abs": 142066.671875, "masked_sentence_hessian_coeff_abs/max": 585728.0, "masked_sentence_hessian_coeff_abs/median": 118272.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 191488.0, "masked_sentence_hessian_coeff_abs/p99": 519577.8125, "masked_sentence_hessian_coeff_abs/var": 19419041792.0, "masked_token_fisher_curvature": 44703.90625, "masked_token_fisher_curvature/max": 25296896.0, "masked_token_fisher_curvature/median": 2.4665599424045226e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 4.911384887033602e-24, "masked_token_fisher_curvature/p75": 2.0339285811132868e-13, "masked_token_fisher_curvature/p85": 1.2823875294998288e-10, "masked_token_fisher_curvature/p90": 2.1886080503463745e-08, "masked_token_fisher_curvature/p95": 0.000736236572265625, "masked_token_fisher_curvature/p99": 114176.0, "masked_token_fisher_curvature/var": 599818960896.0, "masked_token_fisher_kl_divergence": 1.7228515503120434e-08, "masked_token_fisher_kl_divergence/max": 9.775161743164062e-06, "masked_token_fisher_kl_divergence/median": 9.490982765940298e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.892545904823883e-36, "masked_token_fisher_kl_divergence/p75": 7.835597599379266e-26, "masked_token_fisher_kl_divergence/p85": 4.9424041600043404e-23, "masked_token_fisher_kl_divergence/p90": 8.41738991333961e-21, "masked_token_fisher_kl_divergence/p95": 2.8449465006019636e-16, "masked_token_fisher_kl_divergence/p99": 4.400499165058136e-08, "masked_token_fisher_kl_divergence/var": 8.910348858488099e-14, "masked_token_full_update_term": 1.4468108929577284e-05, "masked_token_full_update_term/max": 0.004425048828125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -9.98377799987793e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 6.314393452555578e-16, "masked_token_full_update_term/p85": 3.885780586188048e-14, "masked_token_full_update_term/p90": 7.851497230149107e-13, "masked_token_full_update_term/p95": 1.8003021295953658e-10, "masked_token_full_update_term/p99": 0.00019550323486328125, "masked_token_full_update_term/var": 3.32942384773105e-08, "masked_token_hessian_coeff": -159502.90625, "masked_token_hessian_coeff/max": 2864.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -30408704.0, "masked_token_hessian_coeff/p25": -5.243346095085144e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.02065277099609375, "masked_token_hessian_coeff/var": 3053345046528.0, "masked_token_hessian_coeff_abs": 159503.234375, "masked_token_hessian_coeff_abs/max": 30408704.0, "masked_token_hessian_coeff_abs/median": 9.058567229658365e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 6.586313247680664e-06, "masked_token_hessian_coeff_abs/p99": 3026944.0, "masked_token_hessian_coeff_abs/var": 3053344784384.0, "mean_logprobs": -0.008544921875, "mean_logprobs/var": 2.5391578674316406e-05, "num_completions/total": 8352, "per_sentence_gradient_norm": 38.85807418823242, "per_sentence_gradient_norm/max": 208.0, "per_sentence_gradient_norm/median": 24.125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 61.875, "per_sentence_gradient_norm/var": 2008.7174072265625, "per_token_feature_norm": 190.22286987304688, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 108.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 122.96212005615234, "per_token_gradient_norm": 0.7938157320022583, "per_token_gradient_norm/max": 324.0, "per_token_gradient_norm/median": 3.325340003357269e-12, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.10711732506752e-08, "per_token_gradient_norm/var": 104.52320861816406, "per_token_policy_error_norm": 0.005000483710318804, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004804743453860283, "policy_entropy": 0.009193463250994682, "policy_entropy/max": 1.5390625, "policy_entropy/median": 5.602487362921238e-10, "policy_entropy/min": 3.891057601449442e-21, "policy_entropy/p25": 1.0231815394945443e-12, "policy_entropy/p75": 8.475035429000854e-08, "policy_entropy/var": 0.004720352590084076, "policy_loss": -0.6875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.21710526943206787, "policy_sharpness": 9.737293243408203, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6932743787765503, "reward": 0.6875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.21710526943206787, "rewards/accuracy_reward": 0.6875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.21710526943206787, "sentence_fisher_curvature": 331954.0625, "sentence_fisher_curvature/max": 1359872.0, "sentence_fisher_curvature/median": 204800.0, "sentence_fisher_curvature/min": 328.0, "sentence_fisher_curvature/p25": 3352.0, "sentence_fisher_curvature/p75": 551936.0, "sentence_fisher_curvature/p85": 716800.0, "sentence_fisher_curvature/p90": 851968.0, "sentence_fisher_curvature/p95": 1067008.0, "sentence_fisher_curvature/p99": 1289830.625, "sentence_fisher_curvature/var": 132198424576.0, "sentence_fisher_kl_divergence": 1.2792875736522547e-07, "sentence_fisher_kl_divergence/max": 5.252659320831299e-07, "sentence_fisher_kl_divergence/median": 7.869675755500793e-08, "sentence_fisher_kl_divergence/min": 1.2641976354643703e-10, "sentence_fisher_kl_divergence/p25": 1.293301465921104e-09, "sentence_fisher_kl_divergence/p75": 2.123415470123291e-07, "sentence_fisher_kl_divergence/p85": 2.7567148208618164e-07, "sentence_fisher_kl_divergence/p90": 3.287568688392639e-07, "sentence_fisher_kl_divergence/p95": 4.116445779800415e-07, "sentence_fisher_kl_divergence/p99": 4.969538167642895e-07, "sentence_fisher_kl_divergence/var": 1.96440594639382e-14, "sentence_full_gradient_variance/max_squared_error": 3431.242919921875, "sentence_full_gradient_variance/metric": 3431.242919921875, "sentence_full_gradient_variance/p75": 3431.242919921875, "sentence_full_gradient_variance/p90": 3431.242919921875, "sentence_full_gradient_variance/p95": 3431.242919921875, "sentence_full_gradient_variance/p99": 3431.242919921875, "sentence_full_update_term": 0.003458182094618678, "sentence_full_update_term/max": 0.0164794921875, "sentence_full_update_term/median": 0.0021514892578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.00572967529296875, "sentence_full_update_term/p85": 0.00658416748046875, "sentence_full_update_term/p90": 0.00982666015625, "sentence_full_update_term/p95": 0.012359619140625, "sentence_full_update_term/p99": 0.01589966006577015, "sentence_full_update_term/var": 1.603149030415807e-05, "sentence_hessian_coeff": 22071.0, "sentence_hessian_coeff/max": 505856.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -256000.0, "sentence_hessian_coeff/p25": -78208.0, "sentence_hessian_coeff/p75": 118784.0, "sentence_hessian_coeff/p99": 449433.78125, "sentence_hessian_coeff/var": 25733255168.0, "sentence_hessian_coeff_abs": 113265.671875, "sentence_hessian_coeff_abs/max": 505856.0, "sentence_hessian_coeff_abs/median": 87040.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 184320.0, "sentence_hessian_coeff_abs/p99": 449433.78125, "sentence_hessian_coeff_abs/var": 13261357056.0, "step": 87, "token_fisher_curvature": 330319.84375, "token_fisher_curvature/max": 228589568.0, "token_fisher_curvature/median": 2.6156377411212794e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.2991257991678335e-24, "token_fisher_curvature/p75": 2.3092638912203256e-13, "token_fisher_curvature/p85": 1.6039081174312741e-10, "token_fisher_curvature/p90": 3.282912075519562e-08, "token_fisher_curvature/p95": 0.0020192861557006836, "token_fisher_curvature/p99": 473088.0, "token_fisher_curvature/var": 37225907617792.0, "token_fisher_kl_divergence": 1.2728018816687836e-07, "token_fisher_kl_divergence/max": 8.821487426757812e-05, "token_fisher_kl_divergence/median": 1.0107280348144214e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.0453601704307803e-36, "token_fisher_kl_divergence/p75": 8.885729236409477e-26, "token_fisher_kl_divergence/p85": 6.17703584772509e-23, "token_fisher_kl_divergence/p90": 1.2652554649611111e-20, "token_fisher_kl_divergence/p95": 7.779421638126616e-16, "token_fisher_kl_divergence/p99": 1.825392246246338e-07, "token_fisher_kl_divergence/var": 5.527008538341871e-12, "token_full_update_term": 3.9007474697427824e-05, "token_full_update_term/max": 0.01336669921875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -9.98377799987793e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 6.973588373426765e-16, "token_full_update_term/p85": 4.440892098500626e-14, "token_full_update_term/p90": 9.450218385609332e-13, "token_full_update_term/p95": 2.892193151637912e-10, "token_full_update_term/p99": 0.000507354736328125, "token_full_update_term/var": 2.5328654373879544e-07, "token_hessian_coeff": -7043.78857421875, "token_hessian_coeff/max": 225443840.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -30408704.0, "token_hessian_coeff/p25": -5.252659320831299e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.03932952880859375, "token_hessian_coeff/var": 25574043025408.0, "token_hessian_coeff_abs": 337204.75, "token_hessian_coeff_abs/max": 225443840.0, "token_hessian_coeff_abs/median": 9.531504474580288e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 7.010996341705322e-06, "token_hessian_coeff_abs/p99": 6818816.0, "token_hessian_coeff_abs/var": 25460387872768.0 }, { "accuracy_reward": 0.78125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.17269736528396606, "adam_stats/lm_head/lr_effective_max": 3.5231273614044767e-06, "adam_stats/lm_head/lr_effective_mean": -7.962511726355981e-12, "adam_stats/lm_head/lr_effective_min": -3.7686177165596746e-06, "adam_stats/lm_head/lr_effective_std": 1.173792725239764e-07, "adam_stats/lr_effective_max": 4.053674729220802e-06, "adam_stats/lr_effective_mean": -1.5045084969522726e-11, "adam_stats/lr_effective_min": -3.970100351580186e-06, "adam_stats/m_t_max": 0.0026639073621481657, "adam_stats/m_t_mean": -2.3126901435577274e-11, "adam_stats/m_t_min": -0.0022283405996859074, "adam_stats/v_t_max": 2.5276185624534264e-05, "adam_stats/v_t_mean": 1.826171809257704e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.78125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.17269736528396606, "all_logprobs": -0.008847528137266636, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -4.75, "all_logprobs/p1": -0.1611328125, "all_logprobs/p10": -9.5367431640625e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.929804269224405e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012466087006032467, "clip_ratio": 0.0, "completion_length": 441.82293701171875, "completion_length/correct": 410.0133361816406, "completion_length/correct/max": 690.0, "completion_length/correct/median": 359.0, "completion_length/correct/min": 265.0, "completion_length/correct/p25": 321.0, "completion_length/correct/p75": 526.5, "completion_length/correct/var": 14841.634765625, "completion_length/incorrect": 555.4285888671875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 520.0, "completion_length/incorrect/min": 312.0, "completion_length/incorrect/p25": 447.0, "completion_length/incorrect/p75": 602.0, "completion_length/incorrect/var": 49850.7578125, "completion_length/max": 1024.0, "completion_length/median": 372.0, "completion_length/min": 265.0, "completion_length/p25": 321.0, "completion_length/p75": 541.0, "completion_length/var": 25707.5390625, "curvature_clip_ratio_token_fisher": 0.0030413768254220486, "curvature_clip_ratio_token_hessian": 0.0004715312970802188, "curvature_clip_ratio_total_fisher": 0.0030413768254220486, "curvature_clip_ratio_total_full": 0.0030413768254220486, "curvature_clip_ratio_total_hessian": 0.0004715312970802188, "epoch": 0.1408, "feature_vector_variance/max_squared_error": 65063.73828125, "feature_vector_variance/metric": 31281.916015625, "generated_tokens/total": 4839779.0, "global_fisher_curvature": 153600.0, "global_fisher_curvature/max": 153600.0, "global_fisher_curvature/median": 153600.0, "global_fisher_curvature/min": 153600.0, "global_fisher_curvature/p25": 153600.0, "global_fisher_curvature/p75": 153600.0, "global_fisher_curvature/p85": 153600.0, "global_fisher_curvature/p90": 153600.0, "global_fisher_curvature/p95": 153600.0, "global_fisher_curvature/p99": 153600.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 4.423782229423523e-08, "global_fisher_kl_divergence/max": 4.423782229423523e-08, "global_fisher_kl_divergence/median": 4.423782229423523e-08, "global_fisher_kl_divergence/min": 4.423782229423523e-08, "global_fisher_kl_divergence/p25": 4.423782229423523e-08, "global_fisher_kl_divergence/p75": 4.423782229423523e-08, "global_fisher_kl_divergence/p85": 4.423782229423523e-08, "global_fisher_kl_divergence/p90": 4.423782229423523e-08, "global_fisher_kl_divergence/p95": 4.423782229423523e-08, "global_fisher_kl_divergence/p99": 4.423782229423523e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.004852294921875, "global_full_update_term/max": 0.004852294921875, "global_full_update_term/median": 0.004852294921875, "global_full_update_term/min": 0.004852294921875, "global_full_update_term/p25": 0.004852294921875, "global_full_update_term/p75": 0.004852294921875, "global_full_update_term/p85": 0.004852294921875, "global_full_update_term/p90": 0.004852294921875, "global_full_update_term/p95": 0.004852294921875, "global_full_update_term/p99": 0.004852294921875, "global_full_update_term/var": NaN, "global_hessian_coeff": 5536.0, "global_hessian_coeff/max": 5536.0, "global_hessian_coeff/median": 5536.0, "global_hessian_coeff/min": 5536.0, "global_hessian_coeff/p25": 5536.0, "global_hessian_coeff/p75": 5536.0, "global_hessian_coeff/p99": 5536.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 5536.0, "global_hessian_coeff_abs/max": 5536.0, "global_hessian_coeff_abs/median": 5536.0, "global_hessian_coeff_abs/min": 5536.0, "global_hessian_coeff_abs/p25": 5536.0, "global_hessian_coeff_abs/p75": 5536.0, "global_hessian_coeff_abs/p99": 5536.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.5890795588493347, "learning_rate": 6.484090676804927e-07, "loss": -0.7812, "masked_global_fisher_curvature": 41472.0, "masked_global_fisher_curvature/max": 41472.0, "masked_global_fisher_curvature/median": 41472.0, "masked_global_fisher_curvature/min": 41472.0, "masked_global_fisher_curvature/p25": 41472.0, "masked_global_fisher_curvature/p75": 41472.0, "masked_global_fisher_curvature/p85": 41472.0, "masked_global_fisher_curvature/p90": 41472.0, "masked_global_fisher_curvature/p95": 41472.0, "masked_global_fisher_curvature/p99": 41472.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/max": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/median": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/min": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/p25": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/p75": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/p85": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/p90": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/p95": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/p99": 1.1932570487260818e-08, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.002471923828125, "masked_global_full_update_term/max": 0.002471923828125, "masked_global_full_update_term/median": 0.002471923828125, "masked_global_full_update_term/min": 0.002471923828125, "masked_global_full_update_term/p25": 0.002471923828125, "masked_global_full_update_term/p75": 0.002471923828125, "masked_global_full_update_term/p85": 0.002471923828125, "masked_global_full_update_term/p90": 0.002471923828125, "masked_global_full_update_term/p95": 0.002471923828125, "masked_global_full_update_term/p99": 0.002471923828125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -82944.0, "masked_global_hessian_coeff/max": -82944.0, "masked_global_hessian_coeff/median": -82944.0, "masked_global_hessian_coeff/min": -82944.0, "masked_global_hessian_coeff/p25": -82944.0, "masked_global_hessian_coeff/p75": -82944.0, "masked_global_hessian_coeff/p99": -82944.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 82944.0, "masked_global_hessian_coeff_abs/max": 82944.0, "masked_global_hessian_coeff_abs/median": 82944.0, "masked_global_hessian_coeff_abs/min": 82944.0, "masked_global_hessian_coeff_abs/p25": 82944.0, "masked_global_hessian_coeff_abs/p75": 82944.0, "masked_global_hessian_coeff_abs/p99": 82944.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 14.01025390625, "masked_per_sentence_gradient_norm/max": 68.0, "masked_per_sentence_gradient_norm/median": 11.625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 3.24609375, "masked_per_sentence_gradient_norm/p75": 19.15625, "masked_per_sentence_gradient_norm/var": 270.5278015136719, "masked_per_token_gradient_norm": 0.4062899053096771, "masked_per_token_gradient_norm/max": 125.5, "masked_per_token_gradient_norm/median": 2.97859514830634e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 5.3318217396736145e-08, "masked_per_token_gradient_norm/var": 22.53373146057129, "masked_sentence_fisher_curvature": 70701.671875, "masked_sentence_fisher_curvature/max": 270336.0, "masked_sentence_fisher_curvature/median": 69632.0, "masked_sentence_fisher_curvature/min": 1008.0, "masked_sentence_fisher_curvature/p25": 25344.0, "masked_sentence_fisher_curvature/p75": 96256.0, "masked_sentence_fisher_curvature/p85": 130560.0, "masked_sentence_fisher_curvature/p90": 135168.0, "masked_sentence_fisher_curvature/p95": 168960.0, "masked_sentence_fisher_curvature/p99": 204185.8125, "masked_sentence_fisher_curvature/var": 3050600192.0, "masked_sentence_fisher_kl_divergence": 2.0357163066364592e-08, "masked_sentence_fisher_kl_divergence/max": 7.776543498039246e-08, "masked_sentence_fisher_kl_divergence/median": 2.0023435354232788e-08, "masked_sentence_fisher_kl_divergence/min": 2.9103830456733704e-10, "masked_sentence_fisher_kl_divergence/p25": 7.283233571797609e-09, "masked_sentence_fisher_kl_divergence/p75": 2.7706846594810486e-08, "masked_sentence_fisher_kl_divergence/p85": 3.771856427192688e-08, "masked_sentence_fisher_kl_divergence/p90": 3.888271749019623e-08, "masked_sentence_fisher_kl_divergence/p95": 4.8603396862745285e-08, "masked_sentence_fisher_kl_divergence/p99": 5.874323250054658e-08, "masked_sentence_fisher_kl_divergence/var": 2.52738696653363e-16, "masked_sentence_full_gradient_variance/max_squared_error": 438.4970703125, "masked_sentence_full_gradient_variance/metric": 438.4970703125, "masked_sentence_full_gradient_variance/p75": 438.4970703125, "masked_sentence_full_gradient_variance/p90": 438.4970703125, "masked_sentence_full_gradient_variance/p95": 438.4970703125, "masked_sentence_full_gradient_variance/p99": 438.4970703125, "masked_sentence_full_update_term": 0.0008726766100153327, "masked_sentence_full_update_term/max": 0.00360107421875, "masked_sentence_full_update_term/median": 0.000701904296875, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.000148773193359375, "masked_sentence_full_update_term/p75": 0.001346588134765625, "masked_sentence_full_update_term/p85": 0.001556396484375, "masked_sentence_full_update_term/p90": 0.001987457275390625, "masked_sentence_full_update_term/p95": 0.00257110595703125, "masked_sentence_full_update_term/p99": 0.0031517043244093657, "masked_sentence_full_update_term/var": 7.123363161554153e-07, "masked_sentence_hessian_coeff": -183826.671875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -195584.0, "masked_sentence_hessian_coeff/min": -663552.0, "masked_sentence_hessian_coeff/p25": -262144.0, "masked_sentence_hessian_coeff/p75": -98944.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 16305812480.0, "masked_sentence_hessian_coeff_abs": 183826.671875, "masked_sentence_hessian_coeff_abs/max": 663552.0, "masked_sentence_hessian_coeff_abs/median": 192512.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 98944.0, "masked_sentence_hessian_coeff_abs/p75": 262144.0, "masked_sentence_hessian_coeff_abs/p99": 463155.84375, "masked_sentence_hessian_coeff_abs/var": 16305812480.0, "masked_token_fisher_curvature": 72401.6484375, "masked_token_fisher_curvature/max": 34340864.0, "masked_token_fisher_curvature/median": 2.0735366548785272e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.841963076155758e-24, "masked_token_fisher_curvature/p75": 2.682298827494378e-13, "masked_token_fisher_curvature/p85": 2.3101165425032377e-10, "masked_token_fisher_curvature/p90": 5.657784640789032e-08, "masked_token_fisher_curvature/p95": 0.0016021728515625, "masked_token_fisher_curvature/p99": 313344.0, "masked_token_fisher_curvature/var": 1339711946752.0, "masked_token_fisher_kl_divergence": 2.0842412240540398e-08, "masked_token_fisher_kl_divergence/max": 9.894371032714844e-06, "masked_token_fisher_kl_divergence/median": 5.97808654737798e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.680956921675871e-36, "masked_token_fisher_kl_divergence/p75": 7.714428564337319e-26, "masked_token_fisher_kl_divergence/p85": 6.658803931051873e-23, "masked_token_fisher_kl_divergence/p90": 1.6305384234645282e-20, "masked_token_fisher_kl_divergence/p95": 4.614364446098307e-16, "masked_token_fisher_kl_divergence/p99": 9.033828973770142e-08, "masked_token_fisher_kl_divergence/var": 1.1099750811408363e-13, "masked_token_full_update_term": 1.7159667550004087e-05, "masked_token_full_update_term/max": 0.00445556640625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.601874828338623e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 8.049116928532385e-16, "masked_token_full_update_term/p85": 7.105427357601002e-14, "masked_token_full_update_term/p90": 1.3038459201197838e-12, "masked_token_full_update_term/p95": 5.20230969414115e-10, "masked_token_full_update_term/p99": 0.0002727508544921875, "masked_token_full_update_term/var": 4.0374807497300935e-08, "masked_token_hessian_coeff": -197678.25, "masked_token_hessian_coeff/max": 131072.0, "masked_token_hessian_coeff/median": -7.069900220812997e-13, "masked_token_hessian_coeff/min": -30277632.0, "masked_token_hessian_coeff/p25": -4.291534423828125e-06, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.095703125, "masked_token_hessian_coeff/var": 3927142563840.0, "masked_token_hessian_coeff_abs": 197691.71875, "masked_token_hessian_coeff_abs/max": 30277632.0, "masked_token_hessian_coeff_abs/median": 8.032657206058502e-09, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.546476364135742e-05, "masked_token_hessian_coeff_abs/p99": 5187328.0, "masked_token_hessian_coeff_abs/var": 3927136796672.0, "mean_logprobs": -0.0084228515625, "mean_logprobs/var": 2.8014183044433594e-05, "num_completions/total": 8448, "per_sentence_gradient_norm": 27.765300750732422, "per_sentence_gradient_norm/max": 192.0, "per_sentence_gradient_norm/median": 18.125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 4.359375, "per_sentence_gradient_norm/p75": 40.125, "per_sentence_gradient_norm/var": 1006.288818359375, "per_token_feature_norm": 190.42532348632812, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 105.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 196.5, "per_token_feature_norm/var": 131.20086669921875, "per_token_gradient_norm": 0.9297506809234619, "per_token_gradient_norm/max": 318.0, "per_token_gradient_norm/median": 3.1377567211166024e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 5.541369318962097e-08, "per_token_gradient_norm/var": 118.91222381591797, "per_token_policy_error_norm": 0.005154196638613939, "per_token_policy_error_norm/max": 1.96875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.00488467188552022, "policy_entropy": 0.009656990878283978, "policy_entropy/max": 1.46875, "policy_entropy/median": 4.802132025361061e-10, "policy_entropy/min": 9.661469554619363e-22, "policy_entropy/p25": 1.1439738045737613e-12, "policy_entropy/p75": 8.475035429000854e-08, "policy_entropy/var": 0.005181340500712395, "policy_loss": -0.78125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.17269736528396606, "policy_sharpness": 9.739744186401367, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7040396928787231, "reward": 0.78125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.17269736528396606, "rewards/accuracy_reward": 0.78125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.17269736528396606, "sentence_fisher_curvature": 402665.25, "sentence_fisher_curvature/max": 1753088.0, "sentence_fisher_curvature/median": 370688.0, "sentence_fisher_curvature/min": 1008.0, "sentence_fisher_curvature/p25": 82432.0, "sentence_fisher_curvature/p75": 636928.0, "sentence_fisher_curvature/p85": 720896.0, "sentence_fisher_curvature/p90": 864256.0, "sentence_fisher_curvature/p95": 1265664.0, "sentence_fisher_curvature/p99": 1511834.375, "sentence_fisher_curvature/var": 155103887360.0, "sentence_fisher_kl_divergence": 1.1601601812571971e-07, "sentence_fisher_kl_divergence/max": 5.066394805908203e-07, "sentence_fisher_kl_divergence/median": 1.0663643479347229e-07, "sentence_fisher_kl_divergence/min": 2.9103830456733704e-10, "sentence_fisher_kl_divergence/p25": 2.3748725652694702e-08, "sentence_fisher_kl_divergence/p75": 1.832377165555954e-07, "sentence_fisher_kl_divergence/p85": 2.076849341392517e-07, "sentence_fisher_kl_divergence/p90": 2.4959444999694824e-07, "sentence_fisher_kl_divergence/p95": 3.650784492492676e-07, "sentence_fisher_kl_divergence/p99": 4.358591922937194e-07, "sentence_fisher_kl_divergence/var": 1.2892697807520125e-14, "sentence_full_gradient_variance/max_squared_error": 1726.218505859375, "sentence_full_gradient_variance/metric": 1726.218505859375, "sentence_full_gradient_variance/p75": 1726.218505859375, "sentence_full_gradient_variance/p90": 1726.218505859375, "sentence_full_gradient_variance/p95": 1726.218505859375, "sentence_full_gradient_variance/p99": 1726.218505859375, "sentence_full_update_term": 0.0018801888218149543, "sentence_full_update_term/max": 0.00885009765625, "sentence_full_update_term/median": 0.0013885498046875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0003376007080078125, "sentence_full_update_term/p75": 0.00262451171875, "sentence_full_update_term/p85": 0.003482818603515625, "sentence_full_update_term/p90": 0.0044403076171875, "sentence_full_update_term/p95": 0.00597381591796875, "sentence_full_update_term/p99": 0.008038332685828209, "sentence_full_update_term/var": 3.829105935437838e-06, "sentence_hessian_coeff": 8082.0, "sentence_hessian_coeff/max": 1081344.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -438272.0, "sentence_hessian_coeff/p25": -190976.0, "sentence_hessian_coeff/p75": 105472.0, "sentence_hessian_coeff/p99": 808960.875, "sentence_hessian_coeff/var": 78072414208.0, "sentence_hessian_coeff_abs": 191810.0, "sentence_hessian_coeff_abs/max": 1081344.0, "sentence_hessian_coeff_abs/median": 151552.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 43776.0, "sentence_hessian_coeff_abs/p75": 262144.0, "sentence_hessian_coeff_abs/p99": 808960.875, "sentence_hessian_coeff_abs/var": 40960073728.0, "step": 88, "token_fisher_curvature": 386216.5625, "token_fisher_curvature/max": 228589568.0, "token_fisher_curvature/median": 2.2632720350634905e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 6.1780052000054254e-24, "token_fisher_curvature/p75": 3.019806626980426e-13, "token_fisher_curvature/p85": 2.7466739993542433e-10, "token_fisher_curvature/p90": 8.288770914077759e-08, "token_fisher_curvature/p95": 0.004065036773681641, "token_fisher_curvature/p99": 1061504.0, "token_fisher_curvature/var": 42079833030656.0, "token_fisher_kl_divergence": 1.1121535692382167e-07, "token_fisher_kl_divergence/max": 6.580352783203125e-05, "token_fisher_kl_divergence/median": 6.532754371361504e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.774996469741654e-36, "token_fisher_kl_divergence/p75": 8.703975683846556e-26, "token_fisher_kl_divergence/p85": 7.899574849881414e-23, "token_fisher_kl_divergence/p90": 2.3928680759933985e-20, "token_fisher_kl_divergence/p95": 1.1692307278626801e-15, "token_fisher_kl_divergence/p99": 3.062887117266655e-07, "token_fisher_kl_divergence/var": 3.4904384070555405e-12, "token_full_update_term": 3.987892341683619e-05, "token_full_update_term/max": 0.01153564453125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.601874828338623e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 8.673617379884035e-16, "token_full_update_term/p85": 7.900451126641173e-14, "token_full_update_term/p90": 1.6200374375330284e-12, "token_full_update_term/p95": 9.302567605118384e-10, "token_full_update_term/p99": 0.000716090202331543, "token_full_update_term/var": 2.2085386319758982e-07, "token_hessian_coeff": -18999.59765625, "token_hessian_coeff/max": 225443840.0, "token_hessian_coeff/median": -6.465938895416912e-13, "token_hessian_coeff/min": -30277632.0, "token_hessian_coeff/p25": -4.246830940246582e-06, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.2109375, "token_hessian_coeff/var": 28143914057728.0, "token_hessian_coeff_abs": 387541.21875, "token_hessian_coeff_abs/max": 225443840.0, "token_hessian_coeff_abs/median": 8.381903171539307e-09, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.802776336669922e-05, "token_hessian_coeff_abs/p99": 11525120.0, "token_hessian_coeff_abs/var": 27994085130240.0 }, { "accuracy_reward": 0.5625, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.24868421256542206, "adam_stats/lm_head/lr_effective_max": 3.045112634936231e-06, "adam_stats/lm_head/lr_effective_mean": -2.9405243431174233e-12, "adam_stats/lm_head/lr_effective_min": -3.0585604235966457e-06, "adam_stats/lm_head/lr_effective_std": 9.605064121842588e-08, "adam_stats/lr_effective_max": 3.3608419016672997e-06, "adam_stats/lr_effective_mean": -1.168172242965726e-11, "adam_stats/lr_effective_min": -3.3399969652236905e-06, "adam_stats/m_t_max": 0.0018817692762240767, "adam_stats/m_t_mean": -2.531186371612648e-11, "adam_stats/m_t_min": -0.0014481721445918083, "adam_stats/v_t_max": 2.535605017328635e-05, "adam_stats/v_t_mean": 1.8396532127512577e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5625, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.24868421256542206, "all_logprobs": -0.008588120341300964, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -4.25, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -9.584426879882812e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.011614352464675903, "clip_ratio": 0.0, "completion_length": 500.4270935058594, "completion_length/correct": 457.72222900390625, "completion_length/correct/max": 999.0, "completion_length/correct/median": 446.0, "completion_length/correct/min": 179.0, "completion_length/correct/p25": 336.0, "completion_length/correct/p75": 487.0, "completion_length/correct/var": 38996.1328125, "completion_length/incorrect": 555.3333740234375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 463.0, "completion_length/incorrect/min": 278.0, "completion_length/incorrect/p25": 396.0, "completion_length/incorrect/p75": 599.25, "completion_length/incorrect/var": 61303.64453125, "completion_length/max": 1024.0, "completion_length/median": 462.0, "completion_length/min": 179.0, "completion_length/p25": 384.0, "completion_length/p75": 552.75, "completion_length/var": 50582.53515625, "curvature_clip_ratio_token_fisher": 0.0022272642236202955, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0022272642236202955, "curvature_clip_ratio_total_full": 0.0022272642236202955, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1424, "feature_vector_variance/max_squared_error": 55796.90625, "feature_vector_variance/metric": 30672.66796875, "generated_tokens/total": 4887820.0, "global_fisher_curvature": 153600.0, "global_fisher_curvature/max": 153600.0, "global_fisher_curvature/median": 153600.0, "global_fisher_curvature/min": 153600.0, "global_fisher_curvature/p25": 153600.0, "global_fisher_curvature/p75": 153600.0, "global_fisher_curvature/p85": 153600.0, "global_fisher_curvature/p90": 153600.0, "global_fisher_curvature/p95": 153600.0, "global_fisher_curvature/p99": 153600.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.236345946788788e-08, "global_fisher_kl_divergence/max": 3.236345946788788e-08, "global_fisher_kl_divergence/median": 3.236345946788788e-08, "global_fisher_kl_divergence/min": 3.236345946788788e-08, "global_fisher_kl_divergence/p25": 3.236345946788788e-08, "global_fisher_kl_divergence/p75": 3.236345946788788e-08, "global_fisher_kl_divergence/p85": 3.236345946788788e-08, "global_fisher_kl_divergence/p90": 3.236345946788788e-08, "global_fisher_kl_divergence/p95": 3.236345946788788e-08, "global_fisher_kl_divergence/p99": 3.236345946788788e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.00390625, "global_full_update_term/max": 0.00390625, "global_full_update_term/median": 0.00390625, "global_full_update_term/min": 0.00390625, "global_full_update_term/p25": 0.00390625, "global_full_update_term/p75": 0.00390625, "global_full_update_term/p85": 0.00390625, "global_full_update_term/p90": 0.00390625, "global_full_update_term/p95": 0.00390625, "global_full_update_term/p99": 0.00390625, "global_full_update_term/var": NaN, "global_hessian_coeff": 32768.0, "global_hessian_coeff/max": 32768.0, "global_hessian_coeff/median": 32768.0, "global_hessian_coeff/min": 32768.0, "global_hessian_coeff/p25": 32768.0, "global_hessian_coeff/p75": 32768.0, "global_hessian_coeff/p99": 32768.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 32768.0, "global_hessian_coeff_abs/max": 32768.0, "global_hessian_coeff_abs/median": 32768.0, "global_hessian_coeff_abs/min": 32768.0, "global_hessian_coeff_abs/p25": 32768.0, "global_hessian_coeff_abs/p75": 32768.0, "global_hessian_coeff_abs/p99": 32768.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.33976998925209045, "learning_rate": 5.461210907490952e-07, "loss": -0.5625, "masked_global_fisher_curvature": 37120.0, "masked_global_fisher_curvature/max": 37120.0, "masked_global_fisher_curvature/median": 37120.0, "masked_global_fisher_curvature/min": 37120.0, "masked_global_fisher_curvature/p25": 37120.0, "masked_global_fisher_curvature/p75": 37120.0, "masked_global_fisher_curvature/p85": 37120.0, "masked_global_fisher_curvature/p90": 37120.0, "masked_global_fisher_curvature/p95": 37120.0, "masked_global_fisher_curvature/p99": 37120.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/max": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/median": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/min": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/p25": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/p75": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/p85": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/p90": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/p95": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/p99": 7.799826562404633e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.001953125, "masked_global_full_update_term/max": 0.001953125, "masked_global_full_update_term/median": 0.001953125, "masked_global_full_update_term/min": 0.001953125, "masked_global_full_update_term/p25": 0.001953125, "masked_global_full_update_term/p75": 0.001953125, "masked_global_full_update_term/p85": 0.001953125, "masked_global_full_update_term/p90": 0.001953125, "masked_global_full_update_term/p95": 0.001953125, "masked_global_full_update_term/p99": 0.001953125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -38656.0, "masked_global_hessian_coeff/max": -38656.0, "masked_global_hessian_coeff/median": -38656.0, "masked_global_hessian_coeff/min": -38656.0, "masked_global_hessian_coeff/p25": -38656.0, "masked_global_hessian_coeff/p75": -38656.0, "masked_global_hessian_coeff/p99": -38656.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 38656.0, "masked_global_hessian_coeff_abs/max": 38656.0, "masked_global_hessian_coeff_abs/median": 38656.0, "masked_global_hessian_coeff_abs/min": 38656.0, "masked_global_hessian_coeff_abs/p25": 38656.0, "masked_global_hessian_coeff_abs/p75": 38656.0, "masked_global_hessian_coeff_abs/p99": 38656.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 13.973470687866211, "masked_per_sentence_gradient_norm/max": 84.5, "masked_per_sentence_gradient_norm/median": 3.96875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 18.03125, "masked_per_sentence_gradient_norm/var": 426.3085632324219, "masked_per_token_gradient_norm": 0.2765173017978668, "masked_per_token_gradient_norm/max": 129.0, "masked_per_token_gradient_norm/median": 3.230922474006803e-17, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.822542779147625e-10, "masked_per_token_gradient_norm/var": 18.37512969970703, "masked_sentence_fisher_curvature": 63396.83984375, "masked_sentence_fisher_curvature/max": 333824.0, "masked_sentence_fisher_curvature/median": 30208.0, "masked_sentence_fisher_curvature/min": 102.5, "masked_sentence_fisher_curvature/p25": 2480.0, "masked_sentence_fisher_curvature/p75": 108288.0, "masked_sentence_fisher_curvature/p85": 134784.0, "masked_sentence_fisher_curvature/p90": 169472.0, "masked_sentence_fisher_curvature/p95": 192256.0, "masked_sentence_fisher_curvature/p99": 262809.8125, "masked_sentence_fisher_curvature/var": 5405316608.0, "masked_sentence_fisher_kl_divergence": 1.3330693882096512e-08, "masked_sentence_fisher_kl_divergence/max": 7.031485438346863e-08, "masked_sentence_fisher_kl_divergence/median": 6.344635039567947e-09, "masked_sentence_fisher_kl_divergence/min": 2.1600499167107046e-11, "masked_sentence_fisher_kl_divergence/p25": 5.20230969414115e-10, "masked_sentence_fisher_kl_divergence/p75": 2.2759195417165756e-08, "masked_sentence_fisher_kl_divergence/p85": 2.8347130864858627e-08, "masked_sentence_fisher_kl_divergence/p90": 3.562308847904205e-08, "masked_sentence_fisher_kl_divergence/p95": 4.039611667394638e-08, "masked_sentence_fisher_kl_divergence/p99": 5.527404312033468e-08, "masked_sentence_fisher_kl_divergence/var": 2.3915549102273583e-16, "masked_sentence_full_gradient_variance/max_squared_error": 598.000732421875, "masked_sentence_full_gradient_variance/metric": 598.000732421875, "masked_sentence_full_gradient_variance/p75": 598.000732421875, "masked_sentence_full_gradient_variance/p90": 598.000732421875, "masked_sentence_full_gradient_variance/p95": 598.000732421875, "masked_sentence_full_gradient_variance/p99": 598.000732421875, "masked_sentence_full_update_term": 0.0007142946124076843, "masked_sentence_full_update_term/max": 0.006439208984375, "masked_sentence_full_update_term/median": 0.0001087188720703125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00092315673828125, "masked_sentence_full_update_term/p85": 0.001598358154296875, "masked_sentence_full_update_term/p90": 0.0021514892578125, "masked_sentence_full_update_term/p95": 0.002681732177734375, "masked_sentence_full_update_term/p99": 0.0049026538617908955, "masked_sentence_full_update_term/var": 1.2544420542326407e-06, "masked_sentence_hessian_coeff": -95846.671875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -42240.0, "masked_sentence_hessian_coeff/min": -458752.0, "masked_sentence_hessian_coeff/p25": -174336.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 11306157056.0, "masked_sentence_hessian_coeff_abs": 95846.671875, "masked_sentence_hessian_coeff_abs/max": 458752.0, "masked_sentence_hessian_coeff_abs/median": 42240.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 174336.0, "masked_sentence_hessian_coeff_abs/p99": 349798.75, "masked_sentence_hessian_coeff_abs/var": 11306157056.0, "masked_token_fisher_curvature": 62171.25, "masked_token_fisher_curvature/max": 46137344.0, "masked_token_fisher_curvature/median": 6.403569081242511e-19, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.0114544175758664e-24, "masked_token_fisher_curvature/p75": 1.412203687323199e-13, "masked_token_fisher_curvature/p85": 8.321876521222293e-11, "masked_token_fisher_curvature/p90": 2.3050233721733093e-08, "masked_token_fisher_curvature/p95": 0.0007503330707550049, "masked_token_fisher_curvature/p99": 51968.0, "masked_token_fisher_curvature/var": 1543983595520.0, "masked_token_fisher_kl_divergence": 1.3067460891136307e-08, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 1.348150961071065e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 6.318282135669795e-37, "masked_token_fisher_kl_divergence/p75": 2.9686413585277116e-26, "masked_token_fisher_kl_divergence/p85": 1.747419044018271e-23, "masked_token_fisher_kl_divergence/p90": 4.84396966711053e-21, "masked_token_fisher_kl_divergence/p95": 1.575955620343461e-16, "masked_token_fisher_kl_divergence/p99": 1.0943040251731873e-08, "masked_token_fisher_kl_divergence/var": 6.82299201404217e-14, "masked_token_full_update_term": 1.002617682388518e-05, "masked_token_full_update_term/max": 0.00439453125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -2.7008354663848877e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.4772254600114998e-18, "masked_token_full_update_term/p85": 1.5265566588595902e-15, "masked_token_full_update_term/p90": 4.618527782440651e-14, "masked_token_full_update_term/p95": 6.973199795368146e-12, "masked_token_full_update_term/p99": 3.695487976074219e-05, "masked_token_full_update_term/var": 2.408878430060213e-08, "masked_token_hessian_coeff": -116325.7734375, "masked_token_hessian_coeff/max": 4384.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -30932992.0, "masked_token_hessian_coeff/p25": -3.6961864680051804e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.017333984375, "masked_token_hessian_coeff/var": 2247944568832.0, "masked_token_hessian_coeff_abs": 116326.3046875, "masked_token_hessian_coeff_abs/max": 30932992.0, "masked_token_hessian_coeff_abs/median": 2.0650148258027912e-14, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.2654573917388916e-07, "masked_token_hessian_coeff_abs/p99": 812544.0, "masked_token_hessian_coeff_abs/var": 2247944568832.0, "mean_logprobs": -0.008544921875, "mean_logprobs/var": 2.47955322265625e-05, "num_completions/total": 8544, "per_sentence_gradient_norm": 30.277019500732422, "per_sentence_gradient_norm/max": 216.0, "per_sentence_gradient_norm/median": 17.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 45.1875, "per_sentence_gradient_norm/var": 1584.0166015625, "per_token_feature_norm": 189.86822509765625, "per_token_feature_norm/max": 238.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 108.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 116.05806732177734, "per_token_gradient_norm": 0.6737802624702454, "per_token_gradient_norm/max": 274.0, "per_token_gradient_norm/median": 4.206704429243757e-17, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.0550138540565968e-09, "per_token_gradient_norm/var": 92.66194152832031, "per_token_policy_error_norm": 0.005173843819648027, "per_token_policy_error_norm/max": 1.9375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004948786925524473, "policy_entropy": 0.009104428812861443, "policy_entropy/max": 1.5234375, "policy_entropy/median": 2.8194335754960775e-10, "policy_entropy/min": 2.236696376343387e-21, "policy_entropy/p25": 8.348877145181177e-13, "policy_entropy/p75": 6.193295121192932e-08, "policy_entropy/var": 0.004813611973077059, "policy_loss": -0.5625, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.24868421256542206, "policy_sharpness": 9.7377347946167, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6982816457748413, "reward": 0.5625, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.24868421256542206, "rewards/accuracy_reward": 0.5625, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.24868421256542206, "sentence_fisher_curvature": 322661.75, "sentence_fisher_curvature/max": 1449984.0, "sentence_fisher_curvature/median": 123392.0, "sentence_fisher_curvature/min": 102.5, "sentence_fisher_curvature/p25": 4952.0, "sentence_fisher_curvature/p75": 581632.0, "sentence_fisher_curvature/p85": 733184.0, "sentence_fisher_curvature/p90": 866304.0, "sentence_fisher_curvature/p95": 988160.0, "sentence_fisher_curvature/p99": 1208730.375, "sentence_fisher_curvature/var": 134132219904.0, "sentence_fisher_kl_divergence": 6.779384165156443e-08, "sentence_fisher_kl_divergence/max": 3.0547380447387695e-07, "sentence_fisher_kl_divergence/median": 2.5960616767406464e-08, "sentence_fisher_kl_divergence/min": 2.1600499167107046e-11, "sentence_fisher_kl_divergence/p25": 1.0422809282317758e-09, "sentence_fisher_kl_divergence/p75": 1.2200325727462769e-07, "sentence_fisher_kl_divergence/p85": 1.5366822481155396e-07, "sentence_fisher_kl_divergence/p90": 1.8207356333732605e-07, "sentence_fisher_kl_divergence/p95": 2.076849341392517e-07, "sentence_fisher_kl_divergence/p99": 2.541580954584788e-07, "sentence_fisher_kl_divergence/var": 5.923108276637348e-15, "sentence_full_gradient_variance/max_squared_error": 2453.21435546875, "sentence_full_gradient_variance/metric": 2453.21435546875, "sentence_full_gradient_variance/p75": 2453.21435546875, "sentence_full_gradient_variance/p90": 2453.21435546875, "sentence_full_gradient_variance/p95": 2453.21435546875, "sentence_full_gradient_variance/p99": 2453.21435546875, "sentence_full_update_term": 0.0017340978374704719, "sentence_full_update_term/max": 0.017822265625, "sentence_full_update_term/median": 0.000911712646484375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.002506256103515625, "sentence_full_update_term/p85": 0.00383758544921875, "sentence_full_update_term/p90": 0.00445556640625, "sentence_full_update_term/p95": 0.00569915771484375, "sentence_full_update_term/p99": 0.008950834162533283, "sentence_full_update_term/var": 6.61709054838866e-06, "sentence_hessian_coeff": 46730.75, "sentence_hessian_coeff/max": 929792.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -380928.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 50816.0, "sentence_hessian_coeff/p99": 723559.0625, "sentence_hessian_coeff/var": 38878511104.0, "sentence_hessian_coeff_abs": 110789.421875, "sentence_hessian_coeff_abs/max": 929792.0, "sentence_hessian_coeff_abs/median": 50432.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 175104.0, "sentence_hessian_coeff_abs/p99": 723559.0625, "sentence_hessian_coeff_abs/var": 28681760768.0, "step": 89, "token_fisher_curvature": 299332.53125, "token_fisher_curvature/max": 231735296.0, "token_fisher_curvature/median": 6.708500942254059e-19, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 3.1019272970738538e-24, "token_fisher_curvature/p75": 1.5276668818842154e-13, "token_fisher_curvature/p85": 9.231371222995222e-11, "token_fisher_curvature/p90": 2.9103830456733704e-08, "token_fisher_curvature/p95": 0.00147247314453125, "token_fisher_curvature/p99": 138248.0, "token_fisher_curvature/var": 31876160946176.0, "token_fisher_kl_divergence": 6.28972713911935e-08, "token_fisher_kl_divergence/max": 4.863739013671875e-05, "token_fisher_kl_divergence/median": 1.4097807192914566e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.523993647063696e-37, "token_fisher_kl_divergence/p75": 3.2109794286116065e-26, "token_fisher_kl_divergence/p85": 1.943874439499615e-23, "token_fisher_kl_divergence/p90": 6.1145190879919806e-21, "token_fisher_kl_divergence/p95": 3.0878077872387166e-16, "token_fisher_kl_divergence/p99": 2.9035618354100734e-08, "token_fisher_kl_divergence/var": 1.4069762065485603e-12, "token_full_update_term": 2.4585788196418434e-05, "token_full_update_term/max": 0.00994873046875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -2.7008354663848877e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.6601845766184287e-18, "token_full_update_term/p85": 1.790234627208065e-15, "token_full_update_term/p90": 5.262457136723242e-14, "token_full_update_term/p95": 1.0118128557223827e-11, "token_full_update_term/p99": 0.00011577829718589783, "token_full_update_term/var": 1.2386280445753073e-07, "token_hessian_coeff": 20333.748046875, "token_hessian_coeff/max": 226492416.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -30932992.0, "token_hessian_coeff/p25": -3.637978807091713e-09, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.040088653564453125, "token_hessian_coeff/var": 19204241620992.0, "token_hessian_coeff_abs": 253689.9375, "token_hessian_coeff_abs/max": 226492416.0, "token_hessian_coeff_abs/median": 2.731148640577885e-14, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 4.470348358154297e-07, "token_hessian_coeff_abs/p99": 1949696.0, "token_hessian_coeff_abs/var": 19140293165056.0 }, { "accuracy_reward": 0.8125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.1539473831653595, "adam_stats/lm_head/lr_effective_max": 2.588851884866017e-06, "adam_stats/lm_head/lr_effective_mean": -7.869477638977607e-13, "adam_stats/lm_head/lr_effective_min": -2.65571657109831e-06, "adam_stats/lm_head/lr_effective_std": 8.416118646437099e-08, "adam_stats/lr_effective_max": 2.8357148948998656e-06, "adam_stats/lr_effective_mean": -1.4098702674075758e-12, "adam_stats/lr_effective_min": -2.862805104086874e-06, "adam_stats/m_t_max": 0.002539905719459057, "adam_stats/m_t_mean": -2.793494775776839e-11, "adam_stats/m_t_min": -0.0035968718584626913, "adam_stats/v_t_max": 2.5349210773129016e-05, "adam_stats/v_t_mean": 1.920500000349157e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.8125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.1539473831653595, "all_logprobs": -0.007948936894536018, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.125, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -8.344650268554688e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -6.031990051269531e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012209225445985794, "clip_ratio": 0.0, "completion_length": 441.54168701171875, "completion_length/correct": 446.70513916015625, "completion_length/correct/max": 772.0, "completion_length/correct/median": 418.0, "completion_length/correct/min": 207.0, "completion_length/correct/p25": 346.5, "completion_length/correct/p75": 512.75, "completion_length/correct/var": 17258.056640625, "completion_length/incorrect": 419.1666564941406, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 393.0, "completion_length/incorrect/min": 242.0, "completion_length/incorrect/p25": 251.0, "completion_length/incorrect/p75": 537.25, "completion_length/incorrect/var": 41619.44140625, "completion_length/max": 1024.0, "completion_length/median": 417.0, "completion_length/min": 207.0, "completion_length/p25": 342.0, "completion_length/p75": 515.25, "completion_length/var": 21552.546875, "curvature_clip_ratio_token_fisher": 0.0022176087368279696, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0022176087368279696, "curvature_clip_ratio_total_full": 0.0022176087368279696, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.144, "feature_vector_variance/max_squared_error": 63898.90625, "feature_vector_variance/metric": 30987.875, "generated_tokens/total": 4930208.0, "global_fisher_curvature": 206848.0, "global_fisher_curvature/max": 206848.0, "global_fisher_curvature/median": 206848.0, "global_fisher_curvature/min": 206848.0, "global_fisher_curvature/p25": 206848.0, "global_fisher_curvature/p75": 206848.0, "global_fisher_curvature/p85": 206848.0, "global_fisher_curvature/p90": 206848.0, "global_fisher_curvature/p95": 206848.0, "global_fisher_curvature/p99": 206848.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.073364496231079e-08, "global_fisher_kl_divergence/max": 3.073364496231079e-08, "global_fisher_kl_divergence/median": 3.073364496231079e-08, "global_fisher_kl_divergence/min": 3.073364496231079e-08, "global_fisher_kl_divergence/p25": 3.073364496231079e-08, "global_fisher_kl_divergence/p75": 3.073364496231079e-08, "global_fisher_kl_divergence/p85": 3.073364496231079e-08, "global_fisher_kl_divergence/p90": 3.073364496231079e-08, "global_fisher_kl_divergence/p95": 3.073364496231079e-08, "global_fisher_kl_divergence/p99": 3.073364496231079e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0057373046875, "global_full_update_term/max": 0.0057373046875, "global_full_update_term/median": 0.0057373046875, "global_full_update_term/min": 0.0057373046875, "global_full_update_term/p25": 0.0057373046875, "global_full_update_term/p75": 0.0057373046875, "global_full_update_term/p85": 0.0057373046875, "global_full_update_term/p90": 0.0057373046875, "global_full_update_term/p95": 0.0057373046875, "global_full_update_term/p99": 0.0057373046875, "global_full_update_term/var": NaN, "global_hessian_coeff": 59904.0, "global_hessian_coeff/max": 59904.0, "global_hessian_coeff/median": 59904.0, "global_hessian_coeff/min": 59904.0, "global_hessian_coeff/p25": 59904.0, "global_hessian_coeff/p75": 59904.0, "global_hessian_coeff/p99": 59904.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 59904.0, "global_hessian_coeff_abs/max": 59904.0, "global_hessian_coeff_abs/median": 59904.0, "global_hessian_coeff_abs/min": 59904.0, "global_hessian_coeff_abs/p25": 59904.0, "global_hessian_coeff_abs/p75": 59904.0, "global_hessian_coeff_abs/p99": 59904.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.7194551825523376, "learning_rate": 4.5230534410568764e-07, "loss": -0.8125, "masked_global_fisher_curvature": 57344.0, "masked_global_fisher_curvature/max": 57344.0, "masked_global_fisher_curvature/median": 57344.0, "masked_global_fisher_curvature/min": 57344.0, "masked_global_fisher_curvature/p25": 57344.0, "masked_global_fisher_curvature/p75": 57344.0, "masked_global_fisher_curvature/p85": 57344.0, "masked_global_fisher_curvature/p90": 57344.0, "masked_global_fisher_curvature/p95": 57344.0, "masked_global_fisher_curvature/p99": 57344.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/max": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/median": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/min": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/p25": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/p75": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/p85": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/p90": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/p95": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/p99": 8.556526154279709e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0031585693359375, "masked_global_full_update_term/max": 0.0031585693359375, "masked_global_full_update_term/median": 0.0031585693359375, "masked_global_full_update_term/min": 0.0031585693359375, "masked_global_full_update_term/p25": 0.0031585693359375, "masked_global_full_update_term/p75": 0.0031585693359375, "masked_global_full_update_term/p85": 0.0031585693359375, "masked_global_full_update_term/p90": 0.0031585693359375, "masked_global_full_update_term/p95": 0.0031585693359375, "masked_global_full_update_term/p99": 0.0031585693359375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -69632.0, "masked_global_hessian_coeff/max": -69632.0, "masked_global_hessian_coeff/median": -69632.0, "masked_global_hessian_coeff/min": -69632.0, "masked_global_hessian_coeff/p25": -69632.0, "masked_global_hessian_coeff/p75": -69632.0, "masked_global_hessian_coeff/p99": -69632.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 69632.0, "masked_global_hessian_coeff_abs/max": 69632.0, "masked_global_hessian_coeff_abs/median": 69632.0, "masked_global_hessian_coeff_abs/min": 69632.0, "masked_global_hessian_coeff_abs/p25": 69632.0, "masked_global_hessian_coeff_abs/p75": 69632.0, "masked_global_hessian_coeff_abs/p99": 69632.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 24.672039031982422, "masked_per_sentence_gradient_norm/max": 140.0, "masked_per_sentence_gradient_norm/median": 11.625, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 3.54296875, "masked_per_sentence_gradient_norm/p75": 33.9375, "masked_per_sentence_gradient_norm/var": 852.3997802734375, "masked_per_token_gradient_norm": 0.5580809712409973, "masked_per_token_gradient_norm/max": 152.0, "masked_per_token_gradient_norm/median": 1.0277290130034089e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 3.809452753245068e-15, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.918585419654846e-08, "masked_per_token_gradient_norm/var": 42.40695571899414, "masked_sentence_fisher_curvature": 119037.796875, "masked_sentence_fisher_curvature/max": 432128.0, "masked_sentence_fisher_curvature/median": 108032.0, "masked_sentence_fisher_curvature/min": 420.0, "masked_sentence_fisher_curvature/p25": 15680.0, "masked_sentence_fisher_curvature/p75": 202496.0, "masked_sentence_fisher_curvature/p85": 233472.0, "masked_sentence_fisher_curvature/p90": 242688.0, "masked_sentence_fisher_curvature/p95": 286208.0, "masked_sentence_fisher_curvature/p99": 420454.4375, "masked_sentence_fisher_curvature/var": 10309544960.0, "masked_sentence_fisher_kl_divergence": 1.7745033886740202e-08, "masked_sentence_fisher_kl_divergence/max": 6.426125764846802e-08, "masked_sentence_fisher_kl_divergence/median": 1.6065314412117004e-08, "masked_sentence_fisher_kl_divergence/min": 6.275513442233205e-11, "masked_sentence_fisher_kl_divergence/p25": 2.342858351767063e-09, "masked_sentence_fisher_kl_divergence/p75": 3.026798367500305e-08, "masked_sentence_fisher_kl_divergence/p85": 3.4924596548080444e-08, "masked_sentence_fisher_kl_divergence/p90": 3.620516508817673e-08, "masked_sentence_fisher_kl_divergence/p95": 4.260800778865814e-08, "masked_sentence_fisher_kl_divergence/p99": 6.24917504410405e-08, "masked_sentence_fisher_kl_divergence/var": 2.288328593132253e-16, "masked_sentence_full_gradient_variance/max_squared_error": 1394.22998046875, "masked_sentence_full_gradient_variance/metric": 1394.22998046875, "masked_sentence_full_gradient_variance/p75": 1394.22998046875, "masked_sentence_full_gradient_variance/p90": 1394.22998046875, "masked_sentence_full_gradient_variance/p95": 1394.22998046875, "masked_sentence_full_gradient_variance/p99": 1394.22998046875, "masked_sentence_full_update_term": 0.0011464953422546387, "masked_sentence_full_update_term/max": 0.007080078125, "masked_sentence_full_update_term/median": 0.00064849853515625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00019502639770507812, "masked_sentence_full_update_term/p75": 0.001720428466796875, "masked_sentence_full_update_term/p85": 0.0028076171875, "masked_sentence_full_update_term/p90": 0.00290679931640625, "masked_sentence_full_update_term/p95": 0.004364013671875, "masked_sentence_full_update_term/p99": 0.004673774819821119, "masked_sentence_full_update_term/var": 1.947401869983878e-06, "masked_sentence_hessian_coeff": -169546.671875, "masked_sentence_hessian_coeff/max": 0.0, "masked_sentence_hessian_coeff/median": -167936.0, "masked_sentence_hessian_coeff/min": -581632.0, "masked_sentence_hessian_coeff/p25": -254464.0, "masked_sentence_hessian_coeff/p75": -35840.0, "masked_sentence_hessian_coeff/p99": 0.0, "masked_sentence_hessian_coeff/var": 20769869824.0, "masked_sentence_hessian_coeff_abs": 169546.671875, "masked_sentence_hessian_coeff_abs/max": 581632.0, "masked_sentence_hessian_coeff_abs/median": 165888.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 35840.0, "masked_sentence_hessian_coeff_abs/p75": 254464.0, "masked_sentence_hessian_coeff_abs/p99": 525209.75, "masked_sentence_hessian_coeff_abs/var": 20769869824.0, "masked_token_fisher_curvature": 135272.921875, "masked_token_fisher_curvature/max": 63438848.0, "masked_token_fisher_curvature/median": 1.0706496453294356e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.042102137240287e-24, "masked_token_fisher_curvature/p75": 1.6608936448392342e-13, "masked_token_fisher_curvature/p85": 1.509761204943061e-10, "masked_token_fisher_curvature/p90": 3.050081431865692e-08, "masked_token_fisher_curvature/p95": 0.0014247000217437744, "masked_token_fisher_curvature/p99": 331776.0, "masked_token_fisher_curvature/var": 4617486204928.0, "masked_token_fisher_kl_divergence": 2.016613365185549e-08, "masked_token_fisher_kl_divergence/max": 9.47713851928711e-06, "masked_token_fisher_kl_divergence/median": 1.5946699939526313e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.041591632752669e-37, "masked_token_fisher_kl_divergence/p75": 2.473867798773093e-26, "masked_token_fisher_kl_divergence/p85": 2.2540671692070004e-23, "masked_token_fisher_kl_divergence/p90": 4.552802091491864e-21, "masked_token_fisher_kl_divergence/p95": 2.1257816470651725e-16, "masked_token_fisher_kl_divergence/p99": 4.936009645462036e-08, "masked_token_fisher_kl_divergence/var": 1.0253428694203398e-13, "masked_token_full_update_term": 1.7096142983064055e-05, "masked_token_full_update_term/max": 0.004364013671875, "masked_token_full_update_term/median": 1.733770563911146e-21, "masked_token_full_update_term/min": -1.0356307029724121e-06, "masked_token_full_update_term/p25": -2.8496372102451803e-22, "masked_token_full_update_term/p75": 2.7755575615628914e-15, "masked_token_full_update_term/p85": 1.652011860642233e-13, "masked_token_full_update_term/p90": 2.937372567402008e-12, "masked_token_full_update_term/p95": 9.267324685424683e-10, "masked_token_full_update_term/p99": 0.000263214111328125, "masked_token_full_update_term/var": 3.983642571370183e-08, "masked_token_hessian_coeff": -203642.296875, "masked_token_hessian_coeff/max": 16252928.0, "masked_token_hessian_coeff/median": -2.7830537874251604e-10, "masked_token_hessian_coeff/min": -30408704.0, "masked_token_hessian_coeff/p25": -1.6570091247558594e-05, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.13671875, "masked_token_hessian_coeff/var": 3950390018048.0, "masked_token_hessian_coeff_abs": 204857.640625, "masked_token_hessian_coeff_abs/max": 30408704.0, "masked_token_hessian_coeff_abs/median": 5.4249539971351624e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 2.4584778657299466e-12, "masked_token_hessian_coeff_abs/p75": 0.0001049041748046875, "masked_token_hessian_coeff_abs/p99": 6356992.0, "masked_token_hessian_coeff_abs/var": 3949893255168.0, "mean_logprobs": -0.00799560546875, "mean_logprobs/var": 3.0517578125e-05, "num_completions/total": 8640, "per_sentence_gradient_norm": 48.39664840698242, "per_sentence_gradient_norm/max": 161.0, "per_sentence_gradient_norm/median": 33.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 7.40625, "per_sentence_gradient_norm/p75": 88.0, "per_sentence_gradient_norm/var": 2346.541015625, "per_token_feature_norm": 190.06825256347656, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 112.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 138.6532440185547, "per_token_gradient_norm": 0.9960243105888367, "per_token_gradient_norm/max": 270.0, "per_token_gradient_norm/median": 1.0459189070388675e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 4.0245584642661925e-15, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.0896474123001099e-07, "per_token_gradient_norm/var": 131.91139221191406, "per_token_policy_error_norm": 0.0046803029254078865, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004542670212686062, "policy_entropy": 0.008251362480223179, "policy_entropy/max": 1.4375, "policy_entropy/median": 3.255991032347083e-10, "policy_entropy/min": 8.046812998915853e-21, "policy_entropy/p25": 5.497824417943775e-13, "policy_entropy/p75": 6.51925802230835e-08, "policy_entropy/var": 0.004208394326269627, "policy_loss": -0.8125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.1539473831653595, "policy_sharpness": 9.753604888916016, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5684881210327148, "reward": 0.8125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.1539473831653595, "rewards/accuracy_reward": 0.8125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.1539473831653595, "sentence_fisher_curvature": 414485.8125, "sentence_fisher_curvature/max": 1630208.0, "sentence_fisher_curvature/median": 278528.0, "sentence_fisher_curvature/min": 420.0, "sentence_fisher_curvature/p25": 95552.0, "sentence_fisher_curvature/p75": 612352.0, "sentence_fisher_curvature/p85": 948224.0, "sentence_fisher_curvature/p90": 1044480.0, "sentence_fisher_curvature/p95": 1124352.0, "sentence_fisher_curvature/p99": 1404519.125, "sentence_fisher_curvature/var": 157020176384.0, "sentence_fisher_kl_divergence": 6.18140703068093e-08, "sentence_fisher_kl_divergence/max": 2.4400651454925537e-07, "sentence_fisher_kl_divergence/median": 4.1443854570388794e-08, "sentence_fisher_kl_divergence/min": 6.275513442233205e-11, "sentence_fisher_kl_divergence/p25": 1.4217221178114414e-08, "sentence_fisher_kl_divergence/p75": 9.115319699048996e-08, "sentence_fisher_kl_divergence/p85": 1.4132820069789886e-07, "sentence_fisher_kl_divergence/p90": 1.555308699607849e-07, "sentence_fisher_kl_divergence/p95": 1.6763806343078613e-07, "sentence_fisher_kl_divergence/p99": 2.0950112400441867e-07, "sentence_fisher_kl_divergence/var": 3.4948429055364294e-15, "sentence_full_gradient_variance/max_squared_error": 4525.33349609375, "sentence_full_gradient_variance/metric": 4525.33349609375, "sentence_full_gradient_variance/p75": 4525.33349609375, "sentence_full_gradient_variance/p90": 4525.33349609375, "sentence_full_gradient_variance/p95": 4525.33349609375, "sentence_full_gradient_variance/p99": 4525.33349609375, "sentence_full_update_term": 0.0022067527752369642, "sentence_full_update_term/max": 0.00848388671875, "sentence_full_update_term/median": 0.00164794921875, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00031280517578125, "sentence_full_update_term/p75": 0.00360107421875, "sentence_full_update_term/p85": 0.00482177734375, "sentence_full_update_term/p90": 0.0052490234375, "sentence_full_update_term/p95": 0.00635528564453125, "sentence_full_update_term/p99": 0.00836792029440403, "sentence_full_update_term/var": 4.877694209426409e-06, "sentence_hessian_coeff": 44170.0, "sentence_hessian_coeff/max": 749568.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -499712.0, "sentence_hessian_coeff/p25": -117248.0, "sentence_hessian_coeff/p75": 132096.0, "sentence_hessian_coeff/p99": 695091.375, "sentence_hessian_coeff/var": 65247621120.0, "sentence_hessian_coeff_abs": 175998.0, "sentence_hessian_coeff_abs/max": 749568.0, "sentence_hessian_coeff_abs/median": 132096.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 6088.0, "sentence_hessian_coeff_abs/p75": 272384.0, "sentence_hessian_coeff_abs/p99": 695091.375, "sentence_hessian_coeff_abs/var": 35917799424.0, "step": 90, "token_fisher_curvature": 427259.6875, "token_fisher_curvature/max": 238026752.0, "token_fisher_curvature/median": 1.1316360175317453e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.1196503196671334e-24, "token_fisher_curvature/p75": 1.9229062786507711e-13, "token_fisher_curvature/p85": 1.7639578686612367e-10, "token_fisher_curvature/p90": 4.0046870708465576e-08, "token_fisher_curvature/p95": 0.002724289894104004, "token_fisher_curvature/p99": 1027168.0, "token_fisher_curvature/var": 48409725632512.0, "token_fisher_kl_divergence": 6.371262628590557e-08, "token_fisher_kl_divergence/max": 3.552436828613281e-05, "token_fisher_kl_divergence/median": 1.6871146312832186e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.1591410678348977e-37, "token_fisher_kl_divergence/p75": 2.8727158724528366e-26, "token_fisher_kl_divergence/p85": 2.6257733790373478e-23, "token_fisher_kl_divergence/p90": 5.982170189983496e-21, "token_fisher_kl_divergence/p95": 4.056406903082954e-16, "token_fisher_kl_divergence/p99": 1.5321711543947458e-07, "token_fisher_kl_divergence/var": 1.0765379473282044e-12, "token_full_update_term": 3.075254790019244e-05, "token_full_update_term/max": 0.008544921875, "token_full_update_term/median": 1.998468359928115e-21, "token_full_update_term/min": -1.0356307029724121e-06, "token_full_update_term/p25": -2.564259898914386e-22, "token_full_update_term/p75": 2.942091015256665e-15, "token_full_update_term/p85": 1.8207657603852567e-13, "token_full_update_term/p90": 3.3950064981524974e-12, "token_full_update_term/p95": 1.3893100003770087e-09, "token_full_update_term/p99": 0.000507354736328125, "token_full_update_term/var": 1.2671553406562452e-07, "token_hessian_coeff": 3200.99658203125, "token_hessian_coeff/max": 236978176.0, "token_hessian_coeff/median": -2.7284841053187847e-10, "token_hessian_coeff/min": -30408704.0, "token_hessian_coeff/p25": -1.633167266845703e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.32106781005859375, "token_hessian_coeff/var": 32697523109888.0, "token_hessian_coeff_abs": 410795.09375, "token_hessian_coeff_abs/max": 236978176.0, "token_hessian_coeff_abs/median": 5.564652383327484e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 2.6147972675971687e-12, "token_hessian_coeff_abs/p75": 0.0001087188720703125, "token_hessian_coeff_abs/p99": 10485760.0, "token_hessian_coeff_abs/var": 32528775774208.0 }, { "accuracy_reward": 0.53125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25164473056793213, "adam_stats/lm_head/lr_effective_max": 2.1533794551942265e-06, "adam_stats/lm_head/lr_effective_mean": -4.295417320443473e-12, "adam_stats/lm_head/lr_effective_min": -2.1924088287050836e-06, "adam_stats/lm_head/lr_effective_std": 7.188713624373122e-08, "adam_stats/lr_effective_max": 2.3128061457100557e-06, "adam_stats/lr_effective_mean": 2.8313241508259868e-12, "adam_stats/lr_effective_min": -2.2963743049331242e-06, "adam_stats/m_t_max": 0.002569036791101098, "adam_stats/m_t_mean": -4.7044129009021773e-11, "adam_stats/m_t_min": -0.003226503496989608, "adam_stats/v_t_max": 2.546697032812517e-05, "adam_stats/v_t_mean": 1.999987198422759e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.53125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25164473056793213, "all_logprobs": -0.008973173797130585, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.875, "all_logprobs/p1": -0.201171875, "all_logprobs/p10": -1.430511474609375e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.534027099609375e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.01238801795989275, "clip_ratio": 0.0, "completion_length": 585.3541870117188, "completion_length/correct": 450.7254943847656, "completion_length/correct/max": 954.0, "completion_length/correct/median": 437.0, "completion_length/correct/min": 229.0, "completion_length/correct/p25": 308.0, "completion_length/correct/p75": 542.5, "completion_length/correct/var": 30914.482421875, "completion_length/incorrect": 737.933349609375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 758.0, "completion_length/incorrect/min": 310.0, "completion_length/incorrect/p25": 477.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 75341.9765625, "completion_length/max": 1024.0, "completion_length/median": 537.0, "completion_length/min": 229.0, "completion_length/p25": 314.0, "completion_length/p75": 872.0, "completion_length/var": 71923.765625, "curvature_clip_ratio_token_fisher": 0.000783001771196723, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.000783001771196723, "curvature_clip_ratio_total_full": 0.000783001771196723, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1456, "feature_vector_variance/max_squared_error": 65972.6953125, "feature_vector_variance/metric": 30993.75, "generated_tokens/total": 4986402.0, "global_fisher_curvature": 129536.0, "global_fisher_curvature/max": 129536.0, "global_fisher_curvature/median": 129536.0, "global_fisher_curvature/min": 129536.0, "global_fisher_curvature/p25": 129536.0, "global_fisher_curvature/p75": 129536.0, "global_fisher_curvature/p85": 129536.0, "global_fisher_curvature/p90": 129536.0, "global_fisher_curvature/p95": 129536.0, "global_fisher_curvature/p99": 129536.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.3271346688270569e-08, "global_fisher_kl_divergence/max": 1.3271346688270569e-08, "global_fisher_kl_divergence/median": 1.3271346688270569e-08, "global_fisher_kl_divergence/min": 1.3271346688270569e-08, "global_fisher_kl_divergence/p25": 1.3271346688270569e-08, "global_fisher_kl_divergence/p75": 1.3271346688270569e-08, "global_fisher_kl_divergence/p85": 1.3271346688270569e-08, "global_fisher_kl_divergence/p90": 1.3271346688270569e-08, "global_fisher_kl_divergence/p95": 1.3271346688270569e-08, "global_fisher_kl_divergence/p99": 1.3271346688270569e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0023345947265625, "global_full_update_term/max": 0.0023345947265625, "global_full_update_term/median": 0.0023345947265625, "global_full_update_term/min": 0.0023345947265625, "global_full_update_term/p25": 0.0023345947265625, "global_full_update_term/p75": 0.0023345947265625, "global_full_update_term/p85": 0.0023345947265625, "global_full_update_term/p90": 0.0023345947265625, "global_full_update_term/p95": 0.0023345947265625, "global_full_update_term/p99": 0.0023345947265625, "global_full_update_term/var": NaN, "global_hessian_coeff": 18816.0, "global_hessian_coeff/max": 18816.0, "global_hessian_coeff/median": 18816.0, "global_hessian_coeff/min": 18816.0, "global_hessian_coeff/p25": 18816.0, "global_hessian_coeff/p75": 18816.0, "global_hessian_coeff/p99": 18816.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 18816.0, "global_hessian_coeff_abs/max": 18816.0, "global_hessian_coeff_abs/median": 18816.0, "global_hessian_coeff_abs/min": 18816.0, "global_hessian_coeff_abs/p25": 18816.0, "global_hessian_coeff_abs/p75": 18816.0, "global_hessian_coeff_abs/p99": 18816.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.6982598900794983, "learning_rate": 3.6707612778634855e-07, "loss": -0.5312, "masked_global_fisher_curvature": 69632.0, "masked_global_fisher_curvature/max": 69632.0, "masked_global_fisher_curvature/median": 69632.0, "masked_global_fisher_curvature/min": 69632.0, "masked_global_fisher_curvature/p25": 69632.0, "masked_global_fisher_curvature/p75": 69632.0, "masked_global_fisher_curvature/p85": 69632.0, "masked_global_fisher_curvature/p90": 69632.0, "masked_global_fisher_curvature/p95": 69632.0, "masked_global_fisher_curvature/p99": 69632.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/max": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/median": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/min": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/p25": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/p75": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/p85": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/p90": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/p95": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/p99": 7.130438461899757e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00156402587890625, "masked_global_full_update_term/max": 0.00156402587890625, "masked_global_full_update_term/median": 0.00156402587890625, "masked_global_full_update_term/min": 0.00156402587890625, "masked_global_full_update_term/p25": 0.00156402587890625, "masked_global_full_update_term/p75": 0.00156402587890625, "masked_global_full_update_term/p85": 0.00156402587890625, "masked_global_full_update_term/p90": 0.00156402587890625, "masked_global_full_update_term/p95": 0.00156402587890625, "masked_global_full_update_term/p99": 0.00156402587890625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -37376.0, "masked_global_hessian_coeff/max": -37376.0, "masked_global_hessian_coeff/median": -37376.0, "masked_global_hessian_coeff/min": -37376.0, "masked_global_hessian_coeff/p25": -37376.0, "masked_global_hessian_coeff/p75": -37376.0, "masked_global_hessian_coeff/p99": -37376.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 37376.0, "masked_global_hessian_coeff_abs/max": 37376.0, "masked_global_hessian_coeff_abs/median": 37376.0, "masked_global_hessian_coeff_abs/min": 37376.0, "masked_global_hessian_coeff_abs/p25": 37376.0, "masked_global_hessian_coeff_abs/p75": 37376.0, "masked_global_hessian_coeff_abs/p99": 37376.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 16.6064453125, "masked_per_sentence_gradient_norm/max": 148.0, "masked_per_sentence_gradient_norm/median": 1.703125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 25.09375, "masked_per_sentence_gradient_norm/var": 605.7138061523438, "masked_per_token_gradient_norm": 0.3959057331085205, "masked_per_token_gradient_norm/max": 174.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.604264050722122e-10, "masked_per_token_gradient_norm/var": 37.87282180786133, "masked_sentence_fisher_curvature": 160971.96875, "masked_sentence_fisher_curvature/max": 1003520.0, "masked_sentence_fisher_curvature/median": 17536.0, "masked_sentence_fisher_curvature/min": 808.0, "masked_sentence_fisher_curvature/p25": 4108.0, "masked_sentence_fisher_curvature/p75": 299520.0, "masked_sentence_fisher_curvature/p85": 399872.0, "masked_sentence_fisher_curvature/p90": 471040.0, "masked_sentence_fisher_curvature/p95": 508416.0, "masked_sentence_fisher_curvature/p99": 629966.0, "masked_sentence_fisher_curvature/var": 42599149568.0, "masked_sentence_fisher_kl_divergence": 1.6472068153916553e-08, "masked_sentence_fisher_kl_divergence/max": 1.0244548320770264e-07, "masked_sentence_fisher_kl_divergence/median": 1.7971615307033062e-09, "masked_sentence_fisher_kl_divergence/min": 8.276401786133647e-11, "masked_sentence_fisher_kl_divergence/p25": 4.206412995699793e-10, "masked_sentence_fisher_kl_divergence/p75": 3.055902197957039e-08, "masked_sentence_fisher_kl_divergence/p85": 4.086177796125412e-08, "masked_sentence_fisher_kl_divergence/p90": 4.819594323635101e-08, "masked_sentence_fisher_kl_divergence/p95": 5.209585651755333e-08, "masked_sentence_fisher_kl_divergence/p99": 6.440107824801089e-08, "masked_sentence_fisher_kl_divergence/var": 4.458733259347762e-16, "masked_sentence_full_gradient_variance/max_squared_error": 859.0533447265625, "masked_sentence_full_gradient_variance/metric": 859.0533447265625, "masked_sentence_full_gradient_variance/p75": 859.0533447265625, "masked_sentence_full_gradient_variance/p90": 859.0533447265625, "masked_sentence_full_gradient_variance/p95": 859.0533447265625, "masked_sentence_full_gradient_variance/p99": 859.0533447265625, "masked_sentence_full_update_term": 0.0007072886219248176, "masked_sentence_full_update_term/max": 0.0047607421875, "masked_sentence_full_update_term/median": 3.457069396972656e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0012054443359375, "masked_sentence_full_update_term/p85": 0.0015735626220703125, "masked_sentence_full_update_term/p90": 0.001983642578125, "masked_sentence_full_update_term/p95": 0.002353668212890625, "masked_sentence_full_update_term/p99": 0.004615784157067537, "masked_sentence_full_update_term/var": 1.0238054528599605e-06, "masked_sentence_hessian_coeff": -102498.671875, "masked_sentence_hessian_coeff/max": 53504.0, "masked_sentence_hessian_coeff/median": -28928.0, "masked_sentence_hessian_coeff/min": -561152.0, "masked_sentence_hessian_coeff/p25": -207872.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 2675.36328125, "masked_sentence_hessian_coeff/var": 19801847808.0, "masked_sentence_hessian_coeff_abs": 103613.3359375, "masked_sentence_hessian_coeff_abs/max": 561152.0, "masked_sentence_hessian_coeff_abs/median": 28928.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 207872.0, "masked_sentence_hessian_coeff_abs/p99": 541696.0625, "masked_sentence_hessian_coeff_abs/var": 19569684480.0, "masked_token_fisher_curvature": 130035.96875, "masked_token_fisher_curvature/max": 95944704.0, "masked_token_fisher_curvature/median": 1.3552527156068805e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.1828682759508296e-22, "masked_token_fisher_curvature/p75": 9.947598300641403e-13, "masked_token_fisher_curvature/p85": 6.803020369261503e-10, "masked_token_fisher_curvature/p90": 1.0617077350616455e-07, "masked_token_fisher_curvature/p95": 0.0016910731792449951, "masked_token_fisher_curvature/p99": 193536.0, "masked_token_fisher_curvature/var": 6595917905920.0, "masked_token_fisher_kl_divergence": 1.330306620417332e-08, "masked_token_fisher_kl_divergence/max": 9.834766387939453e-06, "masked_token_fisher_kl_divergence/median": 1.3866695599588098e-30, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.2131101700486007e-35, "masked_token_fisher_kl_divergence/p75": 1.0178198943523583e-25, "masked_token_fisher_kl_divergence/p85": 6.948317145445432e-23, "masked_token_fisher_kl_divergence/p90": 1.0852609636695723e-20, "masked_token_fisher_kl_divergence/p95": 1.7317080386845818e-16, "masked_token_fisher_kl_divergence/p99": 1.979060471057892e-08, "masked_token_fisher_kl_divergence/var": 6.905190801749159e-14, "masked_token_full_update_term": 1.0218718671239913e-05, "masked_token_full_update_term/max": 0.004425048828125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -7.897615432739258e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.3552527156068805e-19, "masked_token_full_update_term/p85": 1.5543122344752192e-15, "masked_token_full_update_term/p90": 7.242990929245963e-14, "masked_token_full_update_term/p95": 1.432454155292362e-11, "masked_token_full_update_term/p99": 2.515595406293869e-05, "masked_token_full_update_term/var": 2.5221932986596585e-08, "masked_token_hessian_coeff": -96789.5625, "masked_token_hessian_coeff/max": 42991616.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -31719424.0, "masked_token_hessian_coeff/p25": -1.4370016288012266e-10, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.07421875, "masked_token_hessian_coeff/var": 2640104914944.0, "masked_token_hessian_coeff_abs": 126800.9140625, "masked_token_hessian_coeff_abs/max": 42991616.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.073364496231079e-07, "masked_token_hessian_coeff_abs/p99": 651264.0, "masked_token_hessian_coeff_abs/var": 2633395077120.0, "mean_logprobs": -0.00848388671875, "mean_logprobs/var": 3.0279159545898438e-05, "num_completions/total": 8736, "per_sentence_gradient_norm": 22.853515625, "per_sentence_gradient_norm/max": 148.0, "per_sentence_gradient_norm/median": 1.703125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 38.5625, "per_sentence_gradient_norm/var": 1166.2945556640625, "per_token_feature_norm": 189.78158569335938, "per_token_feature_norm/max": 270.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 111.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 134.8992462158203, "per_token_gradient_norm": 0.5680873990058899, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.822542779147625e-10, "per_token_gradient_norm/var": 76.35588073730469, "per_token_policy_error_norm": 0.0054473672062158585, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0050349365919828415, "policy_entropy": 0.009511341340839863, "policy_entropy/max": 1.7890625, "policy_entropy/median": 1.2732925824820995e-09, "policy_entropy/min": 8.404155023538761e-22, "policy_entropy/p25": 5.7980287238024175e-12, "policy_entropy/p75": 1.555308699607849e-07, "policy_entropy/var": 0.005076143890619278, "policy_loss": -0.53125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25164473056793213, "policy_sharpness": 9.738697052001953, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.7149631977081299, "reward": 0.53125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25164473056793213, "rewards/accuracy_reward": 0.53125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25164473056793213, "sentence_fisher_curvature": 298406.625, "sentence_fisher_curvature/max": 1794048.0, "sentence_fisher_curvature/median": 17536.0, "sentence_fisher_curvature/min": 808.0, "sentence_fisher_curvature/p25": 4108.0, "sentence_fisher_curvature/p75": 477696.0, "sentence_fisher_curvature/p85": 721920.0, "sentence_fisher_curvature/p90": 878592.0, "sentence_fisher_curvature/p95": 1169408.0, "sentence_fisher_curvature/p99": 1599488.625, "sentence_fisher_curvature/var": 168487862272.0, "sentence_fisher_kl_divergence": 3.0543162665708223e-08, "sentence_fisher_kl_divergence/max": 1.8347054719924927e-07, "sentence_fisher_kl_divergence/median": 1.7971615307033062e-09, "sentence_fisher_kl_divergence/min": 8.276401786133647e-11, "sentence_fisher_kl_divergence/p25": 4.206412995699793e-10, "sentence_fisher_kl_divergence/p75": 4.889443516731262e-08, "sentence_fisher_kl_divergence/p85": 7.380731403827667e-08, "sentence_fisher_kl_divergence/p90": 8.987262845039368e-08, "sentence_fisher_kl_divergence/p95": 1.19674950838089e-07, "sentence_fisher_kl_divergence/p99": 1.6400596791754651e-07, "sentence_fisher_kl_divergence/var": 1.76531714136133e-15, "sentence_full_gradient_variance/max_squared_error": 1646.9287109375, "sentence_full_gradient_variance/metric": 1646.9287109375, "sentence_full_gradient_variance/p75": 1646.9287109375, "sentence_full_gradient_variance/p90": 1646.9287109375, "sentence_full_gradient_variance/p95": 1646.9287109375, "sentence_full_gradient_variance/p99": 1646.9287109375, "sentence_full_update_term": 0.0010001461487263441, "sentence_full_update_term/max": 0.007171630859375, "sentence_full_update_term/median": 3.457069396972656e-05, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.001617431640625, "sentence_full_update_term/p85": 0.0022125244140625, "sentence_full_update_term/p90": 0.002685546875, "sentence_full_update_term/p95": 0.003894805908203125, "sentence_full_update_term/p99": 0.006272891070693731, "sentence_full_update_term/var": 2.3044406134431483e-06, "sentence_hessian_coeff": 8717.333984375, "sentence_hessian_coeff/max": 782336.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -540672.0, "sentence_hessian_coeff/p25": -28928.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 537191.1875, "sentence_hessian_coeff/var": 36778602496.0, "sentence_hessian_coeff_abs": 109928.0, "sentence_hessian_coeff_abs/max": 782336.0, "sentence_hessian_coeff_abs/median": 28928.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 187648.0, "sentence_hessian_coeff_abs/p99": 552755.9375, "sentence_hessian_coeff_abs/var": 24644032512.0, "step": 91, "token_fisher_curvature": 249936.140625, "token_fisher_curvature/max": 240123904.0, "token_fisher_curvature/median": 1.3877787807814457e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.195275985139125e-22, "token_fisher_curvature/p75": 1.0231815394945443e-12, "token_fisher_curvature/p85": 7.021299097687006e-10, "token_fisher_curvature/p90": 1.1548399925231934e-07, "token_fisher_curvature/p95": 0.0019789934158325195, "token_fisher_curvature/p99": 224256.0, "token_fisher_curvature/var": 26408598372352.0, "token_fisher_kl_divergence": 2.556699918443428e-08, "token_fisher_kl_divergence/max": 2.4557113647460938e-05, "token_fisher_kl_divergence/median": 1.4174844390690056e-30, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.2248651135568236e-35, "token_fisher_kl_divergence/p75": 1.050131637030211e-25, "token_fisher_kl_divergence/p85": 7.196471329211341e-23, "token_fisher_kl_divergence/p90": 1.1805521702356811e-20, "token_fisher_kl_divergence/p95": 2.0243409813019975e-16, "token_fisher_kl_divergence/p99": 2.293381839990616e-08, "token_fisher_kl_divergence/var": 2.763245090641603e-13, "token_full_update_term": 1.4561293028236832e-05, "token_full_update_term/max": 0.007049560546875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -7.897615432739258e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.5246593050577406e-19, "token_full_update_term/p85": 1.609823385706477e-15, "token_full_update_term/p90": 7.758203801611074e-14, "token_full_update_term/p95": 1.6711965145077556e-11, "token_full_update_term/p99": 4.984624683856964e-05, "token_full_update_term/var": 4.97405743260515e-08, "token_hessian_coeff": -4230.49658203125, "token_hessian_coeff/max": 234881024.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -31719424.0, "token_hessian_coeff/p25": -1.418811734765768e-10, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.09765625, "token_hessian_coeff/var": 16197122785280.0, "token_hessian_coeff_abs": 219184.875, "token_hessian_coeff_abs/max": 234881024.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.1478703022003174e-07, "token_hessian_coeff_abs/p99": 1101760.0, "token_hessian_coeff_abs/var": 16149096955904.0 }, { "accuracy_reward": 0.84375, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.13322368264198303, "adam_stats/lm_head/lr_effective_max": 1.6565593341510976e-06, "adam_stats/lm_head/lr_effective_mean": -3.20293379492409e-12, "adam_stats/lm_head/lr_effective_min": -1.7566046608408215e-06, "adam_stats/lm_head/lr_effective_std": 5.5472302307180144e-08, "adam_stats/lr_effective_max": 1.8280389895153348e-06, "adam_stats/lr_effective_mean": 3.6055420801761606e-12, "adam_stats/lr_effective_min": -1.8573794022813672e-06, "adam_stats/m_t_max": 0.004534514155238867, "adam_stats/m_t_mean": 3.544269391864141e-11, "adam_stats/m_t_min": -0.004875001031905413, "adam_stats/v_t_max": 2.5824012482189573e-05, "adam_stats/v_t_mean": 2.486562605125142e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.84375, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.13322368264198303, "all_logprobs": -0.00778670608997345, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -8.25, "all_logprobs/p1": -0.12069839239120483, "all_logprobs/p10": -9.5367431640625e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -6.618490442633629e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012248248793184757, "clip_ratio": 0.0, "completion_length": 480.85418701171875, "completion_length/correct": 470.1481628417969, "completion_length/correct/max": 951.0, "completion_length/correct/median": 534.0, "completion_length/correct/min": 221.0, "completion_length/correct/p25": 266.0, "completion_length/correct/p75": 638.0, "completion_length/correct/var": 47407.1484375, "completion_length/incorrect": 538.6666870117188, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 274.0, "completion_length/incorrect/min": 247.0, "completion_length/incorrect/p25": 268.0, "completion_length/incorrect/p75": 1013.5, "completion_length/incorrect/var": 129581.8046875, "completion_length/max": 1024.0, "completion_length/median": 314.0, "completion_length/min": 221.0, "completion_length/p25": 266.0, "completion_length/p75": 643.0, "completion_length/var": 59643.53515625, "curvature_clip_ratio_token_fisher": 0.0008448507287539542, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0008448507287539542, "curvature_clip_ratio_total_full": 0.0008448507287539542, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1472, "feature_vector_variance/max_squared_error": 67298.9140625, "feature_vector_variance/metric": 31147.71484375, "generated_tokens/total": 5032564.0, "global_fisher_curvature": 172032.0, "global_fisher_curvature/max": 172032.0, "global_fisher_curvature/median": 172032.0, "global_fisher_curvature/min": 172032.0, "global_fisher_curvature/p25": 172032.0, "global_fisher_curvature/p75": 172032.0, "global_fisher_curvature/p85": 172032.0, "global_fisher_curvature/p90": 172032.0, "global_fisher_curvature/p95": 172032.0, "global_fisher_curvature/p99": 172032.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.1583324521780014e-08, "global_fisher_kl_divergence/max": 1.1583324521780014e-08, "global_fisher_kl_divergence/median": 1.1583324521780014e-08, "global_fisher_kl_divergence/min": 1.1583324521780014e-08, "global_fisher_kl_divergence/p25": 1.1583324521780014e-08, "global_fisher_kl_divergence/p75": 1.1583324521780014e-08, "global_fisher_kl_divergence/p85": 1.1583324521780014e-08, "global_fisher_kl_divergence/p90": 1.1583324521780014e-08, "global_fisher_kl_divergence/p95": 1.1583324521780014e-08, "global_fisher_kl_divergence/p99": 1.1583324521780014e-08, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.003265380859375, "global_full_update_term/max": 0.003265380859375, "global_full_update_term/median": 0.003265380859375, "global_full_update_term/min": 0.003265380859375, "global_full_update_term/p25": 0.003265380859375, "global_full_update_term/p75": 0.003265380859375, "global_full_update_term/p85": 0.003265380859375, "global_full_update_term/p90": 0.003265380859375, "global_full_update_term/p95": 0.003265380859375, "global_full_update_term/p99": 0.003265380859375, "global_full_update_term/var": NaN, "global_hessian_coeff": 36096.0, "global_hessian_coeff/max": 36096.0, "global_hessian_coeff/median": 36096.0, "global_hessian_coeff/min": 36096.0, "global_hessian_coeff/p25": 36096.0, "global_hessian_coeff/p75": 36096.0, "global_hessian_coeff/p99": 36096.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 36096.0, "global_hessian_coeff_abs/max": 36096.0, "global_hessian_coeff_abs/median": 36096.0, "global_hessian_coeff_abs/min": 36096.0, "global_hessian_coeff_abs/p25": 36096.0, "global_hessian_coeff_abs/p75": 36096.0, "global_hessian_coeff_abs/p99": 36096.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 2.284374952316284, "learning_rate": 2.905372804626083e-07, "loss": -0.8438, "masked_global_fisher_curvature": 102912.0, "masked_global_fisher_curvature/max": 102912.0, "masked_global_fisher_curvature/median": 102912.0, "masked_global_fisher_curvature/min": 102912.0, "masked_global_fisher_curvature/p25": 102912.0, "masked_global_fisher_curvature/p75": 102912.0, "masked_global_fisher_curvature/p85": 102912.0, "masked_global_fisher_curvature/p90": 102912.0, "masked_global_fisher_curvature/p95": 102912.0, "masked_global_fisher_curvature/p99": 102912.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/max": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/median": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/min": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/p25": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/p75": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/p85": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/p90": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/p95": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/p99": 6.9267116487026215e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0028228759765625, "masked_global_full_update_term/max": 0.0028228759765625, "masked_global_full_update_term/median": 0.0028228759765625, "masked_global_full_update_term/min": 0.0028228759765625, "masked_global_full_update_term/p25": 0.0028228759765625, "masked_global_full_update_term/p75": 0.0028228759765625, "masked_global_full_update_term/p85": 0.0028228759765625, "masked_global_full_update_term/p90": 0.0028228759765625, "masked_global_full_update_term/p95": 0.0028228759765625, "masked_global_full_update_term/p99": 0.0028228759765625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": -32384.0, "masked_global_hessian_coeff/max": -32384.0, "masked_global_hessian_coeff/median": -32384.0, "masked_global_hessian_coeff/min": -32384.0, "masked_global_hessian_coeff/p25": -32384.0, "masked_global_hessian_coeff/p75": -32384.0, "masked_global_hessian_coeff/p99": -32384.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 32384.0, "masked_global_hessian_coeff_abs/max": 32384.0, "masked_global_hessian_coeff_abs/median": 32384.0, "masked_global_hessian_coeff_abs/min": 32384.0, "masked_global_hessian_coeff_abs/p25": 32384.0, "masked_global_hessian_coeff_abs/p75": 32384.0, "masked_global_hessian_coeff_abs/p99": 32384.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 38.865562438964844, "masked_per_sentence_gradient_norm/max": 191.0, "masked_per_sentence_gradient_norm/median": 27.25, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 2.859375, "masked_per_sentence_gradient_norm/p75": 54.75, "masked_per_sentence_gradient_norm/var": 1973.9158935546875, "masked_per_token_gradient_norm": 0.7477481961250305, "masked_per_token_gradient_norm/max": 220.0, "masked_per_token_gradient_norm/median": 3.892637323588133e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 1.8540724511240114e-14, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 2.691522240638733e-07, "masked_per_token_gradient_norm/var": 81.4207992553711, "masked_sentence_fisher_curvature": 265841.0, "masked_sentence_fisher_curvature/max": 1785856.0, "masked_sentence_fisher_curvature/median": 217088.0, "masked_sentence_fisher_curvature/min": 1616.0, "masked_sentence_fisher_curvature/p25": 14016.0, "masked_sentence_fisher_curvature/p75": 392192.0, "masked_sentence_fisher_curvature/p85": 487424.0, "masked_sentence_fisher_curvature/p90": 575488.0, "masked_sentence_fisher_curvature/p95": 833536.0, "masked_sentence_fisher_curvature/p99": 1451213.875, "masked_sentence_fisher_curvature/var": 105956655104.0, "masked_sentence_fisher_kl_divergence": 1.790980341809245e-08, "masked_sentence_fisher_kl_divergence/max": 1.2014061212539673e-07, "masked_sentence_fisher_kl_divergence/median": 1.461012288928032e-08, "masked_sentence_fisher_kl_divergence/min": 1.0868461686186492e-10, "masked_sentence_fisher_kl_divergence/p25": 9.458744898438454e-10, "masked_sentence_fisher_kl_divergence/p75": 2.639717422425747e-08, "masked_sentence_fisher_kl_divergence/p85": 3.277091309428215e-08, "masked_sentence_fisher_kl_divergence/p90": 3.876630216836929e-08, "masked_sentence_fisher_kl_divergence/p95": 5.611218512058258e-08, "masked_sentence_fisher_kl_divergence/p99": 9.757939523069581e-08, "masked_sentence_fisher_kl_divergence/var": 4.804371406221984e-16, "masked_sentence_full_gradient_variance/max_squared_error": 3371.38623046875, "masked_sentence_full_gradient_variance/metric": 3371.38623046875, "masked_sentence_full_gradient_variance/p75": 3371.38623046875, "masked_sentence_full_gradient_variance/p90": 3371.38623046875, "masked_sentence_full_gradient_variance/p95": 3371.38623046875, "masked_sentence_full_gradient_variance/p99": 3371.38623046875, "masked_sentence_full_update_term": 0.0011788036208599806, "masked_sentence_full_update_term/max": 0.006866455078125, "masked_sentence_full_update_term/median": 0.000850677490234375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 9.202957153320312e-05, "masked_sentence_full_update_term/p75": 0.001575469970703125, "masked_sentence_full_update_term/p85": 0.0021209716796875, "masked_sentence_full_update_term/p90": 0.003204345703125, "masked_sentence_full_update_term/p95": 0.0038909912109375, "masked_sentence_full_update_term/p99": 0.00527191674336791, "masked_sentence_full_update_term/var": 1.8208660321761272e-06, "masked_sentence_hessian_coeff": -73470.0, "masked_sentence_hessian_coeff/max": 385024.0, "masked_sentence_hessian_coeff/median": -81408.0, "masked_sentence_hessian_coeff/min": -456704.0, "masked_sentence_hessian_coeff/p25": -137472.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 373350.4375, "masked_sentence_hessian_coeff/var": 22408757248.0, "masked_sentence_hessian_coeff_abs": 119532.671875, "masked_sentence_hessian_coeff_abs/max": 456704.0, "masked_sentence_hessian_coeff_abs/median": 87040.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 11904.0, "masked_sentence_hessian_coeff_abs/p75": 189184.0, "masked_sentence_hessian_coeff_abs/p99": 456704.0, "masked_sentence_hessian_coeff_abs/var": 13424955392.0, "masked_token_fisher_curvature": 275810.125, "masked_token_fisher_curvature/max": 147849216.0, "masked_token_fisher_curvature/median": 5.231275482242559e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.1994118882018901e-23, "masked_token_fisher_curvature/p75": 6.465938895416912e-13, "masked_token_fisher_curvature/p85": 6.610179070776212e-10, "masked_token_fisher_curvature/p90": 1.4435499906539917e-07, "masked_token_fisher_curvature/p95": 0.0030517578125, "masked_token_fisher_curvature/p99": 540672.0, "masked_token_fisher_curvature/var": 21978780008448.0, "masked_token_fisher_kl_divergence": 1.8577184235368804e-08, "masked_token_fisher_kl_divergence/max": 9.953975677490234e-06, "masked_token_fisher_kl_divergence/median": 3.528303658117416e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 8.081523661903227e-37, "masked_token_fisher_kl_divergence/p75": 4.362085261510107e-26, "masked_token_fisher_kl_divergence/p85": 4.460555297320863e-23, "masked_token_fisher_kl_divergence/p90": 9.740878893424454e-21, "masked_token_fisher_kl_divergence/p95": 2.0556473190325164e-16, "masked_token_fisher_kl_divergence/p99": 3.632158041000366e-08, "masked_token_fisher_kl_divergence/var": 9.967744132258899e-14, "masked_token_full_update_term": 1.5802945199538954e-05, "masked_token_full_update_term/max": 0.00445556640625, "masked_token_full_update_term/median": 5.505714157152952e-21, "masked_token_full_update_term/min": -2.253800630569458e-07, "masked_token_full_update_term/p25": -1.283784310682299e-21, "masked_token_full_update_term/p75": 2.373101715136272e-15, "masked_token_full_update_term/p85": 2.4637930584603396e-13, "masked_token_full_update_term/p90": 5.5706550483591855e-12, "masked_token_full_update_term/p95": 3.0630644687335007e-09, "masked_token_full_update_term/p99": 0.00022029876708984375, "masked_token_full_update_term/var": 3.663554082322662e-08, "masked_token_hessian_coeff": -110612.1484375, "masked_token_hessian_coeff/max": 121634816.0, "masked_token_hessian_coeff/median": -1.3642420526593924e-10, "masked_token_hessian_coeff/min": -31981568.0, "masked_token_hessian_coeff/p25": -2.110004425048828e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.578125, "masked_token_hessian_coeff/var": 9906764644352.0, "masked_token_hessian_coeff_abs": 282553.625, "masked_token_hessian_coeff_abs/max": 121634816.0, "masked_token_hessian_coeff_abs/median": 1.695007085800171e-07, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 1.1482370609883219e-11, "masked_token_hessian_coeff_abs/p75": 0.000217437744140625, "masked_token_hessian_coeff_abs/p99": 8290304.0, "masked_token_hessian_coeff_abs/var": 9839162949632.0, "mean_logprobs": -0.00811767578125, "mean_logprobs/var": 6.151199340820312e-05, "num_completions/total": 8832, "per_sentence_gradient_norm": 45.690757751464844, "per_sentence_gradient_norm/max": 198.0, "per_sentence_gradient_norm/median": 34.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 2.859375, "per_sentence_gradient_norm/p75": 60.9375, "per_sentence_gradient_norm/var": 2515.3076171875, "per_token_feature_norm": 190.37376403808594, "per_token_feature_norm/max": 274.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 108.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 119.810546875, "per_token_gradient_norm": 0.9522852301597595, "per_token_gradient_norm/max": 290.0, "per_token_gradient_norm/median": 3.9472070056945086e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 1.8762769116165146e-14, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 2.738088369369507e-07, "per_token_gradient_norm/var": 131.1494140625, "per_token_policy_error_norm": 0.0045922258868813515, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004614991135895252, "policy_entropy": 0.007960430346429348, "policy_entropy/max": 1.796875, "policy_entropy/median": 6.875779945403337e-10, "policy_entropy/min": 1.1064367873509298e-20, "policy_entropy/p25": 1.9753088054130785e-12, "policy_entropy/p75": 1.1315569281578064e-07, "policy_entropy/var": 0.0038331488613039255, "policy_loss": -0.84375, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.13322368264198303, "policy_sharpness": 9.754648208618164, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.5562392473220825, "reward": 0.84375, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.13322368264198303, "rewards/accuracy_reward": 0.84375, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.13322368264198303, "sentence_fisher_curvature": 414067.1875, "sentence_fisher_curvature/max": 2244608.0, "sentence_fisher_curvature/median": 245760.0, "sentence_fisher_curvature/min": 2192.0, "sentence_fisher_curvature/p25": 14016.0, "sentence_fisher_curvature/p75": 587776.0, "sentence_fisher_curvature/p85": 780288.0, "sentence_fisher_curvature/p90": 833536.0, "sentence_fisher_curvature/p95": 1423360.0, "sentence_fisher_curvature/p99": 2182349.0, "sentence_fisher_curvature/var": 231972601856.0, "sentence_fisher_kl_divergence": 2.790006803365941e-08, "sentence_fisher_kl_divergence/max": 1.5087425708770752e-07, "sentence_fisher_kl_divergence/median": 1.6530975699424744e-08, "sentence_fisher_kl_divergence/min": 1.4733814168721437e-10, "sentence_fisher_kl_divergence/p25": 9.458744898438454e-10, "sentence_fisher_kl_divergence/p75": 3.952300176024437e-08, "sentence_fisher_kl_divergence/p85": 5.25033101439476e-08, "sentence_fisher_kl_divergence/p90": 5.6228600442409515e-08, "sentence_fisher_kl_divergence/p95": 9.604264050722122e-08, "sentence_fisher_kl_divergence/p99": 1.4733524267285247e-07, "sentence_fisher_kl_divergence/var": 1.0531168044751005e-15, "sentence_full_gradient_variance/max_squared_error": 4470.25146484375, "sentence_full_gradient_variance/metric": 4470.25146484375, "sentence_full_gradient_variance/p75": 4470.25146484375, "sentence_full_gradient_variance/p90": 4470.25146484375, "sentence_full_gradient_variance/p95": 4470.25146484375, "sentence_full_gradient_variance/p99": 4470.25146484375, "sentence_full_update_term": 0.0014119769912213087, "sentence_full_update_term/max": 0.007415771484375, "sentence_full_update_term/median": 0.001220703125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 9.202957153320312e-05, "sentence_full_update_term/p75": 0.0016994476318359375, "sentence_full_update_term/p85": 0.002574920654296875, "sentence_full_update_term/p90": 0.0037078857421875, "sentence_full_update_term/p95": 0.00487518310546875, "sentence_full_update_term/p99": 0.006169132422655821, "sentence_full_update_term/var": 2.4565467811044073e-06, "sentence_hessian_coeff": 61842.3359375, "sentence_hessian_coeff/max": 1998848.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -456704.0, "sentence_hessian_coeff/p25": -122368.0, "sentence_hessian_coeff/p75": 200704.0, "sentence_hessian_coeff/p99": 1166133.875, "sentence_hessian_coeff/var": 113113686016.0, "sentence_hessian_coeff_abs": 207007.671875, "sentence_hessian_coeff_abs/max": 1998848.0, "sentence_hessian_coeff_abs/median": 122368.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 45696.0, "sentence_hessian_coeff_abs/p75": 252928.0, "sentence_hessian_coeff_abs/p99": 1166133.875, "sentence_hessian_coeff_abs/var": 73675153408.0, "step": 92, "token_fisher_curvature": 440125.5, "token_fisher_curvature/max": 249561088.0, "token_fisher_curvature/median": 5.285485590866834e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.2200914035157158e-23, "token_fisher_curvature/p75": 6.643574579356937e-13, "token_fisher_curvature/p85": 6.875779945403337e-10, "token_fisher_curvature/p90": 1.5459954738616943e-07, "token_fisher_curvature/p95": 0.00408935546875, "token_fisher_curvature/p99": 842880.0, "token_fisher_curvature/var": 54523393474560.0, "token_fisher_kl_divergence": 2.964778111902433e-08, "token_fisher_kl_divergence/max": 1.6808509826660156e-05, "token_fisher_kl_divergence/median": 3.559118537227612e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 8.228460455756013e-37, "token_fisher_kl_divergence/p75": 4.4832542965520543e-26, "token_fisher_kl_divergence/p85": 4.632211430296955e-23, "token_fisher_kl_divergence/p90": 1.0429093163068573e-20, "token_fisher_kl_divergence/p95": 2.7582103268031233e-16, "token_fisher_kl_divergence/p99": 5.6759745348244905e-08, "token_fisher_kl_divergence/var": 2.4741011106152455e-13, "token_full_update_term": 2.0109206161578186e-05, "token_full_update_term/max": 0.005828857421875, "token_full_update_term/median": 5.6645328347631335e-21, "token_full_update_term/min": -2.253800630569458e-07, "token_full_update_term/p25": -1.2043749718772083e-21, "token_full_update_term/p75": 2.42861286636753e-15, "token_full_update_term/p85": 2.6059709945513987e-13, "token_full_update_term/p90": 5.8548721426632255e-12, "token_full_update_term/p95": 3.579032181733055e-09, "token_full_update_term/p99": 0.0002765655517578125, "token_full_update_term/var": 5.864744778705244e-08, "token_hessian_coeff": 37004.73828125, "token_hessian_coeff/max": 247463936.0, "token_hessian_coeff/median": -1.3278622645884752e-10, "token_hessian_coeff/min": -31981568.0, "token_hessian_coeff/p25": -2.0951032638549805e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.91864013671875, "token_hessian_coeff/var": 36879441330176.0, "token_hessian_coeff_abs": 429838.375, "token_hessian_coeff_abs/max": 247463936.0, "token_hessian_coeff_abs/median": 1.7229467630386353e-07, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.1667111721180845e-11, "token_hessian_coeff_abs/p75": 0.0002193450927734375, "token_hessian_coeff_abs/p99": 9816064.0, "token_hessian_coeff_abs/var": 36696041193472.0 }, { "accuracy_reward": 0.5416666865348816, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.2508772313594818, "adam_stats/lm_head/lr_effective_max": 1.3069982287561288e-06, "adam_stats/lm_head/lr_effective_mean": -1.4360422573300724e-12, "adam_stats/lm_head/lr_effective_min": -1.2827780437874026e-06, "adam_stats/lm_head/lr_effective_std": 4.1770899628090774e-08, "adam_stats/lr_effective_max": 1.4401838370758924e-06, "adam_stats/lr_effective_mean": 7.612801014578174e-12, "adam_stats/lr_effective_min": -1.413748236700485e-06, "adam_stats/m_t_max": 0.004434399306774139, "adam_stats/m_t_mean": 7.454366290904346e-11, "adam_stats/m_t_min": -0.004582813009619713, "adam_stats/v_t_max": 2.6436679036123678e-05, "adam_stats/v_t_mean": 2.583316156476445e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.5416666865348816, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.2508772313594818, "all_logprobs": -0.006505094468593597, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.0, "all_logprobs/p1": -0.10009765625, "all_logprobs/p10": -3.5762786865234375e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -2.3877568310126662e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.008420133031904697, "clip_ratio": 0.0, "completion_length": 480.19793701171875, "completion_length/correct": 387.3461608886719, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 342.0, "completion_length/correct/min": 192.0, "completion_length/correct/p25": 292.25, "completion_length/correct/p75": 394.75, "completion_length/correct/var": 37837.56640625, "completion_length/incorrect": 589.9318237304688, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 313.0, "completion_length/incorrect/min": 210.0, "completion_length/incorrect/p25": 252.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 140427.65625, "completion_length/max": 1024.0, "completion_length/median": 342.0, "completion_length/min": 192.0, "completion_length/p25": 257.0, "completion_length/p75": 556.0, "completion_length/var": 94171.03125, "curvature_clip_ratio_token_fisher": 2.1692443624488078e-05, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 2.1692443624488078e-05, "curvature_clip_ratio_total_full": 2.1692443624488078e-05, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1488, "feature_vector_variance/max_squared_error": 65799.8125, "feature_vector_variance/metric": 30950.1875, "generated_tokens/total": 5078663.0, "global_fisher_curvature": 84480.0, "global_fisher_curvature/max": 84480.0, "global_fisher_curvature/median": 84480.0, "global_fisher_curvature/min": 84480.0, "global_fisher_curvature/p25": 84480.0, "global_fisher_curvature/p75": 84480.0, "global_fisher_curvature/p85": 84480.0, "global_fisher_curvature/p90": 84480.0, "global_fisher_curvature/p95": 84480.0, "global_fisher_curvature/p99": 84480.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.5652192309498787e-09, "global_fisher_kl_divergence/max": 3.5652192309498787e-09, "global_fisher_kl_divergence/median": 3.5652192309498787e-09, "global_fisher_kl_divergence/min": 3.5652192309498787e-09, "global_fisher_kl_divergence/p25": 3.5652192309498787e-09, "global_fisher_kl_divergence/p75": 3.5652192309498787e-09, "global_fisher_kl_divergence/p85": 3.5652192309498787e-09, "global_fisher_kl_divergence/p90": 3.5652192309498787e-09, "global_fisher_kl_divergence/p95": 3.5652192309498787e-09, "global_fisher_kl_divergence/p99": 3.5652192309498787e-09, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.000518798828125, "global_full_update_term/max": 0.000518798828125, "global_full_update_term/median": 0.000518798828125, "global_full_update_term/min": 0.000518798828125, "global_full_update_term/p25": 0.000518798828125, "global_full_update_term/p75": 0.000518798828125, "global_full_update_term/p85": 0.000518798828125, "global_full_update_term/p90": 0.000518798828125, "global_full_update_term/p95": 0.000518798828125, "global_full_update_term/p99": 0.000518798828125, "global_full_update_term/var": NaN, "global_hessian_coeff": 17792.0, "global_hessian_coeff/max": 17792.0, "global_hessian_coeff/median": 17792.0, "global_hessian_coeff/min": 17792.0, "global_hessian_coeff/p25": 17792.0, "global_hessian_coeff/p75": 17792.0, "global_hessian_coeff/p99": 17792.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 17792.0, "global_hessian_coeff_abs/max": 17792.0, "global_hessian_coeff_abs/median": 17792.0, "global_hessian_coeff_abs/min": 17792.0, "global_hessian_coeff_abs/p25": 17792.0, "global_hessian_coeff_abs/p75": 17792.0, "global_hessian_coeff_abs/p99": 17792.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.6859022378921509, "learning_rate": 2.2278205293002645e-07, "loss": -0.5417, "masked_global_fisher_curvature": 80384.0, "masked_global_fisher_curvature/max": 80384.0, "masked_global_fisher_curvature/median": 80384.0, "masked_global_fisher_curvature/min": 80384.0, "masked_global_fisher_curvature/p25": 80384.0, "masked_global_fisher_curvature/p75": 80384.0, "masked_global_fisher_curvature/p85": 80384.0, "masked_global_fisher_curvature/p90": 80384.0, "masked_global_fisher_curvature/p95": 80384.0, "masked_global_fisher_curvature/p99": 80384.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/max": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/median": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/min": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/p25": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/p75": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/p85": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/p90": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/p95": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/p99": 3.3905962482094765e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00051116943359375, "masked_global_full_update_term/max": 0.00051116943359375, "masked_global_full_update_term/median": 0.00051116943359375, "masked_global_full_update_term/min": 0.00051116943359375, "masked_global_full_update_term/p25": 0.00051116943359375, "masked_global_full_update_term/p75": 0.00051116943359375, "masked_global_full_update_term/p85": 0.00051116943359375, "masked_global_full_update_term/p90": 0.00051116943359375, "masked_global_full_update_term/p95": 0.00051116943359375, "masked_global_full_update_term/p99": 0.00051116943359375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 13632.0, "masked_global_hessian_coeff/max": 13632.0, "masked_global_hessian_coeff/median": 13632.0, "masked_global_hessian_coeff/min": 13632.0, "masked_global_hessian_coeff/p25": 13632.0, "masked_global_hessian_coeff/p75": 13632.0, "masked_global_hessian_coeff/p99": 13632.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 13632.0, "masked_global_hessian_coeff_abs/max": 13632.0, "masked_global_hessian_coeff_abs/median": 13632.0, "masked_global_hessian_coeff_abs/min": 13632.0, "masked_global_hessian_coeff_abs/p25": 13632.0, "masked_global_hessian_coeff_abs/p75": 13632.0, "masked_global_hessian_coeff_abs/p99": 13632.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 15.202555656433105, "masked_per_sentence_gradient_norm/max": 223.0, "masked_per_sentence_gradient_norm/median": 2.953125, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 18.125, "masked_per_sentence_gradient_norm/var": 1153.7286376953125, "masked_per_token_gradient_norm": 0.40808042883872986, "masked_per_token_gradient_norm/max": 284.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 3.5288394428789616e-10, "masked_per_token_gradient_norm/var": 53.62395477294922, "masked_sentence_fisher_curvature": 209408.96875, "masked_sentence_fisher_curvature/max": 1622016.0, "masked_sentence_fisher_curvature/median": 26496.0, "masked_sentence_fisher_curvature/min": 62.75, "masked_sentence_fisher_curvature/p25": 6112.0, "masked_sentence_fisher_curvature/p75": 311296.0, "masked_sentence_fisher_curvature/p85": 514048.0, "masked_sentence_fisher_curvature/p90": 722944.0, "masked_sentence_fisher_curvature/p95": 954368.0, "masked_sentence_fisher_curvature/p99": 1590886.5, "masked_sentence_fisher_curvature/var": 127705202688.0, "masked_sentence_fisher_kl_divergence": 8.838982878955903e-09, "masked_sentence_fisher_kl_divergence/max": 6.845220923423767e-08, "masked_sentence_fisher_kl_divergence/median": 1.1204974725842476e-09, "masked_sentence_fisher_kl_divergence/min": 2.6432189770275727e-12, "masked_sentence_fisher_kl_divergence/p25": 2.587512426543981e-10, "masked_sentence_fisher_kl_divergence/p75": 1.3154931366443634e-08, "masked_sentence_fisher_kl_divergence/p85": 2.1740561351180077e-08, "masked_sentence_fisher_kl_divergence/p90": 3.055902197957039e-08, "masked_sentence_fisher_kl_divergence/p95": 4.022149369120598e-08, "masked_sentence_fisher_kl_divergence/p99": 6.712507882866703e-08, "masked_sentence_fisher_kl_divergence/var": 2.2742810810976323e-16, "masked_sentence_full_gradient_variance/max_squared_error": 1350.578369140625, "masked_sentence_full_gradient_variance/metric": 1350.578369140625, "masked_sentence_full_gradient_variance/p75": 1350.578369140625, "masked_sentence_full_gradient_variance/p90": 1350.578369140625, "masked_sentence_full_gradient_variance/p95": 1350.578369140625, "masked_sentence_full_gradient_variance/p99": 1350.578369140625, "masked_sentence_full_update_term": 0.000371590256690979, "masked_sentence_full_update_term/max": 0.004058837890625, "masked_sentence_full_update_term/median": 5.5789947509765625e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.00033283233642578125, "masked_sentence_full_update_term/p85": 0.000629425048828125, "masked_sentence_full_update_term/p90": 0.001377105712890625, "masked_sentence_full_update_term/p95": 0.001811981201171875, "masked_sentence_full_update_term/p99": 0.003363039344549179, "masked_sentence_full_update_term/var": 5.163225296200835e-07, "masked_sentence_hessian_coeff": 8818.0, "masked_sentence_hessian_coeff/max": 1138688.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -405504.0, "masked_sentence_hessian_coeff/p25": -68800.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 734004.5, "masked_sentence_hessian_coeff/var": 42443001856.0, "masked_sentence_hessian_coeff_abs": 103440.671875, "masked_sentence_hessian_coeff_abs/max": 1138688.0, "masked_sentence_hessian_coeff_abs/median": 11520.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 149248.0, "masked_sentence_hessian_coeff_abs/p99": 734004.5, "masked_sentence_hessian_coeff_abs/var": 31708983296.0, "masked_token_fisher_curvature": 186714.921875, "masked_token_fisher_curvature/max": 229638144.0, "masked_token_fisher_curvature/median": 1.6805133673525319e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 1.6336817097922297e-23, "masked_token_fisher_curvature/p75": 1.7830181775480014e-13, "masked_token_fisher_curvature/p85": 1.772590962900722e-10, "masked_token_fisher_curvature/p90": 1.5832483768463135e-08, "masked_token_fisher_curvature/p95": 0.0001316070556640625, "masked_token_fisher_curvature/p99": 57648.0, "masked_token_fisher_curvature/var": 20585088286720.0, "masked_token_fisher_kl_divergence": 7.87808396296441e-09, "masked_token_fisher_kl_divergence/max": 9.715557098388672e-06, "masked_token_fisher_kl_divergence/median": 7.087422195345028e-32, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 6.906029311080939e-37, "masked_token_fisher_kl_divergence/p75": 7.509955817704032e-27, "masked_token_fisher_kl_divergence/p85": 7.491073643076662e-24, "masked_token_fisher_kl_divergence/p90": 6.683619349428464e-22, "masked_token_fisher_kl_divergence/p95": 5.55653613398821e-18, "masked_token_fisher_kl_divergence/p99": 2.4324435798916966e-09, "masked_token_fisher_kl_divergence/var": 3.665927763717332e-14, "masked_token_full_update_term": 6.831513474025996e-06, "masked_token_full_update_term/max": 0.00439453125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -8.083879947662354e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.03250548619601e-22, "masked_token_full_update_term/p85": 7.955841660380741e-17, "masked_token_full_update_term/p90": 6.300515664747763e-15, "masked_token_full_update_term/p95": 2.6474378245211483e-12, "masked_token_full_update_term/p99": 3.6656856536865234e-06, "masked_token_full_update_term/var": 1.5171995215723655e-08, "masked_token_hessian_coeff": -1653.375, "masked_token_hessian_coeff/max": 226492416.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -31719424.0, "masked_token_hessian_coeff/p25": -2.4010660126805305e-10, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.0694732666015625, "masked_token_hessian_coeff/var": 12940061179904.0, "masked_token_hessian_coeff_abs": 170219.34375, "masked_token_hessian_coeff_abs/max": 226492416.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 2.0582228899002075e-07, "masked_token_hessian_coeff_abs/p99": 86784.0, "masked_token_hessian_coeff_abs/var": 12911089025024.0, "mean_logprobs": -0.00640869140625, "mean_logprobs/var": 2.6702880859375e-05, "num_completions/total": 8928, "per_sentence_gradient_norm": 15.339030265808105, "per_sentence_gradient_norm/max": 223.0, "per_sentence_gradient_norm/median": 2.953125, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 18.125, "per_sentence_gradient_norm/var": 1151.760986328125, "per_token_feature_norm": 190.45738220214844, "per_token_feature_norm/max": 242.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 111.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 121.54545593261719, "per_token_gradient_norm": 0.41384175419807434, "per_token_gradient_norm/max": 284.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 3.5288394428789616e-10, "per_token_gradient_norm/var": 55.152950286865234, "per_token_policy_error_norm": 0.003865279955789447, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.003480074694380164, "policy_entropy": 0.007408314850181341, "policy_entropy/max": 1.125, "policy_entropy/median": 4.220055416226387e-10, "policy_entropy/min": 1.9058241313221758e-21, "policy_entropy/p25": 1.7337242752546445e-12, "policy_entropy/p75": 6.356276571750641e-08, "policy_entropy/var": 0.003779185703024268, "policy_loss": -0.5416666865348816, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.2508772313594818, "policy_sharpness": 9.791027069091797, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.345240592956543, "reward": 0.5416666865348816, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.2508772313594818, "rewards/accuracy_reward": 0.5416666865348816, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.2508772313594818, "sentence_fisher_curvature": 217770.984375, "sentence_fisher_curvature/max": 1622016.0, "sentence_fisher_curvature/median": 26496.0, "sentence_fisher_curvature/min": 498.0, "sentence_fisher_curvature/p25": 6144.0, "sentence_fisher_curvature/p75": 344576.0, "sentence_fisher_curvature/p85": 528384.0, "sentence_fisher_curvature/p90": 778240.0, "sentence_fisher_curvature/p95": 954368.0, "sentence_fisher_curvature/p99": 1590886.5, "sentence_fisher_curvature/var": 130879864832.0, "sentence_fisher_kl_divergence": 9.19305165325568e-09, "sentence_fisher_kl_divergence/max": 6.845220923423767e-08, "sentence_fisher_kl_divergence/median": 1.1204974725842476e-09, "sentence_fisher_kl_divergence/min": 2.1032064978498966e-11, "sentence_fisher_kl_divergence/p25": 2.601154847070575e-10, "sentence_fisher_kl_divergence/p75": 1.4551915228366852e-08, "sentence_fisher_kl_divergence/p85": 2.2351741790771484e-08, "sentence_fisher_kl_divergence/p90": 3.294553607702255e-08, "sentence_fisher_kl_divergence/p95": 4.022149369120598e-08, "sentence_fisher_kl_divergence/p99": 6.712507882866703e-08, "sentence_fisher_kl_divergence/var": 2.331399424406746e-16, "sentence_full_gradient_variance/max_squared_error": 1352.7991943359375, "sentence_full_gradient_variance/metric": 1352.7991943359375, "sentence_full_gradient_variance/p75": 1352.7991943359375, "sentence_full_gradient_variance/p90": 1352.7991943359375, "sentence_full_gradient_variance/p95": 1352.7991943359375, "sentence_full_gradient_variance/p99": 1352.7991943359375, "sentence_full_update_term": 0.00037401419831439853, "sentence_full_update_term/max": 0.004058837890625, "sentence_full_update_term/median": 5.5789947509765625e-05, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.00033283233642578125, "sentence_full_update_term/p85": 0.000629425048828125, "sentence_full_update_term/p90": 0.001377105712890625, "sentence_full_update_term/p95": 0.001811981201171875, "sentence_full_update_term/p99": 0.003363039344549179, "sentence_full_update_term/var": 5.151877076059463e-07, "sentence_hessian_coeff": 17174.66796875, "sentence_hessian_coeff/max": 1138688.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -405504.0, "sentence_hessian_coeff/p25": -68800.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 811828.25, "sentence_hessian_coeff/var": 48869498880.0, "sentence_hessian_coeff_abs": 111638.671875, "sentence_hessian_coeff_abs/max": 1138688.0, "sentence_hessian_coeff_abs/median": 11520.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 191488.0, "sentence_hessian_coeff_abs/p99": 811828.25, "sentence_hessian_coeff_abs/var": 36573192192.0, "step": 93, "token_fisher_curvature": 192124.46875, "token_fisher_curvature/max": 249561088.0, "token_fisher_curvature/median": 1.6805133673525319e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 1.6336817097922297e-23, "token_fisher_curvature/p75": 1.7852386235972517e-13, "token_fisher_curvature/p85": 1.7826096154749393e-10, "token_fisher_curvature/p90": 1.5832483768463135e-08, "token_fisher_curvature/p95": 0.0001316070556640625, "token_fisher_curvature/p99": 59136.0, "token_fisher_curvature/var": 21933643005952.0, "token_fisher_kl_divergence": 8.10676858975512e-09, "token_fisher_kl_divergence/max": 1.055002212524414e-05, "token_fisher_kl_divergence/median": 7.087422195345028e-32, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 6.906029311080939e-37, "token_fisher_kl_divergence/p75": 7.522577592187569e-27, "token_fisher_kl_divergence/p85": 7.548023089546378e-24, "token_fisher_kl_divergence/p90": 6.683619349428464e-22, "token_fisher_kl_divergence/p95": 5.55653613398821e-18, "token_fisher_kl_divergence/p99": 2.5029294192790985e-09, "token_fisher_kl_divergence/var": 3.906930998880988e-14, "token_full_update_term": 6.930665222171228e-06, "token_full_update_term/max": 0.00457763671875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -8.083879947662354e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.0531850015098356e-22, "token_full_update_term/p85": 7.979727989493313e-17, "token_full_update_term/p90": 6.300515664747763e-15, "token_full_update_term/p95": 2.673028465238758e-12, "token_full_update_term/p99": 3.6691781133413315e-06, "token_full_update_term/var": 1.562487206285823e-08, "token_hessian_coeff": 3692.013671875, "token_hessian_coeff/max": 246415360.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -31719424.0, "token_hessian_coeff/p25": -2.4010660126805305e-10, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.07373046875, "token_hessian_coeff/var": 14256975118336.0, "token_hessian_coeff_abs": 175561.0, "token_hessian_coeff_abs/max": 246415360.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 2.0582228899002075e-07, "token_hessian_coeff_abs/p99": 94720.0, "token_hessian_coeff_abs/var": 14226165858304.0 }, { "accuracy_reward": 0.6875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.21710526943206787, "adam_stats/lm_head/lr_effective_max": 9.712379096527002e-07, "adam_stats/lm_head/lr_effective_mean": -2.5875733847269267e-12, "adam_stats/lm_head/lr_effective_min": -9.613557949705864e-07, "adam_stats/lm_head/lr_effective_std": 3.080511490338722e-08, "adam_stats/lr_effective_max": 1.047246314556105e-06, "adam_stats/lr_effective_mean": 2.683562790387062e-12, "adam_stats/lr_effective_min": -1.053848677656788e-06, "adam_stats/m_t_max": 0.004417280200868845, "adam_stats/m_t_mean": 6.669619717625253e-11, "adam_stats/m_t_min": -0.0042572831735014915, "adam_stats/v_t_max": 2.654614218045026e-05, "adam_stats/v_t_mean": 2.6150511877459648e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.21710526943206787, "all_logprobs": -0.008434312418103218, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.59375, "all_logprobs/p1": -0.126953125, "all_logprobs/p10": -1.7881393432617188e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -9.775161743164062e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013593561947345734, "clip_ratio": 0.0, "completion_length": 438.6458435058594, "completion_length/correct": 363.42425537109375, "completion_length/correct/max": 833.0, "completion_length/correct/median": 335.0, "completion_length/correct/min": 193.0, "completion_length/correct/p25": 259.0, "completion_length/correct/p75": 405.0, "completion_length/correct/var": 27009.693359375, "completion_length/incorrect": 604.1333618164062, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 485.0, "completion_length/incorrect/min": 365.0, "completion_length/incorrect/p25": 429.0, "completion_length/incorrect/p75": 819.75, "completion_length/incorrect/var": 49953.984375, "completion_length/max": 1024.0, "completion_length/median": 347.0, "completion_length/min": 193.0, "completion_length/p25": 298.0, "completion_length/p75": 492.75, "completion_length/var": 46308.6953125, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1504, "feature_vector_variance/max_squared_error": 58878.33203125, "feature_vector_variance/metric": 31392.05078125, "generated_tokens/total": 5120773.0, "global_fisher_curvature": 135168.0, "global_fisher_curvature/max": 135168.0, "global_fisher_curvature/median": 135168.0, "global_fisher_curvature/min": 135168.0, "global_fisher_curvature/p25": 135168.0, "global_fisher_curvature/p75": 135168.0, "global_fisher_curvature/p85": 135168.0, "global_fisher_curvature/p90": 135168.0, "global_fisher_curvature/p95": 135168.0, "global_fisher_curvature/p99": 135168.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 3.3614924177527428e-09, "global_fisher_kl_divergence/max": 3.3614924177527428e-09, "global_fisher_kl_divergence/median": 3.3614924177527428e-09, "global_fisher_kl_divergence/min": 3.3614924177527428e-09, "global_fisher_kl_divergence/p25": 3.3614924177527428e-09, "global_fisher_kl_divergence/p75": 3.3614924177527428e-09, "global_fisher_kl_divergence/p85": 3.3614924177527428e-09, "global_fisher_kl_divergence/p90": 3.3614924177527428e-09, "global_fisher_kl_divergence/p95": 3.3614924177527428e-09, "global_fisher_kl_divergence/p99": 3.3614924177527428e-09, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.00128936767578125, "global_full_update_term/max": 0.00128936767578125, "global_full_update_term/median": 0.00128936767578125, "global_full_update_term/min": 0.00128936767578125, "global_full_update_term/p25": 0.00128936767578125, "global_full_update_term/p75": 0.00128936767578125, "global_full_update_term/p85": 0.00128936767578125, "global_full_update_term/p90": 0.00128936767578125, "global_full_update_term/p95": 0.00128936767578125, "global_full_update_term/p99": 0.00128936767578125, "global_full_update_term/var": NaN, "global_hessian_coeff": 25856.0, "global_hessian_coeff/max": 25856.0, "global_hessian_coeff/median": 25856.0, "global_hessian_coeff/min": 25856.0, "global_hessian_coeff/p25": 25856.0, "global_hessian_coeff/p75": 25856.0, "global_hessian_coeff/p99": 25856.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 25856.0, "global_hessian_coeff_abs/max": 25856.0, "global_hessian_coeff_abs/median": 25856.0, "global_hessian_coeff_abs/min": 25856.0, "global_hessian_coeff_abs/p25": 25856.0, "global_hessian_coeff_abs/p75": 25856.0, "global_hessian_coeff_abs/p99": 25856.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.5813037753105164, "learning_rate": 1.6389299449645734e-07, "loss": -0.6875, "masked_global_fisher_curvature": 135168.0, "masked_global_fisher_curvature/max": 135168.0, "masked_global_fisher_curvature/median": 135168.0, "masked_global_fisher_curvature/min": 135168.0, "masked_global_fisher_curvature/p25": 135168.0, "masked_global_fisher_curvature/p75": 135168.0, "masked_global_fisher_curvature/p85": 135168.0, "masked_global_fisher_curvature/p90": 135168.0, "masked_global_fisher_curvature/p95": 135168.0, "masked_global_fisher_curvature/p99": 135168.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/max": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/median": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/min": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/p25": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/p75": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/p85": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/p90": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/p95": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/p99": 3.3614924177527428e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00128936767578125, "masked_global_full_update_term/max": 0.00128936767578125, "masked_global_full_update_term/median": 0.00128936767578125, "masked_global_full_update_term/min": 0.00128936767578125, "masked_global_full_update_term/p25": 0.00128936767578125, "masked_global_full_update_term/p75": 0.00128936767578125, "masked_global_full_update_term/p85": 0.00128936767578125, "masked_global_full_update_term/p90": 0.00128936767578125, "masked_global_full_update_term/p95": 0.00128936767578125, "masked_global_full_update_term/p99": 0.00128936767578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 25856.0, "masked_global_hessian_coeff/max": 25856.0, "masked_global_hessian_coeff/median": 25856.0, "masked_global_hessian_coeff/min": 25856.0, "masked_global_hessian_coeff/p25": 25856.0, "masked_global_hessian_coeff/p75": 25856.0, "masked_global_hessian_coeff/p99": 25856.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 25856.0, "masked_global_hessian_coeff_abs/max": 25856.0, "masked_global_hessian_coeff_abs/median": 25856.0, "masked_global_hessian_coeff_abs/min": 25856.0, "masked_global_hessian_coeff_abs/p25": 25856.0, "masked_global_hessian_coeff_abs/p75": 25856.0, "masked_global_hessian_coeff_abs/p99": 25856.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 32.669921875, "masked_per_sentence_gradient_norm/max": 280.0, "masked_per_sentence_gradient_norm/median": 18.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 33.625, "masked_per_sentence_gradient_norm/var": 2597.78662109375, "masked_per_token_gradient_norm": 0.5205031037330627, "masked_per_token_gradient_norm/max": 282.0, "masked_per_token_gradient_norm/median": 4.5075054799781356e-14, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.3067619875073433e-08, "masked_per_token_gradient_norm/var": 69.98361206054688, "masked_sentence_fisher_curvature": 252579.734375, "masked_sentence_fisher_curvature/max": 1474560.0, "masked_sentence_fisher_curvature/median": 39936.0, "masked_sentence_fisher_curvature/min": 374.0, "masked_sentence_fisher_curvature/p25": 8376.0, "masked_sentence_fisher_curvature/p75": 320000.0, "masked_sentence_fisher_curvature/p85": 658432.0, "masked_sentence_fisher_curvature/p90": 835584.0, "masked_sentence_fisher_curvature/p95": 1222656.0, "masked_sentence_fisher_curvature/p99": 1420083.375, "masked_sentence_fisher_curvature/var": 144174989312.0, "masked_sentence_fisher_kl_divergence": 6.267488750211214e-09, "masked_sentence_fisher_kl_divergence/max": 3.655441105365753e-08, "masked_sentence_fisher_kl_divergence/median": 9.89530235528946e-10, "masked_sentence_fisher_kl_divergence/min": 9.265477274311706e-12, "masked_sentence_fisher_kl_divergence/p25": 2.0827428670600057e-10, "masked_sentence_fisher_kl_divergence/p75": 7.930793799459934e-09, "masked_sentence_fisher_kl_divergence/p85": 1.6298145055770874e-08, "masked_sentence_fisher_kl_divergence/p90": 2.0721927285194397e-08, "masked_sentence_fisher_kl_divergence/p95": 3.032619133591652e-08, "masked_sentence_fisher_kl_divergence/p99": 3.522728064808689e-08, "masked_sentence_fisher_kl_divergence/var": 8.876518166698393e-17, "masked_sentence_full_gradient_variance/max_squared_error": 3576.05029296875, "masked_sentence_full_gradient_variance/metric": 3576.05029296875, "masked_sentence_full_gradient_variance/p75": 3576.05029296875, "masked_sentence_full_gradient_variance/p90": 3576.05029296875, "masked_sentence_full_gradient_variance/p95": 3576.05029296875, "masked_sentence_full_gradient_variance/p99": 3576.05029296875, "masked_sentence_full_update_term": 0.0005258421297185123, "masked_sentence_full_update_term/max": 0.003936767578125, "masked_sentence_full_update_term/median": 0.0002994537353515625, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0006437301635742188, "masked_sentence_full_update_term/p85": 0.0010089874267578125, "masked_sentence_full_update_term/p90": 0.001323699951171875, "masked_sentence_full_update_term/p95": 0.002147674560546875, "masked_sentence_full_update_term/p99": 0.003777313744649291, "masked_sentence_full_update_term/var": 6.012083417772374e-07, "masked_sentence_hessian_coeff": 27287.333984375, "masked_sentence_hessian_coeff/max": 1155072.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -301056.0, "masked_sentence_hessian_coeff/p25": -134144.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 781518.0, "masked_sentence_hessian_coeff/var": 66025480192.0, "masked_sentence_hessian_coeff_abs": 159075.34375, "masked_sentence_hessian_coeff_abs/max": 1155072.0, "masked_sentence_hessian_coeff_abs/median": 89600.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 219136.0, "masked_sentence_hessian_coeff_abs/p99": 781518.0, "masked_sentence_hessian_coeff_abs/var": 41206583296.0, "masked_token_fisher_curvature": 240150.25, "masked_token_fisher_curvature/max": 249561088.0, "masked_token_fisher_curvature/median": 3.426078865054194e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 5.169878828456423e-23, "masked_token_fisher_curvature/p75": 2.4300561562995426e-12, "masked_token_fisher_curvature/p85": 2.08092387765646e-09, "masked_token_fisher_curvature/p90": 3.021123120561242e-07, "masked_token_fisher_curvature/p95": 0.0020851492881774902, "masked_token_fisher_curvature/p99": 129536.0, "masked_token_fisher_curvature/var": 30325684043776.0, "masked_token_fisher_kl_divergence": 5.958237903058716e-09, "masked_token_fisher_kl_divergence/max": 6.198883056640625e-06, "masked_token_fisher_kl_divergence/median": 8.504906634414034e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.2812888423962934e-36, "masked_token_fisher_kl_divergence/p75": 6.018062073750055e-26, "masked_token_fisher_kl_divergence/p85": 5.169878828456423e-23, "masked_token_fisher_kl_divergence/p90": 7.474817605335435e-21, "masked_token_fisher_kl_divergence/p95": 5.177150076913009e-17, "masked_token_fisher_kl_divergence/p99": 3.2159732654690742e-09, "masked_token_fisher_kl_divergence/var": 1.8657906437295063e-14, "masked_token_full_update_term": 6.683475476165768e-06, "masked_token_full_update_term/max": 0.0035247802734375, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -4.842877388000488e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.363913744254155e-18, "masked_token_full_update_term/p85": 5.88418203051333e-15, "masked_token_full_update_term/p90": 1.900701818158268e-13, "masked_token_full_update_term/p95": 3.218492139467344e-11, "masked_token_full_update_term/p99": 2.142554149031639e-05, "masked_token_full_update_term/var": 1.1630751473035161e-08, "masked_token_hessian_coeff": 6374.95166015625, "masked_token_hessian_coeff/max": 245366784.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -32112640.0, "masked_token_hessian_coeff/p25": -1.0826624929904938e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.1904296875, "masked_token_hessian_coeff/var": 21950793515008.0, "masked_token_hessian_coeff_abs": 242137.953125, "masked_token_hessian_coeff_abs/max": 245366784.0, "masked_token_hessian_coeff_abs/median": 2.8762769943568856e-11, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 8.38935375213623e-06, "masked_token_hessian_coeff_abs/p99": 898544.0, "masked_token_hessian_coeff_abs/var": 21892201185280.0, "mean_logprobs": -0.007293701171875, "mean_logprobs/var": 4.00543212890625e-05, "num_completions/total": 9024, "per_sentence_gradient_norm": 32.669921875, "per_sentence_gradient_norm/max": 280.0, "per_sentence_gradient_norm/median": 18.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 33.625, "per_sentence_gradient_norm/var": 2597.78662109375, "per_token_feature_norm": 190.2515411376953, "per_token_feature_norm/max": 260.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 110.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 140.99508666992188, "per_token_gradient_norm": 0.5205031037330627, "per_token_gradient_norm/max": 282.0, "per_token_gradient_norm/median": 4.5075054799781356e-14, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.3067619875073433e-08, "per_token_gradient_norm/var": 69.98361206054688, "per_token_policy_error_norm": 0.004840744659304619, "per_token_policy_error_norm/max": 1.96875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004786426667124033, "policy_entropy": 0.008816028945147991, "policy_entropy/max": 2.328125, "policy_entropy/median": 1.673470251262188e-09, "policy_entropy/min": 2.583450489125616e-20, "policy_entropy/p25": 4.234834705130197e-12, "policy_entropy/p75": 2.1327286958694458e-07, "policy_entropy/var": 0.0049606808461248875, "policy_loss": -0.6875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.21710526943206787, "policy_sharpness": 9.741432189941406, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6446770429611206, "reward": 0.6875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.21710526943206787, "rewards/accuracy_reward": 0.6875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.21710526943206787, "sentence_fisher_curvature": 252579.734375, "sentence_fisher_curvature/max": 1474560.0, "sentence_fisher_curvature/median": 39936.0, "sentence_fisher_curvature/min": 374.0, "sentence_fisher_curvature/p25": 8376.0, "sentence_fisher_curvature/p75": 320000.0, "sentence_fisher_curvature/p85": 658432.0, "sentence_fisher_curvature/p90": 835584.0, "sentence_fisher_curvature/p95": 1222656.0, "sentence_fisher_curvature/p99": 1420083.375, "sentence_fisher_curvature/var": 144174989312.0, "sentence_fisher_kl_divergence": 6.267488750211214e-09, "sentence_fisher_kl_divergence/max": 3.655441105365753e-08, "sentence_fisher_kl_divergence/median": 9.89530235528946e-10, "sentence_fisher_kl_divergence/min": 9.265477274311706e-12, "sentence_fisher_kl_divergence/p25": 2.0827428670600057e-10, "sentence_fisher_kl_divergence/p75": 7.930793799459934e-09, "sentence_fisher_kl_divergence/p85": 1.6298145055770874e-08, "sentence_fisher_kl_divergence/p90": 2.0721927285194397e-08, "sentence_fisher_kl_divergence/p95": 3.032619133591652e-08, "sentence_fisher_kl_divergence/p99": 3.522728064808689e-08, "sentence_fisher_kl_divergence/var": 8.876518166698393e-17, "sentence_full_gradient_variance/max_squared_error": 3576.05029296875, "sentence_full_gradient_variance/metric": 3576.05029296875, "sentence_full_gradient_variance/p75": 3576.05029296875, "sentence_full_gradient_variance/p90": 3576.05029296875, "sentence_full_gradient_variance/p95": 3576.05029296875, "sentence_full_gradient_variance/p99": 3576.05029296875, "sentence_full_update_term": 0.0005258421297185123, "sentence_full_update_term/max": 0.003936767578125, "sentence_full_update_term/median": 0.0002994537353515625, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0006437301635742188, "sentence_full_update_term/p85": 0.0010089874267578125, "sentence_full_update_term/p90": 0.001323699951171875, "sentence_full_update_term/p95": 0.002147674560546875, "sentence_full_update_term/p99": 0.003777313744649291, "sentence_full_update_term/var": 6.012083417772374e-07, "sentence_hessian_coeff": 27287.333984375, "sentence_hessian_coeff/max": 1155072.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -301056.0, "sentence_hessian_coeff/p25": -134144.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 781518.0, "sentence_hessian_coeff/var": 66025480192.0, "sentence_hessian_coeff_abs": 159075.34375, "sentence_hessian_coeff_abs/max": 1155072.0, "sentence_hessian_coeff_abs/median": 89600.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 219136.0, "sentence_hessian_coeff_abs/p99": 781518.0, "sentence_hessian_coeff_abs/var": 41206583296.0, "step": 94, "token_fisher_curvature": 240150.25, "token_fisher_curvature/max": 249561088.0, "token_fisher_curvature/median": 3.426078865054194e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 5.169878828456423e-23, "token_fisher_curvature/p75": 2.4300561562995426e-12, "token_fisher_curvature/p85": 2.08092387765646e-09, "token_fisher_curvature/p90": 3.021123120561242e-07, "token_fisher_curvature/p95": 0.0020851492881774902, "token_fisher_curvature/p99": 129536.0, "token_fisher_curvature/var": 30325684043776.0, "token_fisher_kl_divergence": 5.958237903058716e-09, "token_fisher_kl_divergence/max": 6.198883056640625e-06, "token_fisher_kl_divergence/median": 8.504906634414034e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.2812888423962934e-36, "token_fisher_kl_divergence/p75": 6.018062073750055e-26, "token_fisher_kl_divergence/p85": 5.169878828456423e-23, "token_fisher_kl_divergence/p90": 7.474817605335435e-21, "token_fisher_kl_divergence/p95": 5.177150076913009e-17, "token_fisher_kl_divergence/p99": 3.2159732654690742e-09, "token_fisher_kl_divergence/var": 1.8657906437295063e-14, "token_full_update_term": 6.683475476165768e-06, "token_full_update_term/max": 0.0035247802734375, "token_full_update_term/median": 0.0, "token_full_update_term/min": -4.842877388000488e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.363913744254155e-18, "token_full_update_term/p85": 5.88418203051333e-15, "token_full_update_term/p90": 1.900701818158268e-13, "token_full_update_term/p95": 3.218492139467344e-11, "token_full_update_term/p99": 2.142554149031639e-05, "token_full_update_term/var": 1.1630751473035161e-08, "token_hessian_coeff": 6374.95166015625, "token_hessian_coeff/max": 245366784.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -32112640.0, "token_hessian_coeff/p25": -1.0826624929904938e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.1904296875, "token_hessian_coeff/var": 21950793515008.0, "token_hessian_coeff_abs": 242137.953125, "token_hessian_coeff_abs/max": 245366784.0, "token_hessian_coeff_abs/median": 2.8762769943568856e-11, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 8.38935375213623e-06, "token_hessian_coeff_abs/p99": 898544.0, "token_hessian_coeff_abs/var": 21892201185280.0 }, { "accuracy_reward": 0.71875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20427630841732025, "adam_stats/lm_head/lr_effective_max": 7.047538588267344e-07, "adam_stats/lm_head/lr_effective_mean": -6.019167911493206e-13, "adam_stats/lm_head/lr_effective_min": -6.849755891380482e-07, "adam_stats/lm_head/lr_effective_std": 2.2305238545072825e-08, "adam_stats/lr_effective_max": 7.427605055454478e-07, "adam_stats/lr_effective_mean": 3.0424714397153663e-12, "adam_stats/lr_effective_min": -7.392347356471873e-07, "adam_stats/m_t_max": 0.003489352762699127, "adam_stats/m_t_mean": 5.7308567613656436e-11, "adam_stats/m_t_min": -0.003544665640220046, "adam_stats/v_t_max": 2.6629802960087545e-05, "adam_stats/v_t_mean": 2.6583214796094268e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.71875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20427630841732025, "all_logprobs": -0.007312719244509935, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -4.03125, "all_logprobs/p1": -0.10009765625, "all_logprobs/p10": -4.76837158203125e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -3.528594970703125e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.009314978495240211, "clip_ratio": 0.0, "completion_length": 506.375, "completion_length/correct": 444.6811828613281, "completion_length/correct/max": 991.0, "completion_length/correct/median": 410.0, "completion_length/correct/min": 218.0, "completion_length/correct/p25": 335.0, "completion_length/correct/p75": 514.0, "completion_length/correct/var": 34365.6328125, "completion_length/incorrect": 664.0370483398438, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 481.0, "completion_length/incorrect/min": 423.0, "completion_length/incorrect/p25": 433.0, "completion_length/incorrect/p75": 1024.0, "completion_length/incorrect/var": 79603.9609375, "completion_length/max": 1024.0, "completion_length/median": 428.0, "completion_length/min": 218.0, "completion_length/p25": 350.5, "completion_length/p75": 525.0, "completion_length/var": 56214.078125, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.152, "feature_vector_variance/max_squared_error": 63170.78125, "feature_vector_variance/metric": 31231.9375, "generated_tokens/total": 5169385.0, "global_fisher_curvature": 155648.0, "global_fisher_curvature/max": 155648.0, "global_fisher_curvature/median": 155648.0, "global_fisher_curvature/min": 155648.0, "global_fisher_curvature/p25": 155648.0, "global_fisher_curvature/p75": 155648.0, "global_fisher_curvature/p85": 155648.0, "global_fisher_curvature/p90": 155648.0, "global_fisher_curvature/p95": 155648.0, "global_fisher_curvature/p99": 155648.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.0954757928848267e-09, "global_fisher_kl_divergence/max": 2.0954757928848267e-09, "global_fisher_kl_divergence/median": 2.0954757928848267e-09, "global_fisher_kl_divergence/min": 2.0954757928848267e-09, "global_fisher_kl_divergence/p25": 2.0954757928848267e-09, "global_fisher_kl_divergence/p75": 2.0954757928848267e-09, "global_fisher_kl_divergence/p85": 2.0954757928848267e-09, "global_fisher_kl_divergence/p90": 2.0954757928848267e-09, "global_fisher_kl_divergence/p95": 2.0954757928848267e-09, "global_fisher_kl_divergence/p99": 2.0954757928848267e-09, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.001373291015625, "global_full_update_term/max": 0.001373291015625, "global_full_update_term/median": 0.001373291015625, "global_full_update_term/min": 0.001373291015625, "global_full_update_term/p25": 0.001373291015625, "global_full_update_term/p75": 0.001373291015625, "global_full_update_term/p85": 0.001373291015625, "global_full_update_term/p90": 0.001373291015625, "global_full_update_term/p95": 0.001373291015625, "global_full_update_term/p99": 0.001373291015625, "global_full_update_term/var": NaN, "global_hessian_coeff": 36864.0, "global_hessian_coeff/max": 36864.0, "global_hessian_coeff/median": 36864.0, "global_hessian_coeff/min": 36864.0, "global_hessian_coeff/p25": 36864.0, "global_hessian_coeff/p75": 36864.0, "global_hessian_coeff/p99": 36864.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 36864.0, "global_hessian_coeff_abs/max": 36864.0, "global_hessian_coeff_abs/median": 36864.0, "global_hessian_coeff_abs/min": 36864.0, "global_hessian_coeff_abs/p25": 36864.0, "global_hessian_coeff_abs/p75": 36864.0, "global_hessian_coeff_abs/p99": 36864.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.6919779777526855, "learning_rate": 1.1394185240843985e-07, "loss": -0.7188, "masked_global_fisher_curvature": 155648.0, "masked_global_fisher_curvature/max": 155648.0, "masked_global_fisher_curvature/median": 155648.0, "masked_global_fisher_curvature/min": 155648.0, "masked_global_fisher_curvature/p25": 155648.0, "masked_global_fisher_curvature/p75": 155648.0, "masked_global_fisher_curvature/p85": 155648.0, "masked_global_fisher_curvature/p90": 155648.0, "masked_global_fisher_curvature/p95": 155648.0, "masked_global_fisher_curvature/p99": 155648.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/max": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/median": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/min": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/p25": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/p75": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/p85": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/p90": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/p95": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/p99": 2.0954757928848267e-09, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.001373291015625, "masked_global_full_update_term/max": 0.001373291015625, "masked_global_full_update_term/median": 0.001373291015625, "masked_global_full_update_term/min": 0.001373291015625, "masked_global_full_update_term/p25": 0.001373291015625, "masked_global_full_update_term/p75": 0.001373291015625, "masked_global_full_update_term/p85": 0.001373291015625, "masked_global_full_update_term/p90": 0.001373291015625, "masked_global_full_update_term/p95": 0.001373291015625, "masked_global_full_update_term/p99": 0.001373291015625, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 36864.0, "masked_global_hessian_coeff/max": 36864.0, "masked_global_hessian_coeff/median": 36864.0, "masked_global_hessian_coeff/min": 36864.0, "masked_global_hessian_coeff/p25": 36864.0, "masked_global_hessian_coeff/p75": 36864.0, "masked_global_hessian_coeff/p99": 36864.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 36864.0, "masked_global_hessian_coeff_abs/max": 36864.0, "masked_global_hessian_coeff_abs/median": 36864.0, "masked_global_hessian_coeff_abs/min": 36864.0, "masked_global_hessian_coeff_abs/p25": 36864.0, "masked_global_hessian_coeff_abs/p75": 36864.0, "masked_global_hessian_coeff_abs/p99": 36864.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 32.44498825073242, "masked_per_sentence_gradient_norm/max": 216.0, "masked_per_sentence_gradient_norm/median": 16.875, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 44.625, "masked_per_sentence_gradient_norm/var": 2214.501708984375, "masked_per_token_gradient_norm": 0.8096143007278442, "masked_per_token_gradient_norm/max": 312.0, "masked_per_token_gradient_norm/median": 7.531752999057062e-13, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.2863893061876297e-08, "masked_per_token_gradient_norm/var": 111.18659210205078, "masked_sentence_fisher_curvature": 409199.46875, "masked_sentence_fisher_curvature/max": 1875968.0, "masked_sentence_fisher_curvature/median": 161792.0, "masked_sentence_fisher_curvature/min": 157.0, "masked_sentence_fisher_curvature/p25": 9152.0, "masked_sentence_fisher_curvature/p75": 724992.0, "masked_sentence_fisher_curvature/p85": 987136.0, "masked_sentence_fisher_curvature/p90": 1030144.0, "masked_sentence_fisher_curvature/p95": 1368064.0, "masked_sentence_fisher_curvature/p99": 1704755.75, "masked_sentence_fisher_curvature/var": 224043876352.0, "masked_sentence_fisher_kl_divergence": 5.4953090966591844e-09, "masked_sentence_fisher_kl_divergence/max": 2.514570951461792e-08, "masked_sentence_fisher_kl_divergence/median": 2.168235369026661e-09, "masked_sentence_fisher_kl_divergence/min": 2.1032064978498966e-12, "masked_sentence_fisher_kl_divergence/p25": 1.227817847393453e-10, "masked_sentence_fisher_kl_divergence/p75": 9.720679372549057e-09, "masked_sentence_fisher_kl_divergence/p85": 1.3271346688270569e-08, "masked_sentence_fisher_kl_divergence/p90": 1.382431946694851e-08, "masked_sentence_fisher_kl_divergence/p95": 1.8364517018198967e-08, "masked_sentence_fisher_kl_divergence/p99": 2.2933825505333516e-08, "masked_sentence_fisher_kl_divergence/var": 4.038637541512988e-17, "masked_sentence_full_gradient_variance/max_squared_error": 3188.861328125, "masked_sentence_full_gradient_variance/metric": 3188.861328125, "masked_sentence_full_gradient_variance/p75": 3188.861328125, "masked_sentence_full_gradient_variance/p90": 3188.861328125, "masked_sentence_full_gradient_variance/p95": 3188.861328125, "masked_sentence_full_gradient_variance/p99": 3188.861328125, "masked_sentence_full_update_term": 0.0004911472788080573, "masked_sentence_full_update_term/max": 0.0030670166015625, "masked_sentence_full_update_term/median": 0.0002307891845703125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0007343292236328125, "masked_sentence_full_update_term/p85": 0.0010776519775390625, "masked_sentence_full_update_term/p90": 0.00128173828125, "masked_sentence_full_update_term/p95": 0.00200653076171875, "masked_sentence_full_update_term/p99": 0.002748108934611082, "masked_sentence_full_update_term/var": 4.261059700638725e-07, "masked_sentence_hessian_coeff": 73170.671875, "masked_sentence_hessian_coeff/max": 1097728.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -421888.0, "masked_sentence_hessian_coeff/p25": -113152.0, "masked_sentence_hessian_coeff/p75": 161280.0, "masked_sentence_hessian_coeff/p99": 833127.25, "masked_sentence_hessian_coeff/var": 83240067072.0, "masked_sentence_hessian_coeff_abs": 189821.34375, "masked_sentence_hessian_coeff_abs/max": 1097728.0, "masked_sentence_hessian_coeff_abs/median": 146432.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 223232.0, "masked_sentence_hessian_coeff_abs/p99": 833127.25, "masked_sentence_hessian_coeff_abs/var": 52238946304.0, "masked_token_fisher_curvature": 390305.3125, "masked_token_fisher_curvature/max": 245366784.0, "masked_token_fisher_curvature/median": 1.7889335846010823e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 7.134432783269864e-24, "masked_token_fisher_curvature/p75": 3.2862601528904634e-13, "masked_token_fisher_curvature/p85": 1.9826984498649836e-10, "masked_token_fisher_curvature/p90": 3.608874976634979e-08, "masked_token_fisher_curvature/p95": 0.000492095947265625, "masked_token_fisher_curvature/p99": 383232.0, "masked_token_fisher_curvature/var": 47630377811968.0, "masked_token_fisher_kl_divergence": 5.241271860967345e-09, "masked_token_fisher_kl_divergence/max": 3.293156623840332e-06, "masked_token_fisher_kl_divergence/median": 2.4074124304840448e-32, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 9.550891600431086e-38, "masked_token_fisher_kl_divergence/p75": 4.417621069237666e-27, "masked_token_fisher_kl_divergence/p85": 2.662487596655058e-24, "masked_token_fisher_kl_divergence/p90": 4.830734777309682e-22, "masked_token_fisher_kl_divergence/p95": 6.613633252161577e-18, "masked_token_fisher_kl_divergence/p99": 5.155015969648957e-09, "masked_token_fisher_kl_divergence/var": 8.587677417055614e-15, "masked_token_full_update_term": 7.806304893165361e-06, "masked_token_full_update_term/max": 0.0025634765625, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.2014061212539673e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.44811591673966e-18, "masked_token_full_update_term/p85": 2.456368441983159e-15, "masked_token_full_update_term/p90": 8.42867442507611e-14, "masked_token_full_update_term/p95": 2.660272002685815e-11, "masked_token_full_update_term/p99": 8.535385131835938e-05, "masked_token_full_update_term/var": 1.0337402933657813e-08, "masked_token_hessian_coeff": 34550.19921875, "masked_token_hessian_coeff/max": 236978176.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -32768000.0, "masked_token_hessian_coeff/p25": -2.4028122425079346e-07, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.3984375, "masked_token_hessian_coeff/var": 30654131601408.0, "masked_token_hessian_coeff_abs": 356196.71875, "masked_token_hessian_coeff_abs/max": 236978176.0, "masked_token_hessian_coeff_abs/median": 5.311449058353901e-10, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 8.225440979003906e-06, "masked_token_hessian_coeff_abs/p99": 6415872.0, "masked_token_hessian_coeff_abs/var": 30528442990592.0, "mean_logprobs": -0.007659912109375, "mean_logprobs/var": 3.0517578125e-05, "num_completions/total": 9120, "per_sentence_gradient_norm": 32.44498825073242, "per_sentence_gradient_norm/max": 216.0, "per_sentence_gradient_norm/median": 16.875, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 44.625, "per_sentence_gradient_norm/var": 2214.501708984375, "per_token_feature_norm": 191.12413024902344, "per_token_feature_norm/max": 266.0, "per_token_feature_norm/median": 191.0, "per_token_feature_norm/min": 102.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 126.32575988769531, "per_token_gradient_norm": 0.8096143007278442, "per_token_gradient_norm/max": 312.0, "per_token_gradient_norm/median": 7.531752999057062e-13, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.2863893061876297e-08, "per_token_gradient_norm/var": 111.18659210205078, "per_token_policy_error_norm": 0.004454958718270063, "per_token_policy_error_norm/max": 1.921875, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0042440155521035194, "policy_entropy": 0.00793606135994196, "policy_entropy/max": 1.3671875, "policy_entropy/median": 4.420144250616431e-10, "policy_entropy/min": 1.6623021589865644e-20, "policy_entropy/p25": 1.4210854715202004e-12, "policy_entropy/p75": 7.450580596923828e-08, "policy_entropy/var": 0.004210402257740498, "policy_loss": -0.71875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20427630841732025, "policy_sharpness": 9.770157814025879, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.4653202295303345, "reward": 0.71875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20427630841732025, "rewards/accuracy_reward": 0.71875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20427630841732025, "sentence_fisher_curvature": 409199.46875, "sentence_fisher_curvature/max": 1875968.0, "sentence_fisher_curvature/median": 161792.0, "sentence_fisher_curvature/min": 157.0, "sentence_fisher_curvature/p25": 9152.0, "sentence_fisher_curvature/p75": 724992.0, "sentence_fisher_curvature/p85": 987136.0, "sentence_fisher_curvature/p90": 1030144.0, "sentence_fisher_curvature/p95": 1368064.0, "sentence_fisher_curvature/p99": 1704755.75, "sentence_fisher_curvature/var": 224043876352.0, "sentence_fisher_kl_divergence": 5.4953090966591844e-09, "sentence_fisher_kl_divergence/max": 2.514570951461792e-08, "sentence_fisher_kl_divergence/median": 2.168235369026661e-09, "sentence_fisher_kl_divergence/min": 2.1032064978498966e-12, "sentence_fisher_kl_divergence/p25": 1.227817847393453e-10, "sentence_fisher_kl_divergence/p75": 9.720679372549057e-09, "sentence_fisher_kl_divergence/p85": 1.3271346688270569e-08, "sentence_fisher_kl_divergence/p90": 1.382431946694851e-08, "sentence_fisher_kl_divergence/p95": 1.8364517018198967e-08, "sentence_fisher_kl_divergence/p99": 2.2933825505333516e-08, "sentence_fisher_kl_divergence/var": 4.038637541512988e-17, "sentence_full_gradient_variance/max_squared_error": 3188.861328125, "sentence_full_gradient_variance/metric": 3188.861328125, "sentence_full_gradient_variance/p75": 3188.861328125, "sentence_full_gradient_variance/p90": 3188.861328125, "sentence_full_gradient_variance/p95": 3188.861328125, "sentence_full_gradient_variance/p99": 3188.861328125, "sentence_full_update_term": 0.0004911472788080573, "sentence_full_update_term/max": 0.0030670166015625, "sentence_full_update_term/median": 0.0002307891845703125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0007343292236328125, "sentence_full_update_term/p85": 0.0010776519775390625, "sentence_full_update_term/p90": 0.00128173828125, "sentence_full_update_term/p95": 0.00200653076171875, "sentence_full_update_term/p99": 0.002748108934611082, "sentence_full_update_term/var": 4.261059700638725e-07, "sentence_hessian_coeff": 73170.671875, "sentence_hessian_coeff/max": 1097728.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -421888.0, "sentence_hessian_coeff/p25": -113152.0, "sentence_hessian_coeff/p75": 161280.0, "sentence_hessian_coeff/p99": 833127.25, "sentence_hessian_coeff/var": 83240067072.0, "sentence_hessian_coeff_abs": 189821.34375, "sentence_hessian_coeff_abs/max": 1097728.0, "sentence_hessian_coeff_abs/median": 146432.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 223232.0, "sentence_hessian_coeff_abs/p99": 833127.25, "sentence_hessian_coeff_abs/var": 52238946304.0, "step": 95, "token_fisher_curvature": 390305.3125, "token_fisher_curvature/max": 245366784.0, "token_fisher_curvature/median": 1.7889335846010823e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.134432783269864e-24, "token_fisher_curvature/p75": 3.2862601528904634e-13, "token_fisher_curvature/p85": 1.9826984498649836e-10, "token_fisher_curvature/p90": 3.608874976634979e-08, "token_fisher_curvature/p95": 0.000492095947265625, "token_fisher_curvature/p99": 383232.0, "token_fisher_curvature/var": 47630377811968.0, "token_fisher_kl_divergence": 5.241271860967345e-09, "token_fisher_kl_divergence/max": 3.293156623840332e-06, "token_fisher_kl_divergence/median": 2.4074124304840448e-32, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 9.550891600431086e-38, "token_fisher_kl_divergence/p75": 4.417621069237666e-27, "token_fisher_kl_divergence/p85": 2.662487596655058e-24, "token_fisher_kl_divergence/p90": 4.830734777309682e-22, "token_fisher_kl_divergence/p95": 6.613633252161577e-18, "token_fisher_kl_divergence/p99": 5.155015969648957e-09, "token_fisher_kl_divergence/var": 8.587677417055614e-15, "token_full_update_term": 7.806304893165361e-06, "token_full_update_term/max": 0.0025634765625, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.2014061212539673e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.44811591673966e-18, "token_full_update_term/p85": 2.456368441983159e-15, "token_full_update_term/p90": 8.42867442507611e-14, "token_full_update_term/p95": 2.660272002685815e-11, "token_full_update_term/p99": 8.535385131835938e-05, "token_full_update_term/var": 1.0337402933657813e-08, "token_hessian_coeff": 34550.19921875, "token_hessian_coeff/max": 236978176.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -32768000.0, "token_hessian_coeff/p25": -2.4028122425079346e-07, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.3984375, "token_hessian_coeff/var": 30654131601408.0, "token_hessian_coeff_abs": 356196.71875, "token_hessian_coeff_abs/max": 236978176.0, "token_hessian_coeff_abs/median": 5.311449058353901e-10, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 8.225440979003906e-06, "token_hessian_coeff_abs/p99": 6415872.0, "token_hessian_coeff_abs/var": 30528442990592.0 }, { "accuracy_reward": 0.71875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.20427633821964264, "adam_stats/lm_head/lr_effective_max": 4.669654742883722e-07, "adam_stats/lm_head/lr_effective_mean": 8.268160461843976e-13, "adam_stats/lm_head/lr_effective_min": -4.3632937263282656e-07, "adam_stats/lm_head/lr_effective_std": 1.499885904365783e-08, "adam_stats/lr_effective_max": 4.805543198926898e-07, "adam_stats/lr_effective_mean": 3.7167677789662346e-12, "adam_stats/lr_effective_min": -4.7171243977572885e-07, "adam_stats/m_t_max": 0.0048827421851456165, "adam_stats/m_t_mean": -1.6807139013863548e-11, "adam_stats/m_t_min": -0.0037601592484861612, "adam_stats/v_t_max": 2.6651585358195007e-05, "adam_stats/v_t_mean": 2.890212591824537e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.71875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.20427633821964264, "all_logprobs": -0.009753969497978687, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.6875, "all_logprobs/p1": -0.162109375, "all_logprobs/p10": -2.6226043701171875e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00013661361299455166, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.015292036347091198, "clip_ratio": 0.0, "completion_length": 479.7083435058594, "completion_length/correct": 467.8985595703125, "completion_length/correct/max": 956.0, "completion_length/correct/median": 417.0, "completion_length/correct/min": 282.0, "completion_length/correct/p25": 384.0, "completion_length/correct/p75": 509.0, "completion_length/correct/var": 18800.033203125, "completion_length/incorrect": 509.8888854980469, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 434.0, "completion_length/incorrect/min": 275.0, "completion_length/incorrect/p25": 335.0, "completion_length/incorrect/p75": 582.5, "completion_length/incorrect/var": 50808.0234375, "completion_length/max": 1024.0, "completion_length/median": 417.0, "completion_length/min": 275.0, "completion_length/p25": 365.75, "completion_length/p75": 550.5, "completion_length/var": 27722.396484375, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1536, "feature_vector_variance/max_squared_error": 66834.6796875, "feature_vector_variance/metric": 31160.552734375, "generated_tokens/total": 5215437.0, "global_fisher_curvature": 151552.0, "global_fisher_curvature/max": 151552.0, "global_fisher_curvature/median": 151552.0, "global_fisher_curvature/min": 151552.0, "global_fisher_curvature/p25": 151552.0, "global_fisher_curvature/p75": 151552.0, "global_fisher_curvature/p85": 151552.0, "global_fisher_curvature/p90": 151552.0, "global_fisher_curvature/p95": 151552.0, "global_fisher_curvature/p99": 151552.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 9.822542779147625e-10, "global_fisher_kl_divergence/max": 9.822542779147625e-10, "global_fisher_kl_divergence/median": 9.822542779147625e-10, "global_fisher_kl_divergence/min": 9.822542779147625e-10, "global_fisher_kl_divergence/p25": 9.822542779147625e-10, "global_fisher_kl_divergence/p75": 9.822542779147625e-10, "global_fisher_kl_divergence/p85": 9.822542779147625e-10, "global_fisher_kl_divergence/p90": 9.822542779147625e-10, "global_fisher_kl_divergence/p95": 9.822542779147625e-10, "global_fisher_kl_divergence/p99": 9.822542779147625e-10, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.00128936767578125, "global_full_update_term/max": 0.00128936767578125, "global_full_update_term/median": 0.00128936767578125, "global_full_update_term/min": 0.00128936767578125, "global_full_update_term/p25": 0.00128936767578125, "global_full_update_term/p75": 0.00128936767578125, "global_full_update_term/p85": 0.00128936767578125, "global_full_update_term/p90": 0.00128936767578125, "global_full_update_term/p95": 0.00128936767578125, "global_full_update_term/p99": 0.00128936767578125, "global_full_update_term/var": NaN, "global_hessian_coeff": 31104.0, "global_hessian_coeff/max": 31104.0, "global_hessian_coeff/median": 31104.0, "global_hessian_coeff/min": 31104.0, "global_hessian_coeff/p25": 31104.0, "global_hessian_coeff/p75": 31104.0, "global_hessian_coeff/p99": 31104.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 31104.0, "global_hessian_coeff_abs/max": 31104.0, "global_hessian_coeff_abs/median": 31104.0, "global_hessian_coeff_abs/min": 31104.0, "global_hessian_coeff_abs/p25": 31104.0, "global_hessian_coeff_abs/p75": 31104.0, "global_hessian_coeff_abs/p99": 31104.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.8541772365570068, "learning_rate": 7.298948443822229e-08, "loss": -0.7188, "masked_global_fisher_curvature": 151552.0, "masked_global_fisher_curvature/max": 151552.0, "masked_global_fisher_curvature/median": 151552.0, "masked_global_fisher_curvature/min": 151552.0, "masked_global_fisher_curvature/p25": 151552.0, "masked_global_fisher_curvature/p75": 151552.0, "masked_global_fisher_curvature/p85": 151552.0, "masked_global_fisher_curvature/p90": 151552.0, "masked_global_fisher_curvature/p95": 151552.0, "masked_global_fisher_curvature/p99": 151552.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/max": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/median": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/min": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/p25": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/p75": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/p85": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/p90": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/p95": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/p99": 9.822542779147625e-10, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00128936767578125, "masked_global_full_update_term/max": 0.00128936767578125, "masked_global_full_update_term/median": 0.00128936767578125, "masked_global_full_update_term/min": 0.00128936767578125, "masked_global_full_update_term/p25": 0.00128936767578125, "masked_global_full_update_term/p75": 0.00128936767578125, "masked_global_full_update_term/p85": 0.00128936767578125, "masked_global_full_update_term/p90": 0.00128936767578125, "masked_global_full_update_term/p95": 0.00128936767578125, "masked_global_full_update_term/p99": 0.00128936767578125, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 31104.0, "masked_global_hessian_coeff/max": 31104.0, "masked_global_hessian_coeff/median": 31104.0, "masked_global_hessian_coeff/min": 31104.0, "masked_global_hessian_coeff/p25": 31104.0, "masked_global_hessian_coeff/p75": 31104.0, "masked_global_hessian_coeff/p99": 31104.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 31104.0, "masked_global_hessian_coeff_abs/max": 31104.0, "masked_global_hessian_coeff_abs/median": 31104.0, "masked_global_hessian_coeff_abs/min": 31104.0, "masked_global_hessian_coeff_abs/p25": 31104.0, "masked_global_hessian_coeff_abs/p75": 31104.0, "masked_global_hessian_coeff_abs/p99": 31104.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 48.78759765625, "masked_per_sentence_gradient_norm/max": 260.0, "masked_per_sentence_gradient_norm/median": 33.5, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 78.125, "masked_per_sentence_gradient_norm/var": 3414.72705078125, "masked_per_token_gradient_norm": 0.9512550234794617, "masked_per_token_gradient_norm/max": 272.0, "masked_per_token_gradient_norm/median": 5.2295945351943374e-11, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.471489667892456e-07, "masked_per_token_gradient_norm/var": 126.37926483154297, "masked_sentence_fisher_curvature": 437668.84375, "masked_sentence_fisher_curvature/max": 1654784.0, "masked_sentence_fisher_curvature/median": 315392.0, "masked_sentence_fisher_curvature/min": 38.75, "masked_sentence_fisher_curvature/p25": 7216.0, "masked_sentence_fisher_curvature/p75": 722944.0, "masked_sentence_fisher_curvature/p85": 995328.0, "masked_sentence_fisher_curvature/p90": 1114112.0, "masked_sentence_fisher_curvature/p95": 1284096.0, "masked_sentence_fisher_curvature/p99": 1623654.5, "masked_sentence_fisher_curvature/var": 208621305856.0, "masked_sentence_fisher_kl_divergence": 2.841956447952043e-09, "masked_sentence_fisher_kl_divergence/max": 1.076841726899147e-08, "masked_sentence_fisher_kl_divergence/median": 2.051820047199726e-09, "masked_sentence_fisher_kl_divergence/min": 2.5224267119483557e-13, "masked_sentence_fisher_kl_divergence/p25": 4.6782133722444996e-11, "masked_sentence_fisher_kl_divergence/p75": 4.69299266114831e-09, "masked_sentence_fisher_kl_divergence/p85": 6.468326319009066e-09, "masked_sentence_fisher_kl_divergence/p90": 7.232301868498325e-09, "masked_sentence_fisher_kl_divergence/p95": 8.352799341082573e-09, "masked_sentence_fisher_kl_divergence/p99": 1.054722886806303e-08, "masked_sentence_fisher_kl_divergence/var": 8.805471623886213e-18, "masked_sentence_full_gradient_variance/max_squared_error": 5666.38671875, "masked_sentence_full_gradient_variance/metric": 5666.38671875, "masked_sentence_full_gradient_variance/p75": 5666.38671875, "masked_sentence_full_gradient_variance/p90": 5666.38671875, "masked_sentence_full_gradient_variance/p95": 5666.38671875, "masked_sentence_full_gradient_variance/p99": 5666.38671875, "masked_sentence_full_update_term": 0.0005141335423104465, "masked_sentence_full_update_term/max": 0.002197265625, "masked_sentence_full_update_term/median": 0.0003986358642578125, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0008382797241210938, "masked_sentence_full_update_term/p85": 0.00119781494140625, "masked_sentence_full_update_term/p90": 0.0013580322265625, "masked_sentence_full_update_term/p95": 0.0015411376953125, "masked_sentence_full_update_term/p99": 0.0020668033976107836, "masked_sentence_full_update_term/var": 3.258386982452066e-07, "masked_sentence_hessian_coeff": 67396.5, "masked_sentence_hessian_coeff/max": 839680.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -382976.0, "masked_sentence_hessian_coeff/p25": -23168.0, "masked_sentence_hessian_coeff/p75": 128896.0, "masked_sentence_hessian_coeff/p99": 746291.5, "masked_sentence_hessian_coeff/var": 59576737792.0, "masked_sentence_hessian_coeff_abs": 151527.84375, "masked_sentence_hessian_coeff_abs/max": 839680.0, "masked_sentence_hessian_coeff_abs/median": 59136.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 229888.0, "masked_sentence_hessian_coeff_abs/p99": 746291.5, "masked_sentence_hessian_coeff_abs/var": 40964460544.0, "masked_token_fisher_curvature": 456140.21875, "masked_token_fisher_curvature/max": 255852544.0, "masked_token_fisher_curvature/median": 5.2909066017292616e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 7.982292911136717e-23, "masked_token_fisher_curvature/p75": 3.254285729781259e-12, "masked_token_fisher_curvature/p85": 4.773028194904327e-09, "masked_token_fisher_curvature/p90": 1.3783574104309082e-06, "masked_token_fisher_curvature/p95": 0.017578125, "masked_token_fisher_curvature/p99": 897024.0, "masked_token_fisher_curvature/var": 53902959443968.0, "masked_token_fisher_kl_divergence": 2.9606652685032486e-09, "masked_token_fisher_kl_divergence/max": 1.6614794731140137e-06, "masked_token_fisher_kl_divergence/median": 3.4358590207868288e-31, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.172175143618065e-37, "masked_token_fisher_kl_divergence/p75": 2.1103606936472508e-26, "masked_token_fisher_kl_divergence/p85": 3.101927297073854e-23, "masked_token_fisher_kl_divergence/p90": 8.946785505373547e-21, "masked_token_fisher_kl_divergence/p95": 1.1449174941446927e-16, "masked_token_fisher_kl_divergence/p99": 5.820766091346741e-09, "masked_token_fisher_kl_divergence/var": 2.2705029435759637e-15, "masked_token_full_update_term": 6.436836429202231e-06, "masked_token_full_update_term/max": 0.00182342529296875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.6391277313232422e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 5.238864897449957e-16, "masked_token_full_update_term/p85": 6.439293542825908e-14, "masked_token_full_update_term/p90": 1.9753088054130785e-12, "masked_token_full_update_term/p95": 7.14678094482224e-10, "masked_token_full_update_term/p99": 8.749589323997498e-05, "masked_token_full_update_term/var": 5.841109373960762e-09, "masked_token_hessian_coeff": 27550.546875, "masked_token_hessian_coeff/max": 252706816.0, "masked_token_hessian_coeff/median": -3.7481129311345285e-13, "masked_token_hessian_coeff/min": -33030144.0, "masked_token_hessian_coeff/p25": -1.1742115020751953e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.6600341796875, "masked_token_hessian_coeff/var": 33736265564160.0, "masked_token_hessian_coeff_abs": 414279.5625, "masked_token_hessian_coeff_abs/max": 252706816.0, "masked_token_hessian_coeff_abs/median": 1.5133991837501526e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 0.00012028217315673828, "masked_token_hessian_coeff_abs/p99": 9306112.0, "masked_token_hessian_coeff_abs/var": 33565398007808.0, "mean_logprobs": -0.0098876953125, "mean_logprobs/var": 5.078315734863281e-05, "num_completions/total": 9216, "per_sentence_gradient_norm": 48.78759765625, "per_sentence_gradient_norm/max": 260.0, "per_sentence_gradient_norm/median": 33.5, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 78.125, "per_sentence_gradient_norm/var": 3414.72705078125, "per_token_feature_norm": 189.9502410888672, "per_token_feature_norm/max": 272.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 111.5, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 128.4999542236328, "per_token_gradient_norm": 0.9512550234794617, "per_token_gradient_norm/max": 272.0, "per_token_gradient_norm/median": 5.2295945351943374e-11, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.471489667892456e-07, "per_token_gradient_norm/var": 126.37926483154297, "per_token_policy_error_norm": 0.005716873332858086, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.005509102251380682, "policy_entropy": 0.010011276230216026, "policy_entropy/max": 2.84375, "policy_entropy/median": 1.9936123862862587e-09, "policy_entropy/min": 6.696854239229312e-21, "policy_entropy/p25": 6.536993168992922e-12, "policy_entropy/p75": 2.2165477275848389e-07, "policy_entropy/var": 0.005915031302720308, "policy_loss": -0.71875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.20427633821964264, "policy_sharpness": 9.71977710723877, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8051304817199707, "reward": 0.71875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.20427633821964264, "rewards/accuracy_reward": 0.71875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.20427633821964264, "sentence_fisher_curvature": 437668.84375, "sentence_fisher_curvature/max": 1654784.0, "sentence_fisher_curvature/median": 315392.0, "sentence_fisher_curvature/min": 38.75, "sentence_fisher_curvature/p25": 7216.0, "sentence_fisher_curvature/p75": 722944.0, "sentence_fisher_curvature/p85": 995328.0, "sentence_fisher_curvature/p90": 1114112.0, "sentence_fisher_curvature/p95": 1284096.0, "sentence_fisher_curvature/p99": 1623654.5, "sentence_fisher_curvature/var": 208621305856.0, "sentence_fisher_kl_divergence": 2.841956447952043e-09, "sentence_fisher_kl_divergence/max": 1.076841726899147e-08, "sentence_fisher_kl_divergence/median": 2.051820047199726e-09, "sentence_fisher_kl_divergence/min": 2.5224267119483557e-13, "sentence_fisher_kl_divergence/p25": 4.6782133722444996e-11, "sentence_fisher_kl_divergence/p75": 4.69299266114831e-09, "sentence_fisher_kl_divergence/p85": 6.468326319009066e-09, "sentence_fisher_kl_divergence/p90": 7.232301868498325e-09, "sentence_fisher_kl_divergence/p95": 8.352799341082573e-09, "sentence_fisher_kl_divergence/p99": 1.054722886806303e-08, "sentence_fisher_kl_divergence/var": 8.805471623886213e-18, "sentence_full_gradient_variance/max_squared_error": 5666.38671875, "sentence_full_gradient_variance/metric": 5666.38671875, "sentence_full_gradient_variance/p75": 5666.38671875, "sentence_full_gradient_variance/p90": 5666.38671875, "sentence_full_gradient_variance/p95": 5666.38671875, "sentence_full_gradient_variance/p99": 5666.38671875, "sentence_full_update_term": 0.0005141335423104465, "sentence_full_update_term/max": 0.002197265625, "sentence_full_update_term/median": 0.0003986358642578125, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0008382797241210938, "sentence_full_update_term/p85": 0.00119781494140625, "sentence_full_update_term/p90": 0.0013580322265625, "sentence_full_update_term/p95": 0.0015411376953125, "sentence_full_update_term/p99": 0.0020668033976107836, "sentence_full_update_term/var": 3.258386982452066e-07, "sentence_hessian_coeff": 67396.5, "sentence_hessian_coeff/max": 839680.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -382976.0, "sentence_hessian_coeff/p25": -23168.0, "sentence_hessian_coeff/p75": 128896.0, "sentence_hessian_coeff/p99": 746291.5, "sentence_hessian_coeff/var": 59576737792.0, "sentence_hessian_coeff_abs": 151527.84375, "sentence_hessian_coeff_abs/max": 839680.0, "sentence_hessian_coeff_abs/median": 59136.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 229888.0, "sentence_hessian_coeff_abs/p99": 746291.5, "sentence_hessian_coeff_abs/var": 40964460544.0, "step": 96, "token_fisher_curvature": 456140.21875, "token_fisher_curvature/max": 255852544.0, "token_fisher_curvature/median": 5.2909066017292616e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.982292911136717e-23, "token_fisher_curvature/p75": 3.254285729781259e-12, "token_fisher_curvature/p85": 4.773028194904327e-09, "token_fisher_curvature/p90": 1.3783574104309082e-06, "token_fisher_curvature/p95": 0.017578125, "token_fisher_curvature/p99": 897024.0, "token_fisher_curvature/var": 53902959443968.0, "token_fisher_kl_divergence": 2.9606652685032486e-09, "token_fisher_kl_divergence/max": 1.6614794731140137e-06, "token_fisher_kl_divergence/median": 3.4358590207868288e-31, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.172175143618065e-37, "token_fisher_kl_divergence/p75": 2.1103606936472508e-26, "token_fisher_kl_divergence/p85": 3.101927297073854e-23, "token_fisher_kl_divergence/p90": 8.946785505373547e-21, "token_fisher_kl_divergence/p95": 1.1449174941446927e-16, "token_fisher_kl_divergence/p99": 5.820766091346741e-09, "token_fisher_kl_divergence/var": 2.2705029435759637e-15, "token_full_update_term": 6.436836429202231e-06, "token_full_update_term/max": 0.00182342529296875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.6391277313232422e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 5.238864897449957e-16, "token_full_update_term/p85": 6.439293542825908e-14, "token_full_update_term/p90": 1.9753088054130785e-12, "token_full_update_term/p95": 7.14678094482224e-10, "token_full_update_term/p99": 8.749589323997498e-05, "token_full_update_term/var": 5.841109373960762e-09, "token_hessian_coeff": 27550.546875, "token_hessian_coeff/max": 252706816.0, "token_hessian_coeff/median": -3.7481129311345285e-13, "token_hessian_coeff/min": -33030144.0, "token_hessian_coeff/p25": -1.1742115020751953e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.6600341796875, "token_hessian_coeff/var": 33736265564160.0, "token_hessian_coeff_abs": 414279.5625, "token_hessian_coeff_abs/max": 252706816.0, "token_hessian_coeff_abs/median": 1.5133991837501526e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 0.00012028217315673828, "token_hessian_coeff_abs/p99": 9306112.0, "token_hessian_coeff_abs/var": 33565398007808.0 }, { "accuracy_reward": 0.78125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 1.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.17269736528396606, "adam_stats/lm_head/lr_effective_max": 2.4300271661559236e-07, "adam_stats/lm_head/lr_effective_mean": 1.1909970351522275e-13, "adam_stats/lm_head/lr_effective_min": -2.5550519922035164e-07, "adam_stats/lm_head/lr_effective_std": 8.721251276710973e-09, "adam_stats/lr_effective_max": 2.6670048214327835e-07, "adam_stats/lr_effective_mean": 7.636897841542534e-13, "adam_stats/lr_effective_min": -2.665739486928942e-07, "adam_stats/m_t_max": 0.006401401478797197, "adam_stats/m_t_mean": -6.113833744825214e-11, "adam_stats/m_t_min": -0.005374993197619915, "adam_stats/v_t_max": 2.7786652935901657e-05, "adam_stats/v_t_mean": 3.0229551500893237e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.78125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 1.0, "advantages/p75": 1.0, "advantages/var": 0.17269736528396606, "all_logprobs": -0.009333414025604725, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.5, "all_logprobs/p1": -0.16015625, "all_logprobs/p10": -1.1920928955078125e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -7.574551273137331e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.014040648937225342, "clip_ratio": 0.0, "completion_length": 490.875, "completion_length/correct": 471.6800231933594, "completion_length/correct/max": 1024.0, "completion_length/correct/median": 451.0, "completion_length/correct/min": 195.0, "completion_length/correct/p25": 321.5, "completion_length/correct/p75": 620.5, "completion_length/correct/var": 33655.87109375, "completion_length/incorrect": 559.4285888671875, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 469.0, "completion_length/incorrect/min": 449.0, "completion_length/incorrect/p25": 449.0, "completion_length/incorrect/p75": 557.0, "completion_length/incorrect/var": 23864.85546875, "completion_length/max": 1024.0, "completion_length/median": 469.0, "completion_length/min": 195.0, "completion_length/p25": 372.25, "completion_length/p75": 609.5, "completion_length/var": 32570.07421875, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1552, "feature_vector_variance/max_squared_error": 56801.328125, "feature_vector_variance/metric": 31306.7109375, "generated_tokens/total": 5262561.0, "global_fisher_curvature": 208896.0, "global_fisher_curvature/max": 208896.0, "global_fisher_curvature/median": 208896.0, "global_fisher_curvature/min": 208896.0, "global_fisher_curvature/p25": 208896.0, "global_fisher_curvature/p75": 208896.0, "global_fisher_curvature/p85": 208896.0, "global_fisher_curvature/p90": 208896.0, "global_fisher_curvature/p95": 208896.0, "global_fisher_curvature/p99": 208896.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 5.566107574850321e-10, "global_fisher_kl_divergence/max": 5.566107574850321e-10, "global_fisher_kl_divergence/median": 5.566107574850321e-10, "global_fisher_kl_divergence/min": 5.566107574850321e-10, "global_fisher_kl_divergence/p25": 5.566107574850321e-10, "global_fisher_kl_divergence/p75": 5.566107574850321e-10, "global_fisher_kl_divergence/p85": 5.566107574850321e-10, "global_fisher_kl_divergence/p90": 5.566107574850321e-10, "global_fisher_kl_divergence/p95": 5.566107574850321e-10, "global_fisher_kl_divergence/p99": 5.566107574850321e-10, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.0008392333984375, "global_full_update_term/max": 0.0008392333984375, "global_full_update_term/median": 0.0008392333984375, "global_full_update_term/min": 0.0008392333984375, "global_full_update_term/p25": 0.0008392333984375, "global_full_update_term/p75": 0.0008392333984375, "global_full_update_term/p85": 0.0008392333984375, "global_full_update_term/p90": 0.0008392333984375, "global_full_update_term/p95": 0.0008392333984375, "global_full_update_term/p99": 0.0008392333984375, "global_full_update_term/var": NaN, "global_hessian_coeff": 51200.0, "global_hessian_coeff/max": 51200.0, "global_hessian_coeff/median": 51200.0, "global_hessian_coeff/min": 51200.0, "global_hessian_coeff/p25": 51200.0, "global_hessian_coeff/p75": 51200.0, "global_hessian_coeff/p99": 51200.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 51200.0, "global_hessian_coeff_abs/max": 51200.0, "global_hessian_coeff_abs/median": 51200.0, "global_hessian_coeff_abs/min": 51200.0, "global_hessian_coeff_abs/p25": 51200.0, "global_hessian_coeff_abs/p75": 51200.0, "global_hessian_coeff_abs/p99": 51200.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 1.0887542963027954, "learning_rate": 4.108578473795033e-08, "loss": -0.7812, "masked_global_fisher_curvature": 208896.0, "masked_global_fisher_curvature/max": 208896.0, "masked_global_fisher_curvature/median": 208896.0, "masked_global_fisher_curvature/min": 208896.0, "masked_global_fisher_curvature/p25": 208896.0, "masked_global_fisher_curvature/p75": 208896.0, "masked_global_fisher_curvature/p85": 208896.0, "masked_global_fisher_curvature/p90": 208896.0, "masked_global_fisher_curvature/p95": 208896.0, "masked_global_fisher_curvature/p99": 208896.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/max": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/median": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/min": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/p25": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/p75": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/p85": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/p90": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/p95": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/p99": 5.566107574850321e-10, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.0008392333984375, "masked_global_full_update_term/max": 0.0008392333984375, "masked_global_full_update_term/median": 0.0008392333984375, "masked_global_full_update_term/min": 0.0008392333984375, "masked_global_full_update_term/p25": 0.0008392333984375, "masked_global_full_update_term/p75": 0.0008392333984375, "masked_global_full_update_term/p85": 0.0008392333984375, "masked_global_full_update_term/p90": 0.0008392333984375, "masked_global_full_update_term/p95": 0.0008392333984375, "masked_global_full_update_term/p99": 0.0008392333984375, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 51200.0, "masked_global_hessian_coeff/max": 51200.0, "masked_global_hessian_coeff/median": 51200.0, "masked_global_hessian_coeff/min": 51200.0, "masked_global_hessian_coeff/p25": 51200.0, "masked_global_hessian_coeff/p75": 51200.0, "masked_global_hessian_coeff/p99": 51200.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 51200.0, "masked_global_hessian_coeff_abs/max": 51200.0, "masked_global_hessian_coeff_abs/median": 51200.0, "masked_global_hessian_coeff_abs/min": 51200.0, "masked_global_hessian_coeff_abs/p25": 51200.0, "masked_global_hessian_coeff_abs/p75": 51200.0, "masked_global_hessian_coeff_abs/p99": 51200.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 61.9765625, "masked_per_sentence_gradient_norm/max": 242.0, "masked_per_sentence_gradient_norm/median": 53.25, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 18.875, "masked_per_sentence_gradient_norm/p75": 89.25, "masked_per_sentence_gradient_norm/var": 3071.936279296875, "masked_per_token_gradient_norm": 1.1325156688690186, "masked_per_token_gradient_norm/max": 286.0, "masked_per_token_gradient_norm/median": 1.2187229003757238e-10, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 1.3724580723479835e-20, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.3504177331924438e-07, "masked_per_token_gradient_norm/var": 157.876220703125, "masked_sentence_fisher_curvature": 600458.125, "masked_sentence_fisher_curvature/max": 2473984.0, "masked_sentence_fisher_curvature/median": 477184.0, "masked_sentence_fisher_curvature/min": 728.0, "masked_sentence_fisher_curvature/p25": 157696.0, "masked_sentence_fisher_curvature/p75": 996352.0, "masked_sentence_fisher_curvature/p85": 1183744.0, "masked_sentence_fisher_curvature/p90": 1318912.0, "masked_sentence_fisher_curvature/p95": 1554432.0, "masked_sentence_fisher_curvature/p99": 2302771.75, "masked_sentence_fisher_curvature/var": 312465752064.0, "masked_sentence_fisher_kl_divergence": 1.5985247570071692e-09, "masked_sentence_fisher_kl_divergence/max": 6.577465683221817e-09, "masked_sentence_fisher_kl_divergence/median": 1.2732925824820995e-09, "masked_sentence_fisher_kl_divergence/min": 1.9326762412674725e-12, "masked_sentence_fisher_kl_divergence/p25": 4.2018655221909285e-10, "masked_sentence_fisher_kl_divergence/p75": 2.6520865503698587e-09, "masked_sentence_fisher_kl_divergence/p85": 3.1468516681343317e-09, "masked_sentence_fisher_kl_divergence/p90": 3.5070115700364113e-09, "masked_sentence_fisher_kl_divergence/p95": 4.132743924856186e-09, "masked_sentence_fisher_kl_divergence/p99": 6.135088881364936e-09, "masked_sentence_fisher_kl_divergence/var": 2.2141145524158423e-18, "masked_sentence_full_gradient_variance/max_squared_error": 6786.03125, "masked_sentence_full_gradient_variance/metric": 6786.03125, "masked_sentence_full_gradient_variance/p75": 6786.03125, "masked_sentence_full_gradient_variance/p90": 6786.03125, "masked_sentence_full_gradient_variance/p95": 6786.03125, "masked_sentence_full_gradient_variance/p99": 6786.03125, "masked_sentence_full_update_term": 0.000417172908782959, "masked_sentence_full_update_term/max": 0.00160980224609375, "masked_sentence_full_update_term/median": 0.0003509521484375, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.00014400482177734375, "masked_sentence_full_update_term/p75": 0.0006113052368164062, "masked_sentence_full_update_term/p85": 0.0008182525634765625, "masked_sentence_full_update_term/p90": 0.0009326934814453125, "masked_sentence_full_update_term/p95": 0.0010833740234375, "masked_sentence_full_update_term/p99": 0.0014358526095747948, "masked_sentence_full_update_term/var": 1.3579230540017306e-07, "masked_sentence_hessian_coeff": 114938.3359375, "masked_sentence_hessian_coeff/max": 1531904.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -378880.0, "masked_sentence_hessian_coeff/p25": -80896.0, "masked_sentence_hessian_coeff/p75": 235264.0, "masked_sentence_hessian_coeff/p99": 1477427.375, "masked_sentence_hessian_coeff/var": 117704744960.0, "masked_sentence_hessian_coeff_abs": 228242.34375, "masked_sentence_hessian_coeff_abs/max": 1531904.0, "masked_sentence_hessian_coeff_abs/median": 131072.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 50688.0, "masked_sentence_hessian_coeff_abs/p75": 351744.0, "masked_sentence_hessian_coeff_abs/p99": 1477427.375, "masked_sentence_hessian_coeff_abs/var": 78411702272.0, "masked_token_fisher_curvature": 552931.5, "masked_token_fisher_curvature/max": 262144000.0, "masked_token_fisher_curvature/median": 7.752045533271357e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 7.134432783269864e-24, "masked_token_fisher_curvature/p75": 7.602807272633072e-13, "masked_token_fisher_curvature/p85": 8.476490620523691e-10, "masked_token_fisher_curvature/p90": 2.8870999813079834e-07, "masked_token_fisher_curvature/p95": 0.005700111389160156, "masked_token_fisher_curvature/p99": 2310144.0, "masked_token_fisher_curvature/var": 69173870854144.0, "masked_token_fisher_kl_divergence": 1.4716938778747135e-09, "masked_token_fisher_kl_divergence/max": 6.966292858123779e-07, "masked_token_fisher_kl_divergence/median": 2.0607450404943424e-32, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 1.900994770470418e-38, "masked_token_fisher_kl_divergence/p75": 2.0194839173657902e-27, "masked_token_fisher_kl_divergence/p85": 2.261821987449685e-24, "masked_token_fisher_kl_divergence/p90": 7.676236084492097e-22, "masked_token_fisher_kl_divergence/p95": 1.518814777721686e-17, "masked_token_fisher_kl_divergence/p99": 6.1409082263708115e-09, "masked_token_fisher_kl_divergence/var": 4.899738845753345e-16, "masked_token_full_update_term": 4.8645415517967194e-06, "masked_token_full_update_term/max": 0.00118255615234375, "masked_token_full_update_term/median": 1.2730826615073942e-24, "masked_token_full_update_term/min": -7.217749953269958e-08, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 3.8250652645288596e-16, "masked_token_full_update_term/p85": 3.5083047578154947e-14, "masked_token_full_update_term/p90": 9.059419880941277e-13, "masked_token_full_update_term/p95": 5.227605015534209e-10, "masked_token_full_update_term/p99": 0.00010919570922851562, "masked_token_full_update_term/var": 2.916215713355541e-09, "masked_token_hessian_coeff": 59497.8125, "masked_token_hessian_coeff/max": 257949696.0, "masked_token_hessian_coeff/median": -9.237055564881302e-13, "masked_token_hessian_coeff/min": -34078720.0, "masked_token_hessian_coeff/p25": -1.245737075805664e-05, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 1.3515625, "masked_token_hessian_coeff/var": 45619464896512.0, "masked_token_hessian_coeff_abs": 504266.125, "masked_token_hessian_coeff_abs/max": 257949696.0, "masked_token_hessian_coeff_abs/median": 4.0512531995773315e-08, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 1.2034644114589099e-17, "masked_token_hessian_coeff_abs/p75": 0.00012874603271484375, "masked_token_hessian_coeff_abs/p99": 14221312.0, "masked_token_hessian_coeff_abs/var": 45368712626176.0, "mean_logprobs": -0.00982666015625, "mean_logprobs/var": 3.9577484130859375e-05, "num_completions/total": 9312, "per_sentence_gradient_norm": 61.9765625, "per_sentence_gradient_norm/max": 242.0, "per_sentence_gradient_norm/median": 53.25, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 18.875, "per_sentence_gradient_norm/p75": 89.25, "per_sentence_gradient_norm/var": 3071.936279296875, "per_token_feature_norm": 190.3101043701172, "per_token_feature_norm/max": 252.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.5, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 197.0, "per_token_feature_norm/var": 148.25277709960938, "per_token_gradient_norm": 1.1325156688690186, "per_token_gradient_norm/max": 286.0, "per_token_gradient_norm/median": 1.2187229003757238e-10, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 1.3724580723479835e-20, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.3504177331924438e-07, "per_token_gradient_norm/var": 157.876220703125, "per_token_policy_error_norm": 0.005589060485363007, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0055660889483988285, "policy_entropy": 0.009407815523445606, "policy_entropy/max": 1.453125, "policy_entropy/median": 7.785274647176266e-10, "policy_entropy/min": 9.85999290163209e-22, "policy_entropy/p25": 1.3997691894473974e-12, "policy_entropy/p75": 1.0663643479347229e-07, "policy_entropy/var": 0.004955420736223459, "policy_loss": -0.78125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": -1.0, "policy_loss/var": 0.17269736528396606, "policy_sharpness": 9.741031646728516, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.6752285957336426, "reward": 0.78125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 1.0, "reward/p75": 1.0, "reward/var": 0.17269736528396606, "rewards/accuracy_reward": 0.78125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 1.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.17269736528396606, "sentence_fisher_curvature": 600458.125, "sentence_fisher_curvature/max": 2473984.0, "sentence_fisher_curvature/median": 477184.0, "sentence_fisher_curvature/min": 728.0, "sentence_fisher_curvature/p25": 157696.0, "sentence_fisher_curvature/p75": 996352.0, "sentence_fisher_curvature/p85": 1183744.0, "sentence_fisher_curvature/p90": 1318912.0, "sentence_fisher_curvature/p95": 1554432.0, "sentence_fisher_curvature/p99": 2302771.75, "sentence_fisher_curvature/var": 312465752064.0, "sentence_fisher_kl_divergence": 1.5985247570071692e-09, "sentence_fisher_kl_divergence/max": 6.577465683221817e-09, "sentence_fisher_kl_divergence/median": 1.2732925824820995e-09, "sentence_fisher_kl_divergence/min": 1.9326762412674725e-12, "sentence_fisher_kl_divergence/p25": 4.2018655221909285e-10, "sentence_fisher_kl_divergence/p75": 2.6520865503698587e-09, "sentence_fisher_kl_divergence/p85": 3.1468516681343317e-09, "sentence_fisher_kl_divergence/p90": 3.5070115700364113e-09, "sentence_fisher_kl_divergence/p95": 4.132743924856186e-09, "sentence_fisher_kl_divergence/p99": 6.135088881364936e-09, "sentence_fisher_kl_divergence/var": 2.2141145524158423e-18, "sentence_full_gradient_variance/max_squared_error": 6786.03125, "sentence_full_gradient_variance/metric": 6786.03125, "sentence_full_gradient_variance/p75": 6786.03125, "sentence_full_gradient_variance/p90": 6786.03125, "sentence_full_gradient_variance/p95": 6786.03125, "sentence_full_gradient_variance/p99": 6786.03125, "sentence_full_update_term": 0.000417172908782959, "sentence_full_update_term/max": 0.00160980224609375, "sentence_full_update_term/median": 0.0003509521484375, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.00014400482177734375, "sentence_full_update_term/p75": 0.0006113052368164062, "sentence_full_update_term/p85": 0.0008182525634765625, "sentence_full_update_term/p90": 0.0009326934814453125, "sentence_full_update_term/p95": 0.0010833740234375, "sentence_full_update_term/p99": 0.0014358526095747948, "sentence_full_update_term/var": 1.3579230540017306e-07, "sentence_hessian_coeff": 114938.3359375, "sentence_hessian_coeff/max": 1531904.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -378880.0, "sentence_hessian_coeff/p25": -80896.0, "sentence_hessian_coeff/p75": 235264.0, "sentence_hessian_coeff/p99": 1477427.375, "sentence_hessian_coeff/var": 117704744960.0, "sentence_hessian_coeff_abs": 228242.34375, "sentence_hessian_coeff_abs/max": 1531904.0, "sentence_hessian_coeff_abs/median": 131072.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 50688.0, "sentence_hessian_coeff_abs/p75": 351744.0, "sentence_hessian_coeff_abs/p99": 1477427.375, "sentence_hessian_coeff_abs/var": 78411702272.0, "step": 97, "token_fisher_curvature": 552931.5, "token_fisher_curvature/max": 262144000.0, "token_fisher_curvature/median": 7.752045533271357e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 7.134432783269864e-24, "token_fisher_curvature/p75": 7.602807272633072e-13, "token_fisher_curvature/p85": 8.476490620523691e-10, "token_fisher_curvature/p90": 2.8870999813079834e-07, "token_fisher_curvature/p95": 0.005700111389160156, "token_fisher_curvature/p99": 2310144.0, "token_fisher_curvature/var": 69173870854144.0, "token_fisher_kl_divergence": 1.4716938778747135e-09, "token_fisher_kl_divergence/max": 6.966292858123779e-07, "token_fisher_kl_divergence/median": 2.0607450404943424e-32, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 1.900994770470418e-38, "token_fisher_kl_divergence/p75": 2.0194839173657902e-27, "token_fisher_kl_divergence/p85": 2.261821987449685e-24, "token_fisher_kl_divergence/p90": 7.676236084492097e-22, "token_fisher_kl_divergence/p95": 1.518814777721686e-17, "token_fisher_kl_divergence/p99": 6.1409082263708115e-09, "token_fisher_kl_divergence/var": 4.899738845753345e-16, "token_full_update_term": 4.8645415517967194e-06, "token_full_update_term/max": 0.00118255615234375, "token_full_update_term/median": 1.2730826615073942e-24, "token_full_update_term/min": -7.217749953269958e-08, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 3.8250652645288596e-16, "token_full_update_term/p85": 3.5083047578154947e-14, "token_full_update_term/p90": 9.059419880941277e-13, "token_full_update_term/p95": 5.227605015534209e-10, "token_full_update_term/p99": 0.00010919570922851562, "token_full_update_term/var": 2.916215713355541e-09, "token_hessian_coeff": 59497.8125, "token_hessian_coeff/max": 257949696.0, "token_hessian_coeff/median": -9.237055564881302e-13, "token_hessian_coeff/min": -34078720.0, "token_hessian_coeff/p25": -1.245737075805664e-05, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 1.3515625, "token_hessian_coeff/var": 45619464896512.0, "token_hessian_coeff_abs": 504266.125, "token_hessian_coeff_abs/max": 257949696.0, "token_hessian_coeff_abs/median": 4.0512531995773315e-08, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 1.2034644114589099e-17, "token_hessian_coeff_abs/p75": 0.00012874603271484375, "token_hessian_coeff_abs/p99": 14221312.0, "token_hessian_coeff_abs/var": 45368712626176.0 }, { "accuracy_reward": 0.6875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.21710523962974548, "adam_stats/lm_head/lr_effective_max": 1.0797511151849903e-07, "adam_stats/lm_head/lr_effective_mean": -2.8286826548630517e-13, "adam_stats/lm_head/lr_effective_min": -1.1564672774966311e-07, "adam_stats/lm_head/lr_effective_std": 3.756472910509956e-09, "adam_stats/lr_effective_max": 1.185646993917544e-07, "adam_stats/lr_effective_mean": -9.958404408169294e-14, "adam_stats/lr_effective_min": -1.1812101519126372e-07, "adam_stats/m_t_max": 0.004412383772432804, "adam_stats/m_t_mean": -8.31318625049704e-11, "adam_stats/m_t_min": -0.004465179517865181, "adam_stats/v_t_max": 2.7772728572017513e-05, "adam_stats/v_t_mean": 3.0781174050614757e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.6875, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.21710523962974548, "all_logprobs": -0.006367179565131664, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -6.5, "all_logprobs/p1": -0.06201660633087158, "all_logprobs/p10": -4.76837158203125e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -2.753734588623047e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.010276582092046738, "clip_ratio": 0.0, "completion_length": 428.1145935058594, "completion_length/correct": 341.93939208984375, "completion_length/correct/max": 883.0, "completion_length/correct/median": 307.0, "completion_length/correct/min": 231.0, "completion_length/correct/p25": 254.0, "completion_length/correct/p75": 326.75, "completion_length/correct/var": 16416.3046875, "completion_length/incorrect": 617.7000122070312, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 531.0, "completion_length/incorrect/min": 287.0, "completion_length/incorrect/p25": 398.0, "completion_length/incorrect/p75": 818.0, "completion_length/incorrect/var": 64326.7734375, "completion_length/max": 1024.0, "completion_length/median": 320.0, "completion_length/min": 231.0, "completion_length/p25": 265.0, "completion_length/p75": 492.0, "completion_length/var": 47378.33203125, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1568, "feature_vector_variance/max_squared_error": 58133.7734375, "feature_vector_variance/metric": 30922.69921875, "generated_tokens/total": 5303660.0, "global_fisher_curvature": 125440.0, "global_fisher_curvature/max": 125440.0, "global_fisher_curvature/median": 125440.0, "global_fisher_curvature/min": 125440.0, "global_fisher_curvature/p25": 125440.0, "global_fisher_curvature/p75": 125440.0, "global_fisher_curvature/p85": 125440.0, "global_fisher_curvature/p90": 125440.0, "global_fisher_curvature/p95": 125440.0, "global_fisher_curvature/p99": 125440.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.0595613275654614e-10, "global_fisher_kl_divergence/max": 1.0595613275654614e-10, "global_fisher_kl_divergence/median": 1.0595613275654614e-10, "global_fisher_kl_divergence/min": 1.0595613275654614e-10, "global_fisher_kl_divergence/p25": 1.0595613275654614e-10, "global_fisher_kl_divergence/p75": 1.0595613275654614e-10, "global_fisher_kl_divergence/p85": 1.0595613275654614e-10, "global_fisher_kl_divergence/p90": 1.0595613275654614e-10, "global_fisher_kl_divergence/p95": 1.0595613275654614e-10, "global_fisher_kl_divergence/p99": 1.0595613275654614e-10, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 0.00014781951904296875, "global_full_update_term/max": 0.00014781951904296875, "global_full_update_term/median": 0.00014781951904296875, "global_full_update_term/min": 0.00014781951904296875, "global_full_update_term/p25": 0.00014781951904296875, "global_full_update_term/p75": 0.00014781951904296875, "global_full_update_term/p85": 0.00014781951904296875, "global_full_update_term/p90": 0.00014781951904296875, "global_full_update_term/p95": 0.00014781951904296875, "global_full_update_term/p99": 0.00014781951904296875, "global_full_update_term/var": NaN, "global_hessian_coeff": 11392.0, "global_hessian_coeff/max": 11392.0, "global_hessian_coeff/median": 11392.0, "global_hessian_coeff/min": 11392.0, "global_hessian_coeff/p25": 11392.0, "global_hessian_coeff/p75": 11392.0, "global_hessian_coeff/p99": 11392.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 11392.0, "global_hessian_coeff_abs/max": 11392.0, "global_hessian_coeff_abs/median": 11392.0, "global_hessian_coeff_abs/min": 11392.0, "global_hessian_coeff_abs/p25": 11392.0, "global_hessian_coeff_abs/p75": 11392.0, "global_hessian_coeff_abs/p99": 11392.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.6518873572349548, "learning_rate": 1.8269623051318517e-08, "loss": -0.6875, "masked_global_fisher_curvature": 125440.0, "masked_global_fisher_curvature/max": 125440.0, "masked_global_fisher_curvature/median": 125440.0, "masked_global_fisher_curvature/min": 125440.0, "masked_global_fisher_curvature/p25": 125440.0, "masked_global_fisher_curvature/p75": 125440.0, "masked_global_fisher_curvature/p85": 125440.0, "masked_global_fisher_curvature/p90": 125440.0, "masked_global_fisher_curvature/p95": 125440.0, "masked_global_fisher_curvature/p99": 125440.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/max": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/median": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/min": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/p25": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/p75": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/p85": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/p90": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/p95": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/p99": 1.0595613275654614e-10, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 0.00014781951904296875, "masked_global_full_update_term/max": 0.00014781951904296875, "masked_global_full_update_term/median": 0.00014781951904296875, "masked_global_full_update_term/min": 0.00014781951904296875, "masked_global_full_update_term/p25": 0.00014781951904296875, "masked_global_full_update_term/p75": 0.00014781951904296875, "masked_global_full_update_term/p85": 0.00014781951904296875, "masked_global_full_update_term/p90": 0.00014781951904296875, "masked_global_full_update_term/p95": 0.00014781951904296875, "masked_global_full_update_term/p99": 0.00014781951904296875, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 11392.0, "masked_global_hessian_coeff/max": 11392.0, "masked_global_hessian_coeff/median": 11392.0, "masked_global_hessian_coeff/min": 11392.0, "masked_global_hessian_coeff/p25": 11392.0, "masked_global_hessian_coeff/p75": 11392.0, "masked_global_hessian_coeff/p99": 11392.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 11392.0, "masked_global_hessian_coeff_abs/max": 11392.0, "masked_global_hessian_coeff_abs/median": 11392.0, "masked_global_hessian_coeff_abs/min": 11392.0, "masked_global_hessian_coeff_abs/p25": 11392.0, "masked_global_hessian_coeff_abs/p75": 11392.0, "masked_global_hessian_coeff_abs/p99": 11392.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 24.846355438232422, "masked_per_sentence_gradient_norm/max": 165.0, "masked_per_sentence_gradient_norm/median": 10.75, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 31.09375, "masked_per_sentence_gradient_norm/var": 1263.4073486328125, "masked_per_token_gradient_norm": 0.4537426829338074, "masked_per_token_gradient_norm/max": 278.0, "masked_per_token_gradient_norm/median": 6.13398221105399e-15, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 4.743924364447594e-09, "masked_per_token_gradient_norm/var": 60.84831237792969, "masked_sentence_fisher_curvature": 273015.9375, "masked_sentence_fisher_curvature/max": 3145728.0, "masked_sentence_fisher_curvature/median": 113664.0, "masked_sentence_fisher_curvature/min": 564.0, "masked_sentence_fisher_curvature/p25": 11056.0, "masked_sentence_fisher_curvature/p75": 378880.0, "masked_sentence_fisher_curvature/p85": 435712.0, "masked_sentence_fisher_curvature/p90": 550912.0, "masked_sentence_fisher_curvature/p95": 808960.0, "masked_sentence_fisher_curvature/p99": 2274102.0, "masked_sentence_fisher_curvature/var": 211831914496.0, "masked_sentence_fisher_kl_divergence": 2.3033945584227666e-10, "masked_sentence_fisher_kl_divergence/max": 2.648448571562767e-09, "masked_sentence_fisher_kl_divergence/median": 9.595169103704393e-11, "masked_sentence_fisher_kl_divergence/min": 4.760636329592671e-13, "masked_sentence_fisher_kl_divergence/p25": 9.322320693172514e-12, "masked_sentence_fisher_kl_divergence/p75": 3.2014213502407074e-10, "masked_sentence_fisher_kl_divergence/p85": 3.67435859516263e-10, "masked_sentence_fisher_kl_divergence/p90": 4.6475179260596633e-10, "masked_sentence_fisher_kl_divergence/p95": 6.830305210314691e-10, "masked_sentence_fisher_kl_divergence/p99": 1.9157619934873082e-09, "masked_sentence_fisher_kl_divergence/var": 1.5051021704375728e-19, "masked_sentence_full_gradient_variance/max_squared_error": 1810.0882568359375, "masked_sentence_full_gradient_variance/metric": 1810.0882568359375, "masked_sentence_full_gradient_variance/p75": 1810.0882568359375, "masked_sentence_full_gradient_variance/p90": 1810.0882568359375, "masked_sentence_full_gradient_variance/p95": 1810.0882568359375, "masked_sentence_full_gradient_variance/p99": 1810.0882568359375, "masked_sentence_full_update_term": 7.536200428148732e-05, "masked_sentence_full_update_term/max": 0.0003814697265625, "masked_sentence_full_update_term/median": 2.574920654296875e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 0.0001354217529296875, "masked_sentence_full_update_term/p85": 0.00014281272888183594, "masked_sentence_full_update_term/p90": 0.00019884109497070312, "masked_sentence_full_update_term/p95": 0.0002779960632324219, "masked_sentence_full_update_term/p99": 0.0003814697265625, "masked_sentence_full_update_term/var": 9.078091167680213e-09, "masked_sentence_hessian_coeff": 34052.5, "masked_sentence_hessian_coeff/max": 2506752.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -475136.0, "masked_sentence_hessian_coeff/p25": -96000.0, "masked_sentence_hessian_coeff/p75": 35648.0, "masked_sentence_hessian_coeff/p99": 1401654.75, "masked_sentence_hessian_coeff/var": 142601633792.0, "masked_sentence_hessian_coeff_abs": 184838.171875, "masked_sentence_hessian_coeff_abs/max": 2506752.0, "masked_sentence_hessian_coeff_abs/median": 93184.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 239616.0, "masked_sentence_hessian_coeff_abs/p99": 1401654.75, "masked_sentence_hessian_coeff_abs/var": 109248618496.0, "masked_token_fisher_curvature": 223616.359375, "masked_token_fisher_curvature/max": 257949696.0, "masked_token_fisher_curvature/median": 1.734723475976807e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 2.3393701698765314e-24, "masked_token_fisher_curvature/p75": 3.268496584496461e-13, "masked_token_fisher_curvature/p85": 3.212363708371413e-10, "masked_token_fisher_curvature/p90": 3.827699401881546e-08, "masked_token_fisher_curvature/p95": 0.000218857079744339, "masked_token_fisher_curvature/p99": 60456.0, "masked_token_fisher_curvature/var": 28587312807936.0, "masked_token_fisher_kl_divergence": 1.8877779894843627e-10, "masked_token_fisher_kl_divergence/max": 2.1792948246002197e-07, "masked_token_fisher_kl_divergence/median": 1.4625030515190572e-33, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 2.0203809154758067e-39, "masked_token_fisher_kl_divergence/p75": 2.7610131682735413e-28, "masked_token_fisher_kl_divergence/p85": 2.7077492799530186e-25, "masked_token_fisher_kl_divergence/p90": 3.2342438833396604e-23, "masked_token_fisher_kl_divergence/p95": 1.8498405474645868e-19, "masked_token_fisher_kl_divergence/p99": 5.0967230436071986e-11, "masked_token_fisher_kl_divergence/var": 2.0373949832464928e-17, "masked_token_full_update_term": 1.1074267831645557e-06, "masked_token_full_update_term/max": 0.000659942626953125, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -1.5925616025924683e-07, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 1.7025362239811437e-19, "masked_token_full_update_term/p85": 3.4867941867133823e-16, "masked_token_full_update_term/p90": 1.7385832357108555e-14, "masked_token_full_update_term/p95": 3.410605131648481e-12, "masked_token_full_update_term/p99": 2.3695756681263447e-06, "masked_token_full_update_term/var": 3.6464956054693687e-10, "masked_token_hessian_coeff": 10521.78125, "masked_token_hessian_coeff/max": 252706816.0, "masked_token_hessian_coeff/median": 0.0, "masked_token_hessian_coeff/min": -33554432.0, "masked_token_hessian_coeff/p25": -5.052424967288971e-08, "masked_token_hessian_coeff/p75": -0.0, "masked_token_hessian_coeff/p99": 0.287109375, "masked_token_hessian_coeff/var": 19169930117120.0, "masked_token_hessian_coeff_abs": 231474.03125, "masked_token_hessian_coeff_abs/max": 252706816.0, "masked_token_hessian_coeff_abs/median": 3.225864020350855e-12, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 3.546476364135742e-06, "masked_token_hessian_coeff_abs/p99": 729488.0, "masked_token_hessian_coeff_abs/var": 19116456935424.0, "mean_logprobs": -0.006378173828125, "mean_logprobs/var": 4.3392181396484375e-05, "num_completions/total": 9408, "per_sentence_gradient_norm": 24.846355438232422, "per_sentence_gradient_norm/max": 165.0, "per_sentence_gradient_norm/median": 10.75, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 31.09375, "per_sentence_gradient_norm/var": 1263.4073486328125, "per_token_feature_norm": 190.37158203125, "per_token_feature_norm/max": 258.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 104.0, "per_token_feature_norm/p25": 185.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 133.76170349121094, "per_token_gradient_norm": 0.4537426829338074, "per_token_gradient_norm/max": 278.0, "per_token_gradient_norm/median": 6.13398221105399e-15, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 4.743924364447594e-09, "per_token_gradient_norm/var": 60.84831237792969, "per_token_policy_error_norm": 0.003727327799424529, "per_token_policy_error_norm/max": 2.0, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0037378731649369, "policy_entropy": 0.006420675665140152, "policy_entropy/max": 1.328125, "policy_entropy/median": 4.420144250616431e-10, "policy_entropy/min": 3.441071348220595e-21, "policy_entropy/p25": 8.704148513061227e-13, "policy_entropy/p75": 6.938353180885315e-08, "policy_entropy/var": 0.003354481654241681, "policy_loss": -0.6875, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.21710523962974548, "policy_sharpness": 9.80034351348877, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.2480792999267578, "reward": 0.6875, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.21710523962974548, "rewards/accuracy_reward": 0.6875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.21710523962974548, "sentence_fisher_curvature": 273015.9375, "sentence_fisher_curvature/max": 3145728.0, "sentence_fisher_curvature/median": 113664.0, "sentence_fisher_curvature/min": 564.0, "sentence_fisher_curvature/p25": 11056.0, "sentence_fisher_curvature/p75": 378880.0, "sentence_fisher_curvature/p85": 435712.0, "sentence_fisher_curvature/p90": 550912.0, "sentence_fisher_curvature/p95": 808960.0, "sentence_fisher_curvature/p99": 2274102.0, "sentence_fisher_curvature/var": 211831914496.0, "sentence_fisher_kl_divergence": 2.3033945584227666e-10, "sentence_fisher_kl_divergence/max": 2.648448571562767e-09, "sentence_fisher_kl_divergence/median": 9.595169103704393e-11, "sentence_fisher_kl_divergence/min": 4.760636329592671e-13, "sentence_fisher_kl_divergence/p25": 9.322320693172514e-12, "sentence_fisher_kl_divergence/p75": 3.2014213502407074e-10, "sentence_fisher_kl_divergence/p85": 3.67435859516263e-10, "sentence_fisher_kl_divergence/p90": 4.6475179260596633e-10, "sentence_fisher_kl_divergence/p95": 6.830305210314691e-10, "sentence_fisher_kl_divergence/p99": 1.9157619934873082e-09, "sentence_fisher_kl_divergence/var": 1.5051021704375728e-19, "sentence_full_gradient_variance/max_squared_error": 1810.0882568359375, "sentence_full_gradient_variance/metric": 1810.0882568359375, "sentence_full_gradient_variance/p75": 1810.0882568359375, "sentence_full_gradient_variance/p90": 1810.0882568359375, "sentence_full_gradient_variance/p95": 1810.0882568359375, "sentence_full_gradient_variance/p99": 1810.0882568359375, "sentence_full_update_term": 7.536200428148732e-05, "sentence_full_update_term/max": 0.0003814697265625, "sentence_full_update_term/median": 2.574920654296875e-05, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 0.0001354217529296875, "sentence_full_update_term/p85": 0.00014281272888183594, "sentence_full_update_term/p90": 0.00019884109497070312, "sentence_full_update_term/p95": 0.0002779960632324219, "sentence_full_update_term/p99": 0.0003814697265625, "sentence_full_update_term/var": 9.078091167680213e-09, "sentence_hessian_coeff": 34052.5, "sentence_hessian_coeff/max": 2506752.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -475136.0, "sentence_hessian_coeff/p25": -96000.0, "sentence_hessian_coeff/p75": 35648.0, "sentence_hessian_coeff/p99": 1401654.75, "sentence_hessian_coeff/var": 142601633792.0, "sentence_hessian_coeff_abs": 184838.171875, "sentence_hessian_coeff_abs/max": 2506752.0, "sentence_hessian_coeff_abs/median": 93184.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 239616.0, "sentence_hessian_coeff_abs/p99": 1401654.75, "sentence_hessian_coeff_abs/var": 109248618496.0, "step": 98, "token_fisher_curvature": 223616.359375, "token_fisher_curvature/max": 257949696.0, "token_fisher_curvature/median": 1.734723475976807e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 2.3393701698765314e-24, "token_fisher_curvature/p75": 3.268496584496461e-13, "token_fisher_curvature/p85": 3.212363708371413e-10, "token_fisher_curvature/p90": 3.827699401881546e-08, "token_fisher_curvature/p95": 0.000218857079744339, "token_fisher_curvature/p99": 60456.0, "token_fisher_curvature/var": 28587312807936.0, "token_fisher_kl_divergence": 1.8877779894843627e-10, "token_fisher_kl_divergence/max": 2.1792948246002197e-07, "token_fisher_kl_divergence/median": 1.4625030515190572e-33, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 2.0203809154758067e-39, "token_fisher_kl_divergence/p75": 2.7610131682735413e-28, "token_fisher_kl_divergence/p85": 2.7077492799530186e-25, "token_fisher_kl_divergence/p90": 3.2342438833396604e-23, "token_fisher_kl_divergence/p95": 1.8498405474645868e-19, "token_fisher_kl_divergence/p99": 5.0967230436071986e-11, "token_fisher_kl_divergence/var": 2.0373949832464928e-17, "token_full_update_term": 1.1074267831645557e-06, "token_full_update_term/max": 0.000659942626953125, "token_full_update_term/median": 0.0, "token_full_update_term/min": -1.5925616025924683e-07, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 1.7025362239811437e-19, "token_full_update_term/p85": 3.4867941867133823e-16, "token_full_update_term/p90": 1.7385832357108555e-14, "token_full_update_term/p95": 3.410605131648481e-12, "token_full_update_term/p99": 2.3695756681263447e-06, "token_full_update_term/var": 3.6464956054693687e-10, "token_hessian_coeff": 10521.78125, "token_hessian_coeff/max": 252706816.0, "token_hessian_coeff/median": 0.0, "token_hessian_coeff/min": -33554432.0, "token_hessian_coeff/p25": -5.052424967288971e-08, "token_hessian_coeff/p75": -0.0, "token_hessian_coeff/p99": 0.287109375, "token_hessian_coeff/var": 19169930117120.0, "token_hessian_coeff_abs": 231474.03125, "token_hessian_coeff_abs/max": 252706816.0, "token_hessian_coeff_abs/median": 3.225864020350855e-12, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 3.546476364135742e-06, "token_hessian_coeff_abs/p99": 729488.0, "token_hessian_coeff_abs/var": 19116456935424.0 }, { "accuracy_reward": 0.53125, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 1.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25164473056793213, "adam_stats/lm_head/lr_effective_max": 2.708793189754033e-08, "adam_stats/lm_head/lr_effective_mean": -5.141542844689512e-14, "adam_stats/lm_head/lr_effective_min": -2.952033462122472e-08, "adam_stats/lm_head/lr_effective_std": 9.328574579470228e-10, "adam_stats/lr_effective_max": 3.012282334680094e-08, "adam_stats/lr_effective_mean": 3.9595188700046816e-14, "adam_stats/lr_effective_min": -3.0212081725267126e-08, "adam_stats/m_t_max": 0.003490056376904249, "adam_stats/m_t_mean": -7.781116029281776e-11, "adam_stats/m_t_min": -0.004257259424775839, "adam_stats/v_t_max": 2.7868351025972515e-05, "adam_stats/v_t_mean": 3.142875933462297e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.53125, "advantages/max": 1.0, "advantages/median": 1.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25164473056793213, "all_logprobs": -0.00790372770279646, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -5.75, "all_logprobs/p1": -0.1005859375, "all_logprobs/p10": -3.5762786865234375e-07, "all_logprobs/p25": 0.0, "all_logprobs/p5": -2.0623207092285156e-05, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.012089135125279427, "clip_ratio": 0.0, "completion_length": 599.6771240234375, "completion_length/correct": 542.0980834960938, "completion_length/correct/max": 989.0, "completion_length/correct/median": 429.0, "completion_length/correct/min": 306.0, "completion_length/correct/p25": 384.5, "completion_length/correct/p75": 718.0, "completion_length/correct/var": 41971.2109375, "completion_length/incorrect": 664.933349609375, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 595.0, "completion_length/incorrect/min": 242.0, "completion_length/incorrect/p25": 526.0, "completion_length/incorrect/p75": 967.0, "completion_length/incorrect/var": 78216.5703125, "completion_length/max": 1024.0, "completion_length/median": 569.0, "completion_length/min": 242.0, "completion_length/p25": 412.0, "completion_length/p75": 785.25, "completion_length/var": 62113.671875, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.1584, "feature_vector_variance/max_squared_error": 61370.82421875, "feature_vector_variance/metric": 30969.578125, "generated_tokens/total": 5361229.0, "global_fisher_curvature": 128000.0, "global_fisher_curvature/max": 128000.0, "global_fisher_curvature/median": 128000.0, "global_fisher_curvature/min": 128000.0, "global_fisher_curvature/p25": 128000.0, "global_fisher_curvature/p75": 128000.0, "global_fisher_curvature/p85": 128000.0, "global_fisher_curvature/p90": 128000.0, "global_fisher_curvature/p95": 128000.0, "global_fisher_curvature/p99": 128000.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 2.1373125491663814e-11, "global_fisher_kl_divergence/max": 2.1373125491663814e-11, "global_fisher_kl_divergence/median": 2.1373125491663814e-11, "global_fisher_kl_divergence/min": 2.1373125491663814e-11, "global_fisher_kl_divergence/p25": 2.1373125491663814e-11, "global_fisher_kl_divergence/p75": 2.1373125491663814e-11, "global_fisher_kl_divergence/p85": 2.1373125491663814e-11, "global_fisher_kl_divergence/p90": 2.1373125491663814e-11, "global_fisher_kl_divergence/p95": 2.1373125491663814e-11, "global_fisher_kl_divergence/p99": 2.1373125491663814e-11, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 6.914138793945312e-05, "global_full_update_term/max": 6.914138793945312e-05, "global_full_update_term/median": 6.914138793945312e-05, "global_full_update_term/min": 6.914138793945312e-05, "global_full_update_term/p25": 6.914138793945312e-05, "global_full_update_term/p75": 6.914138793945312e-05, "global_full_update_term/p85": 6.914138793945312e-05, "global_full_update_term/p90": 6.914138793945312e-05, "global_full_update_term/p95": 6.914138793945312e-05, "global_full_update_term/p99": 6.914138793945312e-05, "global_full_update_term/var": NaN, "global_hessian_coeff": 46336.0, "global_hessian_coeff/max": 46336.0, "global_hessian_coeff/median": 46336.0, "global_hessian_coeff/min": 46336.0, "global_hessian_coeff/p25": 46336.0, "global_hessian_coeff/p75": 46336.0, "global_hessian_coeff/p99": 46336.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 46336.0, "global_hessian_coeff_abs/max": 46336.0, "global_hessian_coeff_abs/median": 46336.0, "global_hessian_coeff_abs/min": 46336.0, "global_hessian_coeff_abs/p25": 46336.0, "global_hessian_coeff_abs/p75": 46336.0, "global_hessian_coeff_abs/p99": 46336.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.8601635098457336, "learning_rate": 4.568797356781784e-09, "loss": -0.5312, "masked_global_fisher_curvature": 128000.0, "masked_global_fisher_curvature/max": 128000.0, "masked_global_fisher_curvature/median": 128000.0, "masked_global_fisher_curvature/min": 128000.0, "masked_global_fisher_curvature/p25": 128000.0, "masked_global_fisher_curvature/p75": 128000.0, "masked_global_fisher_curvature/p85": 128000.0, "masked_global_fisher_curvature/p90": 128000.0, "masked_global_fisher_curvature/p95": 128000.0, "masked_global_fisher_curvature/p99": 128000.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/max": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/median": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/min": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/p25": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/p75": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/p85": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/p90": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/p95": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/p99": 2.1373125491663814e-11, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 6.914138793945312e-05, "masked_global_full_update_term/max": 6.914138793945312e-05, "masked_global_full_update_term/median": 6.914138793945312e-05, "masked_global_full_update_term/min": 6.914138793945312e-05, "masked_global_full_update_term/p25": 6.914138793945312e-05, "masked_global_full_update_term/p75": 6.914138793945312e-05, "masked_global_full_update_term/p85": 6.914138793945312e-05, "masked_global_full_update_term/p90": 6.914138793945312e-05, "masked_global_full_update_term/p95": 6.914138793945312e-05, "masked_global_full_update_term/p99": 6.914138793945312e-05, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 46336.0, "masked_global_hessian_coeff/max": 46336.0, "masked_global_hessian_coeff/median": 46336.0, "masked_global_hessian_coeff/min": 46336.0, "masked_global_hessian_coeff/p25": 46336.0, "masked_global_hessian_coeff/p75": 46336.0, "masked_global_hessian_coeff/p99": 46336.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 46336.0, "masked_global_hessian_coeff_abs/max": 46336.0, "masked_global_hessian_coeff_abs/median": 46336.0, "masked_global_hessian_coeff_abs/min": 46336.0, "masked_global_hessian_coeff_abs/p25": 46336.0, "masked_global_hessian_coeff_abs/p75": 46336.0, "masked_global_hessian_coeff_abs/p99": 46336.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 29.955730438232422, "masked_per_sentence_gradient_norm/max": 192.0, "masked_per_sentence_gradient_norm/median": 16.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 46.9375, "masked_per_sentence_gradient_norm/var": 1768.3363037109375, "masked_per_token_gradient_norm": 0.5747005343437195, "masked_per_token_gradient_norm/max": 290.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 1.229636836796999e-09, "masked_per_token_gradient_norm/var": 82.84355163574219, "masked_sentence_fisher_curvature": 320769.9375, "masked_sentence_fisher_curvature/max": 1433600.0, "masked_sentence_fisher_curvature/median": 132096.0, "masked_sentence_fisher_curvature/min": 454.0, "masked_sentence_fisher_curvature/p25": 7552.0, "masked_sentence_fisher_curvature/p75": 518144.0, "masked_sentence_fisher_curvature/p85": 683008.0, "masked_sentence_fisher_curvature/p90": 874496.0, "masked_sentence_fisher_curvature/p95": 1181696.0, "masked_sentence_fisher_curvature/p99": 1402470.5, "masked_sentence_fisher_curvature/var": 151664738304.0, "masked_sentence_fisher_kl_divergence": 5.356175919457584e-11, "masked_sentence_fisher_kl_divergence/max": 2.4010660126805305e-10, "masked_sentence_fisher_kl_divergence/median": 2.205524651799351e-11, "masked_sentence_fisher_kl_divergence/min": 7.593925488436071e-14, "masked_sentence_fisher_kl_divergence/p25": 1.2594369991347776e-12, "masked_sentence_fisher_kl_divergence/p75": 8.65156835061498e-11, "masked_sentence_fisher_kl_divergence/p85": 1.141415850725025e-10, "masked_sentence_fisher_kl_divergence/p90": 1.4597389963455498e-10, "masked_sentence_fisher_kl_divergence/p95": 1.9690560293383896e-10, "masked_sentence_fisher_kl_divergence/p99": 2.3492249812129273e-10, "masked_sentence_fisher_kl_divergence/var": 4.233245822653221e-21, "masked_sentence_full_gradient_variance/max_squared_error": 2577.76171875, "masked_sentence_full_gradient_variance/metric": 2577.76171875, "masked_sentence_full_gradient_variance/p75": 2577.76171875, "masked_sentence_full_gradient_variance/p90": 2577.76171875, "masked_sentence_full_gradient_variance/p95": 2577.76171875, "masked_sentence_full_gradient_variance/p99": 2577.76171875, "masked_sentence_full_update_term": 4.978477954864502e-05, "masked_sentence_full_update_term/max": 0.0003795623779296875, "masked_sentence_full_update_term/median": 2.467632293701172e-05, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 8.58306884765625e-05, "masked_sentence_full_update_term/p85": 0.00011515617370605469, "masked_sentence_full_update_term/p90": 0.00012087821960449219, "masked_sentence_full_update_term/p95": 0.0001773834228515625, "masked_sentence_full_update_term/p99": 0.00030527138733305037, "masked_sentence_full_update_term/var": 5.05051422905467e-09, "masked_sentence_hessian_coeff": 77796.0, "masked_sentence_hessian_coeff/max": 704512.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -348160.0, "masked_sentence_hessian_coeff/p25": 0.0, "masked_sentence_hessian_coeff/p75": 125952.0, "masked_sentence_hessian_coeff/p99": 634470.625, "masked_sentence_hessian_coeff/var": 35753349120.0, "masked_sentence_hessian_coeff_abs": 107628.0, "masked_sentence_hessian_coeff_abs/max": 704512.0, "masked_sentence_hessian_coeff_abs/median": 3488.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 203520.0, "masked_sentence_hessian_coeff_abs/p99": 634470.625, "masked_sentence_hessian_coeff_abs/var": 30163558400.0, "masked_token_fisher_curvature": 292275.8125, "masked_token_fisher_curvature/max": 251658240.0, "masked_token_fisher_curvature/median": 1.7753810574450135e-18, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.5155176033503676e-24, "masked_token_fisher_curvature/p75": 1.0391687510491465e-13, "masked_token_fisher_curvature/p85": 7.257838774421543e-11, "masked_token_fisher_curvature/p90": 1.2887085176771507e-08, "masked_token_fisher_curvature/p95": 8.783116936683655e-05, "masked_token_fisher_curvature/p99": 91136.0, "masked_token_fisher_curvature/var": 37024417447936.0, "masked_token_fisher_kl_divergence": 4.876001338804947e-11, "masked_token_fisher_kl_divergence/max": 4.190951585769653e-08, "masked_token_fisher_kl_divergence/median": 2.96412655503348e-34, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 5.510129769479473e-40, "masked_token_fisher_kl_divergence/p75": 1.735493991486226e-29, "masked_token_fisher_kl_divergence/p85": 1.2137216672504182e-26, "masked_token_fisher_kl_divergence/p90": 2.1506493977986983e-24, "masked_token_fisher_kl_divergence/p95": 1.4674597656996987e-20, "masked_token_fisher_kl_divergence/p99": 1.5234036254696548e-11, "masked_token_fisher_kl_divergence/var": 1.0305438967273787e-18, "masked_token_full_update_term": 6.145971838122932e-07, "masked_token_full_update_term/max": 0.0002899169921875, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -5.471520125865936e-08, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.341043854678289e-21, "masked_token_full_update_term/p85": 3.590911476589881e-17, "masked_token_full_update_term/p90": 1.214306433183765e-15, "masked_token_full_update_term/p95": 4.0323300254385686e-13, "masked_token_full_update_term/p99": 1.7881393432617188e-06, "masked_token_full_update_term/var": 9.547343471361103e-11, "masked_token_hessian_coeff": 44771.25, "masked_token_hessian_coeff/max": 244318208.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -34078720.0, "masked_token_hessian_coeff/p25": -8.149072527885437e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.06689453125, "masked_token_hessian_coeff/var": 22715157184512.0, "masked_token_hessian_coeff_abs": 260702.328125, "masked_token_hessian_coeff_abs/max": 244318208.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 8.083879947662354e-07, "masked_token_hessian_coeff_abs/p99": 1062016.0, "masked_token_hessian_coeff_abs/var": 22649197559808.0, "mean_logprobs": -0.0079345703125, "mean_logprobs/var": 3.552436828613281e-05, "num_completions/total": 9504, "per_sentence_gradient_norm": 29.955730438232422, "per_sentence_gradient_norm/max": 192.0, "per_sentence_gradient_norm/median": 16.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 46.9375, "per_sentence_gradient_norm/var": 1768.3363037109375, "per_token_feature_norm": 190.7418212890625, "per_token_feature_norm/max": 264.0, "per_token_feature_norm/median": 190.0, "per_token_feature_norm/min": 101.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 198.0, "per_token_feature_norm/var": 147.6659393310547, "per_token_gradient_norm": 0.5747005343437195, "per_token_gradient_norm/max": 290.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 1.229636836796999e-09, "per_token_gradient_norm/var": 82.84355163574219, "per_token_policy_error_norm": 0.004756113048642874, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.004877401050180197, "policy_entropy": 0.007743372116237879, "policy_entropy/max": 2.796875, "policy_entropy/median": 4.31100488640368e-10, "policy_entropy/min": 4.489274620447792e-20, "policy_entropy/p25": 1.0018652574217413e-12, "policy_entropy/p75": 5.611218512058258e-08, "policy_entropy/var": 0.0046765743754804134, "policy_loss": -0.53125, "policy_loss/max": 0.0, "policy_loss/median": -1.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25164473056793213, "policy_sharpness": 9.78945255279541, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.3943309783935547, "reward": 0.53125, "reward/max": 1.0, "reward/median": 1.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25164473056793213, "rewards/accuracy_reward": 0.53125, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 1.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25164473056793213, "sentence_fisher_curvature": 320769.9375, "sentence_fisher_curvature/max": 1433600.0, "sentence_fisher_curvature/median": 132096.0, "sentence_fisher_curvature/min": 454.0, "sentence_fisher_curvature/p25": 7552.0, "sentence_fisher_curvature/p75": 518144.0, "sentence_fisher_curvature/p85": 683008.0, "sentence_fisher_curvature/p90": 874496.0, "sentence_fisher_curvature/p95": 1181696.0, "sentence_fisher_curvature/p99": 1402470.5, "sentence_fisher_curvature/var": 151664738304.0, "sentence_fisher_kl_divergence": 5.356175919457584e-11, "sentence_fisher_kl_divergence/max": 2.4010660126805305e-10, "sentence_fisher_kl_divergence/median": 2.205524651799351e-11, "sentence_fisher_kl_divergence/min": 7.593925488436071e-14, "sentence_fisher_kl_divergence/p25": 1.2594369991347776e-12, "sentence_fisher_kl_divergence/p75": 8.65156835061498e-11, "sentence_fisher_kl_divergence/p85": 1.141415850725025e-10, "sentence_fisher_kl_divergence/p90": 1.4597389963455498e-10, "sentence_fisher_kl_divergence/p95": 1.9690560293383896e-10, "sentence_fisher_kl_divergence/p99": 2.3492249812129273e-10, "sentence_fisher_kl_divergence/var": 4.233245822653221e-21, "sentence_full_gradient_variance/max_squared_error": 2577.76171875, "sentence_full_gradient_variance/metric": 2577.76171875, "sentence_full_gradient_variance/p75": 2577.76171875, "sentence_full_gradient_variance/p90": 2577.76171875, "sentence_full_gradient_variance/p95": 2577.76171875, "sentence_full_gradient_variance/p99": 2577.76171875, "sentence_full_update_term": 4.978477954864502e-05, "sentence_full_update_term/max": 0.0003795623779296875, "sentence_full_update_term/median": 2.467632293701172e-05, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 8.58306884765625e-05, "sentence_full_update_term/p85": 0.00011515617370605469, "sentence_full_update_term/p90": 0.00012087821960449219, "sentence_full_update_term/p95": 0.0001773834228515625, "sentence_full_update_term/p99": 0.00030527138733305037, "sentence_full_update_term/var": 5.05051422905467e-09, "sentence_hessian_coeff": 77796.0, "sentence_hessian_coeff/max": 704512.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -348160.0, "sentence_hessian_coeff/p25": 0.0, "sentence_hessian_coeff/p75": 125952.0, "sentence_hessian_coeff/p99": 634470.625, "sentence_hessian_coeff/var": 35753349120.0, "sentence_hessian_coeff_abs": 107628.0, "sentence_hessian_coeff_abs/max": 704512.0, "sentence_hessian_coeff_abs/median": 3488.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 203520.0, "sentence_hessian_coeff_abs/p99": 634470.625, "sentence_hessian_coeff_abs/var": 30163558400.0, "step": 99, "token_fisher_curvature": 292275.8125, "token_fisher_curvature/max": 251658240.0, "token_fisher_curvature/median": 1.7753810574450135e-18, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 3.5155176033503676e-24, "token_fisher_curvature/p75": 1.0391687510491465e-13, "token_fisher_curvature/p85": 7.257838774421543e-11, "token_fisher_curvature/p90": 1.2887085176771507e-08, "token_fisher_curvature/p95": 8.783116936683655e-05, "token_fisher_curvature/p99": 91136.0, "token_fisher_curvature/var": 37024417447936.0, "token_fisher_kl_divergence": 4.876001338804947e-11, "token_fisher_kl_divergence/max": 4.190951585769653e-08, "token_fisher_kl_divergence/median": 2.96412655503348e-34, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 5.510129769479473e-40, "token_fisher_kl_divergence/p75": 1.735493991486226e-29, "token_fisher_kl_divergence/p85": 1.2137216672504182e-26, "token_fisher_kl_divergence/p90": 2.1506493977986983e-24, "token_fisher_kl_divergence/p95": 1.4674597656996987e-20, "token_fisher_kl_divergence/p99": 1.5234036254696548e-11, "token_fisher_kl_divergence/var": 1.0305438967273787e-18, "token_full_update_term": 6.145971838122932e-07, "token_full_update_term/max": 0.0002899169921875, "token_full_update_term/median": 0.0, "token_full_update_term/min": -5.471520125865936e-08, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.341043854678289e-21, "token_full_update_term/p85": 3.590911476589881e-17, "token_full_update_term/p90": 1.214306433183765e-15, "token_full_update_term/p95": 4.0323300254385686e-13, "token_full_update_term/p99": 1.7881393432617188e-06, "token_full_update_term/var": 9.547343471361103e-11, "token_hessian_coeff": 44771.25, "token_hessian_coeff/max": 244318208.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -34078720.0, "token_hessian_coeff/p25": -8.149072527885437e-09, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.06689453125, "token_hessian_coeff/var": 22715157184512.0, "token_hessian_coeff_abs": 260702.328125, "token_hessian_coeff_abs/max": 244318208.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 8.083879947662354e-07, "token_hessian_coeff_abs/p99": 1062016.0, "token_hessian_coeff_abs/var": 22649197559808.0 }, { "accuracy_reward": 0.46875, "accuracy_reward/correct": 1.0, "accuracy_reward/correct/max": 1.0, "accuracy_reward/correct/median": 1.0, "accuracy_reward/correct/min": 1.0, "accuracy_reward/correct/p25": 1.0, "accuracy_reward/correct/p75": 1.0, "accuracy_reward/correct/var": 0.0, "accuracy_reward/incorrect": 0.0, "accuracy_reward/incorrect/max": 0.0, "accuracy_reward/incorrect/median": 0.0, "accuracy_reward/incorrect/min": 0.0, "accuracy_reward/incorrect/p25": 0.0, "accuracy_reward/incorrect/p75": 0.0, "accuracy_reward/incorrect/var": 0.0, "accuracy_reward/max": 1.0, "accuracy_reward/median": 0.0, "accuracy_reward/min": 0.0, "accuracy_reward/p25": 0.0, "accuracy_reward/p75": 1.0, "accuracy_reward/var": 0.25164473056793213, "adam_stats/lm_head/lr_effective_max": 0.0, "adam_stats/lm_head/lr_effective_mean": 0.0, "adam_stats/lm_head/lr_effective_min": 0.0, "adam_stats/lm_head/lr_effective_std": 0.0, "adam_stats/lr_effective_max": 0.0, "adam_stats/lr_effective_mean": 0.0, "adam_stats/lr_effective_min": 0.0, "adam_stats/m_t_max": 0.003916438203305006, "adam_stats/m_t_mean": -5.864969621072191e-11, "adam_stats/m_t_min": -0.0036701092030853033, "adam_stats/v_t_max": 2.7859794499818236e-05, "adam_stats/v_t_mean": 3.196898908791601e-12, "adam_stats/v_t_min": 0.0, "advantages": 0.46875, "advantages/max": 1.0, "advantages/median": 0.0, "advantages/min": 0.0, "advantages/p25": 0.0, "advantages/p75": 1.0, "advantages/var": 0.25164473056793213, "all_logprobs": -0.009668182581663132, "all_logprobs/max": 0.0, "all_logprobs/median": 0.0, "all_logprobs/min": -7.125, "all_logprobs/p1": -0.20414066314697266, "all_logprobs/p10": -1.7881393432617188e-06, "all_logprobs/p25": 0.0, "all_logprobs/p5": -0.00016598706133663654, "all_logprobs/p75": 0.0, "all_logprobs/var": 0.013521862216293812, "clip_ratio": 0.0, "completion_length": 578.125, "completion_length/correct": 548.86669921875, "completion_length/correct/max": 1015.0, "completion_length/correct/median": 511.0, "completion_length/correct/min": 337.0, "completion_length/correct/p25": 429.0, "completion_length/correct/p75": 602.0, "completion_length/correct/var": 28687.482421875, "completion_length/incorrect": 603.9412231445312, "completion_length/incorrect/max": 1024.0, "completion_length/incorrect/median": 536.0, "completion_length/incorrect/min": 308.0, "completion_length/incorrect/p25": 411.0, "completion_length/incorrect/p75": 761.0, "completion_length/incorrect/var": 58153.13671875, "completion_length/max": 1024.0, "completion_length/median": 527.0, "completion_length/min": 308.0, "completion_length/p25": 413.25, "completion_length/p75": 657.25, "completion_length/var": 44657.03515625, "curvature_clip_ratio_token_fisher": 0.0, "curvature_clip_ratio_token_hessian": 0.0, "curvature_clip_ratio_total_fisher": 0.0, "curvature_clip_ratio_total_full": 0.0, "curvature_clip_ratio_total_hessian": 0.0, "epoch": 0.16, "feature_vector_variance/max_squared_error": 67186.578125, "feature_vector_variance/metric": 30947.58203125, "generated_tokens/total": 5416729.0, "global_fisher_curvature": 137216.0, "global_fisher_curvature/max": 137216.0, "global_fisher_curvature/median": 137216.0, "global_fisher_curvature/min": 137216.0, "global_fisher_curvature/p25": 137216.0, "global_fisher_curvature/p75": 137216.0, "global_fisher_curvature/p85": 137216.0, "global_fisher_curvature/p90": 137216.0, "global_fisher_curvature/p95": 137216.0, "global_fisher_curvature/p99": 137216.0, "global_fisher_curvature/var": NaN, "global_fisher_kl_divergence": 1.4352963262354024e-12, "global_fisher_kl_divergence/max": 1.4352963262354024e-12, "global_fisher_kl_divergence/median": 1.4352963262354024e-12, "global_fisher_kl_divergence/min": 1.4352963262354024e-12, "global_fisher_kl_divergence/p25": 1.4352963262354024e-12, "global_fisher_kl_divergence/p75": 1.4352963262354024e-12, "global_fisher_kl_divergence/p85": 1.4352963262354024e-12, "global_fisher_kl_divergence/p90": 1.4352963262354024e-12, "global_fisher_kl_divergence/p95": 1.4352963262354024e-12, "global_fisher_kl_divergence/p99": 1.4352963262354024e-12, "global_fisher_kl_divergence/var": NaN, "global_full_update_term": 4.172325134277344e-05, "global_full_update_term/max": 4.172325134277344e-05, "global_full_update_term/median": 4.172325134277344e-05, "global_full_update_term/min": 4.172325134277344e-05, "global_full_update_term/p25": 4.172325134277344e-05, "global_full_update_term/p75": 4.172325134277344e-05, "global_full_update_term/p85": 4.172325134277344e-05, "global_full_update_term/p90": 4.172325134277344e-05, "global_full_update_term/p95": 4.172325134277344e-05, "global_full_update_term/p99": 4.172325134277344e-05, "global_full_update_term/var": NaN, "global_hessian_coeff": 7552.0, "global_hessian_coeff/max": 7552.0, "global_hessian_coeff/median": 7552.0, "global_hessian_coeff/min": 7552.0, "global_hessian_coeff/p25": 7552.0, "global_hessian_coeff/p75": 7552.0, "global_hessian_coeff/p99": 7552.0, "global_hessian_coeff/var": NaN, "global_hessian_coeff_abs": 7552.0, "global_hessian_coeff_abs/max": 7552.0, "global_hessian_coeff_abs/median": 7552.0, "global_hessian_coeff_abs/min": 7552.0, "global_hessian_coeff_abs/p25": 7552.0, "global_hessian_coeff_abs/p75": 7552.0, "global_hessian_coeff_abs/p99": 7552.0, "global_hessian_coeff_abs/var": NaN, "grad_norm": 0.597848653793335, "learning_rate": 0.0, "loss": -0.4688, "masked_global_fisher_curvature": 137216.0, "masked_global_fisher_curvature/max": 137216.0, "masked_global_fisher_curvature/median": 137216.0, "masked_global_fisher_curvature/min": 137216.0, "masked_global_fisher_curvature/p25": 137216.0, "masked_global_fisher_curvature/p75": 137216.0, "masked_global_fisher_curvature/p85": 137216.0, "masked_global_fisher_curvature/p90": 137216.0, "masked_global_fisher_curvature/p95": 137216.0, "masked_global_fisher_curvature/p99": 137216.0, "masked_global_fisher_curvature/var": NaN, "masked_global_fisher_kl_divergence": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/max": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/median": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/min": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/p25": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/p75": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/p85": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/p90": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/p95": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/p99": 1.4352963262354024e-12, "masked_global_fisher_kl_divergence/var": NaN, "masked_global_full_update_term": 4.172325134277344e-05, "masked_global_full_update_term/max": 4.172325134277344e-05, "masked_global_full_update_term/median": 4.172325134277344e-05, "masked_global_full_update_term/min": 4.172325134277344e-05, "masked_global_full_update_term/p25": 4.172325134277344e-05, "masked_global_full_update_term/p75": 4.172325134277344e-05, "masked_global_full_update_term/p85": 4.172325134277344e-05, "masked_global_full_update_term/p90": 4.172325134277344e-05, "masked_global_full_update_term/p95": 4.172325134277344e-05, "masked_global_full_update_term/p99": 4.172325134277344e-05, "masked_global_full_update_term/var": NaN, "masked_global_hessian_coeff": 7552.0, "masked_global_hessian_coeff/max": 7552.0, "masked_global_hessian_coeff/median": 7552.0, "masked_global_hessian_coeff/min": 7552.0, "masked_global_hessian_coeff/p25": 7552.0, "masked_global_hessian_coeff/p75": 7552.0, "masked_global_hessian_coeff/p99": 7552.0, "masked_global_hessian_coeff/var": NaN, "masked_global_hessian_coeff_abs": 7552.0, "masked_global_hessian_coeff_abs/max": 7552.0, "masked_global_hessian_coeff_abs/median": 7552.0, "masked_global_hessian_coeff_abs/min": 7552.0, "masked_global_hessian_coeff_abs/p25": 7552.0, "masked_global_hessian_coeff_abs/p75": 7552.0, "masked_global_hessian_coeff_abs/p99": 7552.0, "masked_global_hessian_coeff_abs/var": NaN, "masked_per_sentence_gradient_norm": 29.88671875, "masked_per_sentence_gradient_norm/max": 187.0, "masked_per_sentence_gradient_norm/median": 0.0, "masked_per_sentence_gradient_norm/min": 0.0, "masked_per_sentence_gradient_norm/p25": 0.0, "masked_per_sentence_gradient_norm/p75": 50.5625, "masked_per_sentence_gradient_norm/var": 1555.798828125, "masked_per_token_gradient_norm": 0.6376360654830933, "masked_per_token_gradient_norm/max": 272.0, "masked_per_token_gradient_norm/median": 0.0, "masked_per_token_gradient_norm/min": 0.0, "masked_per_token_gradient_norm/p1": 0.0, "masked_per_token_gradient_norm/p10": 0.0, "masked_per_token_gradient_norm/p25": 0.0, "masked_per_token_gradient_norm/p5": 0.0, "masked_per_token_gradient_norm/p75": 9.74978320300579e-10, "masked_per_token_gradient_norm/var": 80.7520980834961, "masked_sentence_fisher_curvature": 299256.84375, "masked_sentence_fisher_curvature/max": 1646592.0, "masked_sentence_fisher_curvature/median": 55040.0, "masked_sentence_fisher_curvature/min": 432.0, "masked_sentence_fisher_curvature/p25": 7104.0, "masked_sentence_fisher_curvature/p75": 526336.0, "masked_sentence_fisher_curvature/p85": 761856.0, "masked_sentence_fisher_curvature/p90": 829440.0, "masked_sentence_fisher_curvature/p95": 1118208.0, "masked_sentence_fisher_curvature/p99": 1452032.625, "masked_sentence_fisher_curvature/var": 155081850880.0, "masked_sentence_fisher_kl_divergence": 3.125117786079157e-12, "masked_sentence_fisher_kl_divergence/max": 1.716671249596402e-11, "masked_sentence_fisher_kl_divergence/median": 5.755396159656812e-13, "masked_sentence_fisher_kl_divergence/min": 4.496403249731884e-15, "masked_sentence_fisher_kl_divergence/p25": 7.394085344003543e-14, "masked_sentence_fisher_kl_divergence/p75": 5.5067062021407764e-12, "masked_sentence_fisher_kl_divergence/p85": 7.958078640513122e-12, "masked_sentence_fisher_kl_divergence/p90": 8.668621376273222e-12, "masked_sentence_fisher_kl_divergence/p95": 1.1695533430611249e-11, "masked_sentence_fisher_kl_divergence/p99": 1.511467166703806e-11, "masked_sentence_fisher_kl_divergence/var": 1.6890163579889575e-23, "masked_sentence_full_gradient_variance/max_squared_error": 2370.05859375, "masked_sentence_full_gradient_variance/metric": 2370.05859375, "masked_sentence_full_gradient_variance/p75": 2370.05859375, "masked_sentence_full_gradient_variance/p90": 2370.05859375, "masked_sentence_full_gradient_variance/p95": 2370.05859375, "masked_sentence_full_gradient_variance/p99": 2370.05859375, "masked_sentence_full_update_term": 1.3470649719238281e-05, "masked_sentence_full_update_term/max": 6.031990051269531e-05, "masked_sentence_full_update_term/median": 0.0, "masked_sentence_full_update_term/min": 0.0, "masked_sentence_full_update_term/p25": 0.0, "masked_sentence_full_update_term/p75": 2.6434659957885742e-05, "masked_sentence_full_update_term/p85": 3.319978713989258e-05, "masked_sentence_full_update_term/p90": 3.802776336669922e-05, "masked_sentence_full_update_term/p95": 4.476308822631836e-05, "masked_sentence_full_update_term/p99": 5.91874158999417e-05, "masked_sentence_full_update_term/var": 2.870946536059904e-10, "masked_sentence_hessian_coeff": 25926.66796875, "masked_sentence_hessian_coeff/max": 872448.0, "masked_sentence_hessian_coeff/median": 0.0, "masked_sentence_hessian_coeff/min": -360448.0, "masked_sentence_hessian_coeff/p25": -25600.0, "masked_sentence_hessian_coeff/p75": 0.0, "masked_sentence_hessian_coeff/p99": 681779.8125, "masked_sentence_hessian_coeff/var": 38736691200.0, "masked_sentence_hessian_coeff_abs": 100625.3359375, "masked_sentence_hessian_coeff_abs/max": 872448.0, "masked_sentence_hessian_coeff_abs/median": 0.0, "masked_sentence_hessian_coeff_abs/min": 0.0, "masked_sentence_hessian_coeff_abs/p25": 0.0, "masked_sentence_hessian_coeff_abs/p75": 149504.0, "masked_sentence_hessian_coeff_abs/p99": 681779.8125, "masked_sentence_hessian_coeff_abs/var": 29183918080.0, "masked_token_fisher_curvature": 304444.40625, "masked_token_fisher_curvature/max": 264241152.0, "masked_token_fisher_curvature/median": 2.8406096919120216e-17, "masked_token_fisher_curvature/min": 0.0, "masked_token_fisher_curvature/p25": 3.494838088036542e-23, "masked_token_fisher_curvature/p75": 2.6574298317427747e-12, "masked_token_fisher_curvature/p85": 2.08092387765646e-09, "masked_token_fisher_curvature/p90": 3.110617399215698e-07, "masked_token_fisher_curvature/p95": 0.01502084732055664, "masked_token_fisher_curvature/p99": 389120.0, "masked_token_fisher_curvature/var": 34165948940288.0, "masked_token_fisher_kl_divergence": 3.1775112055432153e-12, "masked_token_fisher_kl_divergence/max": 2.764863893389702e-09, "masked_token_fisher_kl_divergence/median": 2.96412655503348e-34, "masked_token_fisher_kl_divergence/min": 0.0, "masked_token_fisher_kl_divergence/p25": 3.6734198463196485e-40, "masked_token_fisher_kl_divergence/p75": 2.7807346909040666e-29, "masked_token_fisher_kl_divergence/p85": 2.1709452111682245e-26, "masked_token_fisher_kl_divergence/p90": 3.2440989648564054e-24, "masked_token_fisher_kl_divergence/p95": 1.5678712202575108e-19, "masked_token_fisher_kl_divergence/p99": 4.064304448547773e-12, "masked_token_fisher_kl_divergence/var": 3.720073956617833e-21, "masked_token_full_update_term": 1.746562361404358e-07, "masked_token_full_update_term/max": 7.43865966796875e-05, "masked_token_full_update_term/median": 0.0, "masked_token_full_update_term/min": -3.259629011154175e-08, "masked_token_full_update_term/p25": 0.0, "masked_token_full_update_term/p75": 4.499862532288471e-22, "masked_token_full_update_term/p85": 7.101524229780054e-18, "masked_token_full_update_term/p90": 6.626643678231403e-16, "masked_token_full_update_term/p95": 2.824407374646398e-13, "masked_token_full_update_term/p99": 1.0058283805847168e-06, "masked_token_full_update_term/var": 6.138452159770713e-12, "masked_token_hessian_coeff": -10491.037109375, "masked_token_hessian_coeff/max": 260046848.0, "masked_token_hessian_coeff/median": -0.0, "masked_token_hessian_coeff/min": -34078720.0, "masked_token_hessian_coeff/p25": -2.1245796233415604e-09, "masked_token_hessian_coeff/p75": 0.0, "masked_token_hessian_coeff/p99": 0.05176353454589844, "masked_token_hessian_coeff/var": 21140292501504.0, "masked_token_hessian_coeff_abs": 272381.0, "masked_token_hessian_coeff_abs/max": 260046848.0, "masked_token_hessian_coeff_abs/median": 0.0, "masked_token_hessian_coeff_abs/min": 0.0, "masked_token_hessian_coeff_abs/p25": 0.0, "masked_token_hessian_coeff_abs/p75": 4.76837158203125e-07, "masked_token_hessian_coeff_abs/p99": 2260992.0, "masked_token_hessian_coeff_abs/var": 21066208509952.0, "mean_logprobs": -0.00958251953125, "mean_logprobs/var": 3.910064697265625e-05, "num_completions/total": 9600, "per_sentence_gradient_norm": 29.88671875, "per_sentence_gradient_norm/max": 187.0, "per_sentence_gradient_norm/median": 0.0, "per_sentence_gradient_norm/min": 0.0, "per_sentence_gradient_norm/p25": 0.0, "per_sentence_gradient_norm/p75": 50.5625, "per_sentence_gradient_norm/var": 1555.798828125, "per_token_feature_norm": 189.79144287109375, "per_token_feature_norm/max": 274.0, "per_token_feature_norm/median": 189.0, "per_token_feature_norm/min": 113.0, "per_token_feature_norm/p25": 184.0, "per_token_feature_norm/p75": 196.0, "per_token_feature_norm/var": 135.99588012695312, "per_token_gradient_norm": 0.6376360654830933, "per_token_gradient_norm/max": 272.0, "per_token_gradient_norm/median": 0.0, "per_token_gradient_norm/min": 0.0, "per_token_gradient_norm/p1": 0.0, "per_token_gradient_norm/p10": 0.0, "per_token_gradient_norm/p25": 0.0, "per_token_gradient_norm/p5": 0.0, "per_token_gradient_norm/p75": 9.74978320300579e-10, "per_token_gradient_norm/var": 80.7520980834961, "per_token_policy_error_norm": 0.00564685370773077, "per_token_policy_error_norm/max": 1.984375, "per_token_policy_error_norm/median": 0.0, "per_token_policy_error_norm/min": 0.0, "per_token_policy_error_norm/p25": 0.0, "per_token_policy_error_norm/p75": 0.0, "per_token_policy_error_norm/var": 0.0051284669898450375, "policy_entropy": 0.01087127160280943, "policy_entropy/max": 2.546875, "policy_entropy/median": 1.4988472685217857e-09, "policy_entropy/min": 1.5352472168984194e-20, "policy_entropy/p25": 3.716138508025324e-12, "policy_entropy/p75": 1.955777406692505e-07, "policy_entropy/var": 0.006275586783885956, "policy_loss": -0.46875, "policy_loss/max": 0.0, "policy_loss/median": 0.0, "policy_loss/min": -1.0, "policy_loss/p25": -1.0, "policy_loss/p75": 0.0, "policy_loss/var": 0.25164473056793213, "policy_sharpness": 9.714157104492188, "policy_sharpness/max": 10.0, "policy_sharpness/median": 10.0, "policy_sharpness/min": -0.0, "policy_sharpness/p25": 10.0, "policy_sharpness/p75": 10.0, "policy_sharpness/var": 1.8465180397033691, "reward": 0.46875, "reward/max": 1.0, "reward/median": 0.0, "reward/min": 0.0, "reward/p25": 0.0, "reward/p75": 1.0, "reward/var": 0.25164473056793213, "rewards/accuracy_reward": 0.46875, "rewards/accuracy_reward/max": 1.0, "rewards/accuracy_reward/median": 0.0, "rewards/accuracy_reward/min": 0.0, "rewards/accuracy_reward/p25": 0.0, "rewards/accuracy_reward/p75": 1.0, "rewards/accuracy_reward/var": 0.25164473056793213, "sentence_fisher_curvature": 299256.84375, "sentence_fisher_curvature/max": 1646592.0, "sentence_fisher_curvature/median": 55040.0, "sentence_fisher_curvature/min": 432.0, "sentence_fisher_curvature/p25": 7104.0, "sentence_fisher_curvature/p75": 526336.0, "sentence_fisher_curvature/p85": 761856.0, "sentence_fisher_curvature/p90": 829440.0, "sentence_fisher_curvature/p95": 1118208.0, "sentence_fisher_curvature/p99": 1452032.625, "sentence_fisher_curvature/var": 155081850880.0, "sentence_fisher_kl_divergence": 3.125117786079157e-12, "sentence_fisher_kl_divergence/max": 1.716671249596402e-11, "sentence_fisher_kl_divergence/median": 5.755396159656812e-13, "sentence_fisher_kl_divergence/min": 4.496403249731884e-15, "sentence_fisher_kl_divergence/p25": 7.394085344003543e-14, "sentence_fisher_kl_divergence/p75": 5.5067062021407764e-12, "sentence_fisher_kl_divergence/p85": 7.958078640513122e-12, "sentence_fisher_kl_divergence/p90": 8.668621376273222e-12, "sentence_fisher_kl_divergence/p95": 1.1695533430611249e-11, "sentence_fisher_kl_divergence/p99": 1.511467166703806e-11, "sentence_fisher_kl_divergence/var": 1.6890163579889575e-23, "sentence_full_gradient_variance/max_squared_error": 2370.05859375, "sentence_full_gradient_variance/metric": 2370.05859375, "sentence_full_gradient_variance/p75": 2370.05859375, "sentence_full_gradient_variance/p90": 2370.05859375, "sentence_full_gradient_variance/p95": 2370.05859375, "sentence_full_gradient_variance/p99": 2370.05859375, "sentence_full_update_term": 1.3470649719238281e-05, "sentence_full_update_term/max": 6.031990051269531e-05, "sentence_full_update_term/median": 0.0, "sentence_full_update_term/min": 0.0, "sentence_full_update_term/p25": 0.0, "sentence_full_update_term/p75": 2.6434659957885742e-05, "sentence_full_update_term/p85": 3.319978713989258e-05, "sentence_full_update_term/p90": 3.802776336669922e-05, "sentence_full_update_term/p95": 4.476308822631836e-05, "sentence_full_update_term/p99": 5.91874158999417e-05, "sentence_full_update_term/var": 2.870946536059904e-10, "sentence_hessian_coeff": 25926.66796875, "sentence_hessian_coeff/max": 872448.0, "sentence_hessian_coeff/median": 0.0, "sentence_hessian_coeff/min": -360448.0, "sentence_hessian_coeff/p25": -25600.0, "sentence_hessian_coeff/p75": 0.0, "sentence_hessian_coeff/p99": 681779.8125, "sentence_hessian_coeff/var": 38736691200.0, "sentence_hessian_coeff_abs": 100625.3359375, "sentence_hessian_coeff_abs/max": 872448.0, "sentence_hessian_coeff_abs/median": 0.0, "sentence_hessian_coeff_abs/min": 0.0, "sentence_hessian_coeff_abs/p25": 0.0, "sentence_hessian_coeff_abs/p75": 149504.0, "sentence_hessian_coeff_abs/p99": 681779.8125, "sentence_hessian_coeff_abs/var": 29183918080.0, "step": 100, "token_fisher_curvature": 304444.40625, "token_fisher_curvature/max": 264241152.0, "token_fisher_curvature/median": 2.8406096919120216e-17, "token_fisher_curvature/min": 0.0, "token_fisher_curvature/p25": 3.494838088036542e-23, "token_fisher_curvature/p75": 2.6574298317427747e-12, "token_fisher_curvature/p85": 2.08092387765646e-09, "token_fisher_curvature/p90": 3.110617399215698e-07, "token_fisher_curvature/p95": 0.01502084732055664, "token_fisher_curvature/p99": 389120.0, "token_fisher_curvature/var": 34165948940288.0, "token_fisher_kl_divergence": 3.1775112055432153e-12, "token_fisher_kl_divergence/max": 2.764863893389702e-09, "token_fisher_kl_divergence/median": 2.96412655503348e-34, "token_fisher_kl_divergence/min": 0.0, "token_fisher_kl_divergence/p25": 3.6734198463196485e-40, "token_fisher_kl_divergence/p75": 2.7807346909040666e-29, "token_fisher_kl_divergence/p85": 2.1709452111682245e-26, "token_fisher_kl_divergence/p90": 3.2440989648564054e-24, "token_fisher_kl_divergence/p95": 1.5678712202575108e-19, "token_fisher_kl_divergence/p99": 4.064304448547773e-12, "token_fisher_kl_divergence/var": 3.720073956617833e-21, "token_full_update_term": 1.746562361404358e-07, "token_full_update_term/max": 7.43865966796875e-05, "token_full_update_term/median": 0.0, "token_full_update_term/min": -3.259629011154175e-08, "token_full_update_term/p25": 0.0, "token_full_update_term/p75": 4.499862532288471e-22, "token_full_update_term/p85": 7.101524229780054e-18, "token_full_update_term/p90": 6.626643678231403e-16, "token_full_update_term/p95": 2.824407374646398e-13, "token_full_update_term/p99": 1.0058283805847168e-06, "token_full_update_term/var": 6.138452159770713e-12, "token_hessian_coeff": -10491.037109375, "token_hessian_coeff/max": 260046848.0, "token_hessian_coeff/median": -0.0, "token_hessian_coeff/min": -34078720.0, "token_hessian_coeff/p25": -2.1245796233415604e-09, "token_hessian_coeff/p75": 0.0, "token_hessian_coeff/p99": 0.05176353454589844, "token_hessian_coeff/var": 21140292501504.0, "token_hessian_coeff_abs": 272381.0, "token_hessian_coeff_abs/max": 260046848.0, "token_hessian_coeff_abs/median": 0.0, "token_hessian_coeff_abs/min": 0.0, "token_hessian_coeff_abs/p25": 0.0, "token_hessian_coeff_abs/p75": 4.76837158203125e-07, "token_hessian_coeff_abs/p99": 2260992.0, "token_hessian_coeff_abs/var": 21066208509952.0 }, { "adam_stats/lm_head/lr_effective_max": 0.0, "adam_stats/lm_head/lr_effective_mean": 0.0, "adam_stats/lm_head/lr_effective_min": 0.0, "adam_stats/lm_head/lr_effective_std": 0.0, "adam_stats/lr_effective_max": 0.0, "adam_stats/lr_effective_mean": 0.0, "adam_stats/lr_effective_min": 0.0, "adam_stats/m_t_max": 0.003916438203305006, "adam_stats/m_t_mean": -5.864969621072191e-11, "adam_stats/m_t_min": -0.0036701092030853033, "adam_stats/v_t_max": 2.7859794499818236e-05, "adam_stats/v_t_mean": 3.196898908791601e-12, "adam_stats/v_t_min": 0.0, "epoch": 0.16, "step": 100, "total_flos": 0.0, "train_loss": -0.6797916829586029, "train_runtime": 15592.1045, "train_samples_per_second": 0.616, "train_steps_per_second": 0.006 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 24, "trial_name": null, "trial_params": null }