diff --git "a/log_bs32_lr3e-05_20221118_065016_906968.txt" "b/log_bs32_lr3e-05_20221118_065016_906968.txt" new file mode 100644--- /dev/null +++ "b/log_bs32_lr3e-05_20221118_065016_906968.txt" @@ -0,0 +1,7843 @@ +------------> log file ==runs2/sst2/1/log_bs32_lr3e-05_20221118_065016_906968.txt +Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/SST-2', do_eval=False, early_stop=True, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/sst2/1', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='sst2', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0) +Distributed environment: NO +Num processes: 1 +Process index: 0 +Local process index: 0 +Device: cuda +Mixed precision type: fp16 + +Sample 40563 of the training set: (tensor([ 101, 2003, 1037, 21207, 2121, 102, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(0)). +Sample 624 of the training set: (tensor([ 101, 2008, 4654, 17847, 2015, 1996, 9647, 1998, 16356, 4244, + 1996, 6057, 5923, 102, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)). +Sample 42386 of the training set: (tensor([ 101, 1996, 5896, 1010, 1996, 18201, 2015, 1010, 1996, 3494, + 2024, 2035, 3622, 1011, 2000, 1011, 2678, 4933, 1010, 1998, + 2008, 1005, 1055, 2073, 2023, 2143, 2323, 2031, 2815, 102, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(0)). +***** Running training ***** + Num examples = 67349 + Num Epochs = 30 + Instantaneous batch size per device = 32 + Total train batch size (w. 
parallel, distributed & accumulation) = 32 + Gradient Accumulation steps = 1 + Total optimization steps = 63150 +000005/063150, loss: 0.699066, avg_loss: 0.698294 +000010/063150, loss: 0.684998, avg_loss: 0.702415 +000015/063150, loss: 0.677322, avg_loss: 0.700488 +000020/063150, loss: 0.696426, avg_loss: 0.703039 +000025/063150, loss: 0.706192, avg_loss: 0.702227 +000030/063150, loss: 0.705933, avg_loss: 0.702650 +000035/063150, loss: 0.722244, avg_loss: 0.703319 +000040/063150, loss: 0.702194, avg_loss: 0.701807 +000045/063150, loss: 0.711319, avg_loss: 0.702284 +000050/063150, loss: 0.684219, avg_loss: 0.701999 +000055/063150, loss: 0.694305, avg_loss: 0.701389 +000060/063150, loss: 0.688171, avg_loss: 0.701232 +000065/063150, loss: 0.704636, avg_loss: 0.701469 +000070/063150, loss: 0.708710, avg_loss: 0.701263 +000075/063150, loss: 0.685791, avg_loss: 0.700925 +000080/063150, loss: 0.703445, avg_loss: 0.700651 +000085/063150, loss: 0.714600, avg_loss: 0.700922 +000090/063150, loss: 0.695724, avg_loss: 0.701068 +000095/063150, loss: 0.694199, avg_loss: 0.700950 +000100/063150, loss: 0.694672, avg_loss: 0.700794 +000105/063150, loss: 0.684280, avg_loss: 0.700587 +000110/063150, loss: 0.696747, avg_loss: 0.700711 +000115/063150, loss: 0.696472, avg_loss: 0.700771 +000120/063150, loss: 0.693542, avg_loss: 0.700848 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 120/63150: {'accuracy': 0.4908256880733945} +000125/063150, loss: 0.701477, avg_loss: 0.700951 +000130/063150, loss: 0.680710, avg_loss: 0.700872 +000135/063150, loss: 0.675705, avg_loss: 0.700665 +000140/063150, loss: 0.709198, avg_loss: 0.700597 +000145/063150, loss: 0.681229, avg_loss: 0.700399 +000150/063150, loss: 0.696289, avg_loss: 0.700279 +000155/063150, loss: 0.697922, avg_loss: 0.700317 +000160/063150, loss: 0.707993, avg_loss: 0.700207 +000165/063150, loss: 0.698456, avg_loss: 0.700098 +000170/063150, loss: 0.706284, avg_loss: 0.699953 +000175/063150, loss: 0.706757, avg_loss: 0.699989 +000180/063150, loss: 0.695663, avg_loss: 0.699971 +000185/063150, loss: 0.695282, avg_loss: 0.699929 +000190/063150, loss: 0.700256, avg_loss: 0.699896 +000195/063150, loss: 0.701035, avg_loss: 0.699996 +000200/063150, loss: 0.701126, avg_loss: 0.700045 +000205/063150, loss: 0.697083, avg_loss: 0.700042 +000210/063150, loss: 0.698654, avg_loss: 0.700030 +000215/063150, loss: 0.697266, avg_loss: 0.699913 +000220/063150, loss: 0.702286, avg_loss: 0.699880 +000225/063150, loss: 0.693466, avg_loss: 0.699792 +000230/063150, loss: 0.698135, avg_loss: 0.699732 +000235/063150, loss: 0.682785, avg_loss: 0.699647 +000240/063150, loss: 0.692719, avg_loss: 0.699636 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 240/63150: {'accuracy': 0.4908256880733945} +000245/063150, loss: 0.701706, avg_loss: 0.699525 +000250/063150, loss: 0.694305, avg_loss: 0.699331 +000255/063150, loss: 0.693481, avg_loss: 0.699234 +000260/063150, loss: 0.695450, avg_loss: 0.699185 +000265/063150, loss: 0.690948, avg_loss: 0.699078 +000270/063150, loss: 0.692719, avg_loss: 0.698942 +000275/063150, loss: 0.692352, avg_loss: 0.698819 +000280/063150, loss: 0.694565, avg_loss: 0.698731 +000285/063150, loss: 0.690460, avg_loss: 0.698634 +000290/063150, loss: 0.690430, avg_loss: 0.698527 +000295/063150, loss: 0.695435, avg_loss: 0.698426 +000300/063150, loss: 0.688797, avg_loss: 0.698297 +000305/063150, loss: 0.692612, avg_loss: 0.698205 
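The three sample dumps above are consistent with a BERT-style uncased tokenizer: each 4-tuple holds input_ids (101 and 102 being the [CLS]/[SEP] ids), attention_mask, and token_type_ids padded to length 64, plus the label. The step budget also follows from the header: ceil(67349 / 32) = 2105 optimizer steps per epoch, and 2105 * 30 epochs = 63150 total optimization steps. The avg_loss column behaves like a cumulative mean of the per-step loss. A minimal sketch of a logging loop that would emit lines in this format (assuming a standard PyTorch/Transformers setup; train_loader, optimizer and scheduler are illustrative names, while print_step=5 comes from the Namespace above):

# Illustrative sketch, not taken from the repository: a cumulative-mean logger
# around a standard PyTorch loop producing the "step/total, loss, avg_loss" lines.
def train_and_log(model, train_loader, optimizer, scheduler,
                  num_epochs=30, total_steps=63150, print_step=5):
    running_sum, step = 0.0, 0
    for _ in range(num_epochs):
        for batch in train_loader:
            loss = model(**batch).loss      # HF-style models return .loss when labels are passed
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            step += 1
            running_sum += loss.item()
            if step % print_step == 0:
                print(f"{step:06d}/{total_steps:06d}, loss: {loss.item():.6f}, "
                      f"avg_loss: {running_sum / step:.6f}")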
+000310/063150, loss: 0.692673, avg_loss: 0.698092 +000315/063150, loss: 0.691071, avg_loss: 0.698000 +000320/063150, loss: 0.692902, avg_loss: 0.697890 +000325/063150, loss: 0.691315, avg_loss: 0.697769 +000330/063150, loss: 0.688202, avg_loss: 0.697666 +000335/063150, loss: 0.692764, avg_loss: 0.697577 +000340/063150, loss: 0.691727, avg_loss: 0.697481 +000345/063150, loss: 0.690247, avg_loss: 0.697397 +000350/063150, loss: 0.688766, avg_loss: 0.697331 +000355/063150, loss: 0.689789, avg_loss: 0.697179 +000360/063150, loss: 0.691925, avg_loss: 0.697088 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 360/63150: {'accuracy': 0.5355504587155964} +000365/063150, loss: 0.690979, avg_loss: 0.696966 +000370/063150, loss: 0.682373, avg_loss: 0.696839 +000375/063150, loss: 0.691315, avg_loss: 0.696746 +000380/063150, loss: 0.700912, avg_loss: 0.696692 +000385/063150, loss: 0.694580, avg_loss: 0.696592 +000390/063150, loss: 0.679520, avg_loss: 0.696474 +000395/063150, loss: 0.685104, avg_loss: 0.696315 +000400/063150, loss: 0.683975, avg_loss: 0.696200 +000405/063150, loss: 0.689697, avg_loss: 0.696104 +000410/063150, loss: 0.687210, avg_loss: 0.696001 +000415/063150, loss: 0.682480, avg_loss: 0.695870 +000420/063150, loss: 0.700912, avg_loss: 0.695806 +000425/063150, loss: 0.688156, avg_loss: 0.695680 +000430/063150, loss: 0.684021, avg_loss: 0.695526 +000435/063150, loss: 0.678650, avg_loss: 0.695443 +000440/063150, loss: 0.687790, avg_loss: 0.695313 +000445/063150, loss: 0.677887, avg_loss: 0.695127 +000450/063150, loss: 0.684143, avg_loss: 0.695000 +000455/063150, loss: 0.682251, avg_loss: 0.694790 +000460/063150, loss: 0.689774, avg_loss: 0.694652 +000465/063150, loss: 0.697784, avg_loss: 0.694567 +000470/063150, loss: 0.689697, avg_loss: 0.694421 +000475/063150, loss: 0.688354, avg_loss: 0.694421 +000480/063150, loss: 0.657639, avg_loss: 0.694245 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 480/63150: {'accuracy': 0.5091743119266054} +000485/063150, loss: 0.679733, avg_loss: 0.694141 +000490/063150, loss: 0.674637, avg_loss: 0.693992 +000495/063150, loss: 0.725555, avg_loss: 0.694000 +000500/063150, loss: 0.683044, avg_loss: 0.693896 +000505/063150, loss: 0.684753, avg_loss: 0.693714 +000510/063150, loss: 0.677780, avg_loss: 0.693572 +000515/063150, loss: 0.664780, avg_loss: 0.693277 +000520/063150, loss: 0.703796, avg_loss: 0.693264 +000525/063150, loss: 0.671082, avg_loss: 0.693133 +000530/063150, loss: 0.678345, avg_loss: 0.692987 +000535/063150, loss: 0.662933, avg_loss: 0.692878 +000540/063150, loss: 0.676666, avg_loss: 0.692811 +000545/063150, loss: 0.657776, avg_loss: 0.692641 +000550/063150, loss: 0.690552, avg_loss: 0.692577 +000555/063150, loss: 0.693863, avg_loss: 0.692609 +000560/063150, loss: 0.645691, avg_loss: 0.692455 +000565/063150, loss: 0.684433, avg_loss: 0.692544 +000570/063150, loss: 0.700058, avg_loss: 0.692548 +000575/063150, loss: 0.685699, avg_loss: 0.692384 +000580/063150, loss: 0.676178, avg_loss: 0.692270 +000585/063150, loss: 0.685501, avg_loss: 0.692076 +000590/063150, loss: 0.679108, avg_loss: 0.691955 +000595/063150, loss: 0.672958, avg_loss: 0.691839 +000600/063150, loss: 0.684540, avg_loss: 0.691661 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 600/63150: {'accuracy': 0.5091743119266054} +000605/063150, loss: 0.698410, avg_loss: 0.691549 +000610/063150, 
loss: 0.678619, avg_loss: 0.691446 +000615/063150, loss: 0.690140, avg_loss: 0.691291 +000620/063150, loss: 0.676254, avg_loss: 0.691128 +000625/063150, loss: 0.682373, avg_loss: 0.691073 +000630/063150, loss: 0.704193, avg_loss: 0.690986 +000635/063150, loss: 0.681549, avg_loss: 0.690874 +000640/063150, loss: 0.675903, avg_loss: 0.690731 +000645/063150, loss: 0.663528, avg_loss: 0.690676 +000650/063150, loss: 0.669769, avg_loss: 0.690538 +000655/063150, loss: 0.670044, avg_loss: 0.690407 +000660/063150, loss: 0.646988, avg_loss: 0.690206 +000665/063150, loss: 0.662003, avg_loss: 0.690063 +000670/063150, loss: 0.654678, avg_loss: 0.689824 +000675/063150, loss: 0.673439, avg_loss: 0.689577 +000680/063150, loss: 0.652298, avg_loss: 0.689503 +000685/063150, loss: 0.651001, avg_loss: 0.689241 +000690/063150, loss: 0.686157, avg_loss: 0.689081 +000695/063150, loss: 0.673027, avg_loss: 0.688830 +000700/063150, loss: 0.661835, avg_loss: 0.688696 +000705/063150, loss: 0.658730, avg_loss: 0.688453 +000710/063150, loss: 0.670929, avg_loss: 0.688235 +000715/063150, loss: 0.684013, avg_loss: 0.688093 +000720/063150, loss: 0.672012, avg_loss: 0.687874 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 720/63150: {'accuracy': 0.6284403669724771} +000725/063150, loss: 0.673714, avg_loss: 0.687796 +000730/063150, loss: 0.683487, avg_loss: 0.687649 +000735/063150, loss: 0.651962, avg_loss: 0.687393 +000740/063150, loss: 0.651978, avg_loss: 0.687152 +000745/063150, loss: 0.671097, avg_loss: 0.686865 +000750/063150, loss: 0.682686, avg_loss: 0.686692 +000755/063150, loss: 0.671722, avg_loss: 0.686502 +000760/063150, loss: 0.625923, avg_loss: 0.686107 +000765/063150, loss: 0.631691, avg_loss: 0.685808 +000770/063150, loss: 0.647720, avg_loss: 0.685372 +000775/063150, loss: 0.630836, avg_loss: 0.685044 +000780/063150, loss: 0.600067, avg_loss: 0.684657 +000785/063150, loss: 0.649406, avg_loss: 0.684341 +000790/063150, loss: 0.620857, avg_loss: 0.684093 +000795/063150, loss: 0.616707, avg_loss: 0.683665 +000800/063150, loss: 0.597076, avg_loss: 0.683302 +000805/063150, loss: 0.605042, avg_loss: 0.682946 +000810/063150, loss: 0.589241, avg_loss: 0.682349 +000815/063150, loss: 0.635567, avg_loss: 0.682020 +000820/063150, loss: 0.589584, avg_loss: 0.681673 +000825/063150, loss: 0.651291, avg_loss: 0.681407 +000830/063150, loss: 0.688423, avg_loss: 0.681157 +000835/063150, loss: 0.563934, avg_loss: 0.680670 +000840/063150, loss: 0.631546, avg_loss: 0.680357 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 840/63150: {'accuracy': 0.7236238532110092} +000845/063150, loss: 0.662323, avg_loss: 0.679958 +000850/063150, loss: 0.561371, avg_loss: 0.679360 +000855/063150, loss: 0.537651, avg_loss: 0.678845 +000860/063150, loss: 0.600845, avg_loss: 0.678364 +000865/063150, loss: 0.524681, avg_loss: 0.677886 +000870/063150, loss: 0.618294, avg_loss: 0.677480 +000875/063150, loss: 0.550529, avg_loss: 0.677040 +000880/063150, loss: 0.562836, avg_loss: 0.676556 +000885/063150, loss: 0.584137, avg_loss: 0.676117 +000890/063150, loss: 0.534386, avg_loss: 0.675487 +000895/063150, loss: 0.547890, avg_loss: 0.674789 +000900/063150, loss: 0.572800, avg_loss: 0.674201 +000905/063150, loss: 0.594093, avg_loss: 0.673920 +000910/063150, loss: 0.577248, avg_loss: 0.673406 +000915/063150, loss: 0.521347, avg_loss: 0.672878 +000920/063150, loss: 0.583481, avg_loss: 0.672350 +000925/063150, loss: 0.553391, 
avg_loss: 0.671855 +000930/063150, loss: 0.561745, avg_loss: 0.671272 +000935/063150, loss: 0.524872, avg_loss: 0.670658 +000940/063150, loss: 0.522697, avg_loss: 0.670148 +000945/063150, loss: 0.662201, avg_loss: 0.669857 +000950/063150, loss: 0.665039, avg_loss: 0.669415 +000955/063150, loss: 0.541252, avg_loss: 0.668925 +000960/063150, loss: 0.529617, avg_loss: 0.668321 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 960/63150: {'accuracy': 0.7385321100917431} +000965/063150, loss: 0.646500, avg_loss: 0.667651 +000970/063150, loss: 0.635612, avg_loss: 0.666934 +000975/063150, loss: 0.485619, avg_loss: 0.666445 +000980/063150, loss: 0.481720, avg_loss: 0.666025 +000985/063150, loss: 0.623451, avg_loss: 0.665369 +000990/063150, loss: 0.556183, avg_loss: 0.664912 +000995/063150, loss: 0.594429, avg_loss: 0.664675 +001000/063150, loss: 0.642525, avg_loss: 0.664227 +001005/063150, loss: 0.507896, avg_loss: 0.663682 +001010/063150, loss: 0.559967, avg_loss: 0.663286 +001015/063150, loss: 0.537098, avg_loss: 0.662654 +001020/063150, loss: 0.538918, avg_loss: 0.662011 +001025/063150, loss: 0.477783, avg_loss: 0.661387 +001030/063150, loss: 0.640617, avg_loss: 0.660972 +001035/063150, loss: 0.559074, avg_loss: 0.660316 +001040/063150, loss: 0.640266, avg_loss: 0.659892 +001045/063150, loss: 0.534710, avg_loss: 0.659427 +001050/063150, loss: 0.484222, avg_loss: 0.658888 +001055/063150, loss: 0.581760, avg_loss: 0.658355 +001060/063150, loss: 0.561779, avg_loss: 0.657932 +001065/063150, loss: 0.554047, avg_loss: 0.657404 +001070/063150, loss: 0.572773, avg_loss: 0.656937 +001075/063150, loss: 0.647923, avg_loss: 0.656480 +001080/063150, loss: 0.425316, avg_loss: 0.655995 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1080/63150: {'accuracy': 0.7568807339449541} +001085/063150, loss: 0.590240, avg_loss: 0.655601 +001090/063150, loss: 0.457207, avg_loss: 0.655022 +001095/063150, loss: 0.579010, avg_loss: 0.654627 +001100/063150, loss: 0.607227, avg_loss: 0.654075 +001105/063150, loss: 0.619682, avg_loss: 0.653523 +001110/063150, loss: 0.546398, avg_loss: 0.653132 +001115/063150, loss: 0.640785, avg_loss: 0.652507 +001120/063150, loss: 0.405167, avg_loss: 0.652141 +001125/063150, loss: 0.521263, avg_loss: 0.651874 +001130/063150, loss: 0.520802, avg_loss: 0.651417 +001135/063150, loss: 0.553818, avg_loss: 0.650836 +001140/063150, loss: 0.636532, avg_loss: 0.650372 +001145/063150, loss: 0.650009, avg_loss: 0.649899 +001150/063150, loss: 0.454395, avg_loss: 0.649468 +001155/063150, loss: 0.478718, avg_loss: 0.648877 +001160/063150, loss: 0.586956, avg_loss: 0.648443 +001165/063150, loss: 0.495560, avg_loss: 0.648029 +001170/063150, loss: 0.499985, avg_loss: 0.647654 +001175/063150, loss: 0.540573, avg_loss: 0.647043 +001180/063150, loss: 0.425880, avg_loss: 0.646502 +001185/063150, loss: 0.578789, avg_loss: 0.646155 +001190/063150, loss: 0.518787, avg_loss: 0.645670 +001195/063150, loss: 0.532078, avg_loss: 0.645171 +001200/063150, loss: 0.442375, avg_loss: 0.644458 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1200/63150: {'accuracy': 0.7637614678899083} +001205/063150, loss: 0.529041, avg_loss: 0.644026 +001210/063150, loss: 0.481709, avg_loss: 0.643462 +001215/063150, loss: 0.421642, avg_loss: 0.642747 +001220/063150, loss: 0.436317, avg_loss: 0.642176 +001225/063150, loss: 0.537792, avg_loss: 
0.641646 +001230/063150, loss: 0.482212, avg_loss: 0.641234 +001235/063150, loss: 0.506683, avg_loss: 0.640627 +001240/063150, loss: 0.423000, avg_loss: 0.640062 +001245/063150, loss: 0.452396, avg_loss: 0.639454 +001250/063150, loss: 0.459133, avg_loss: 0.638942 +001255/063150, loss: 0.682259, avg_loss: 0.638437 +001260/063150, loss: 0.492313, avg_loss: 0.637797 +001265/063150, loss: 0.612106, avg_loss: 0.637475 +001270/063150, loss: 0.371552, avg_loss: 0.636980 +001275/063150, loss: 0.442150, avg_loss: 0.636261 +001280/063150, loss: 0.531754, avg_loss: 0.635836 +001285/063150, loss: 0.519829, avg_loss: 0.635495 +001290/063150, loss: 0.499115, avg_loss: 0.634939 +001295/063150, loss: 0.536774, avg_loss: 0.634752 +001300/063150, loss: 0.503185, avg_loss: 0.634372 +001305/063150, loss: 0.602249, avg_loss: 0.633995 +001310/063150, loss: 0.442337, avg_loss: 0.633510 +001315/063150, loss: 0.642811, avg_loss: 0.633124 +001320/063150, loss: 0.414825, avg_loss: 0.632533 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1320/63150: {'accuracy': 0.7752293577981652} +001325/063150, loss: 0.481556, avg_loss: 0.631990 +001330/063150, loss: 0.448387, avg_loss: 0.631533 +001335/063150, loss: 0.401711, avg_loss: 0.631070 +001340/063150, loss: 0.508049, avg_loss: 0.630476 +001345/063150, loss: 0.568802, avg_loss: 0.630039 +001350/063150, loss: 0.561794, avg_loss: 0.629707 +001355/063150, loss: 0.542850, avg_loss: 0.629050 +001360/063150, loss: 0.484318, avg_loss: 0.628416 +001365/063150, loss: 0.695049, avg_loss: 0.628053 +001370/063150, loss: 0.527977, avg_loss: 0.627550 +001375/063150, loss: 0.557415, avg_loss: 0.627135 +001380/063150, loss: 0.457073, avg_loss: 0.626956 +001385/063150, loss: 0.438343, avg_loss: 0.626553 +001390/063150, loss: 0.563004, avg_loss: 0.625970 +001395/063150, loss: 0.654701, avg_loss: 0.625507 +001400/063150, loss: 0.474880, avg_loss: 0.624905 +001405/063150, loss: 0.408585, avg_loss: 0.624406 +001410/063150, loss: 0.475113, avg_loss: 0.623802 +001415/063150, loss: 0.495762, avg_loss: 0.623506 +001420/063150, loss: 0.466187, avg_loss: 0.622912 +001425/063150, loss: 0.562550, avg_loss: 0.622626 +001430/063150, loss: 0.634254, avg_loss: 0.622368 +001435/063150, loss: 0.520088, avg_loss: 0.621966 +001440/063150, loss: 0.551281, avg_loss: 0.621461 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1440/63150: {'accuracy': 0.7717889908256881} +001445/063150, loss: 0.396290, avg_loss: 0.620777 +001450/063150, loss: 0.437672, avg_loss: 0.620369 +001455/063150, loss: 0.466106, avg_loss: 0.619835 +001460/063150, loss: 0.512569, avg_loss: 0.619407 +001465/063150, loss: 0.454067, avg_loss: 0.619037 +001470/063150, loss: 0.506660, avg_loss: 0.618636 +001475/063150, loss: 0.539684, avg_loss: 0.618245 +001480/063150, loss: 0.378353, avg_loss: 0.617922 +001485/063150, loss: 0.325607, avg_loss: 0.617461 +001490/063150, loss: 0.623295, avg_loss: 0.617021 +001495/063150, loss: 0.510307, avg_loss: 0.616807 +001500/063150, loss: 0.357586, avg_loss: 0.616374 +001505/063150, loss: 0.481606, avg_loss: 0.616216 +001510/063150, loss: 0.558796, avg_loss: 0.615956 +001515/063150, loss: 0.619362, avg_loss: 0.615500 +001520/063150, loss: 0.502560, avg_loss: 0.615003 +001525/063150, loss: 0.419495, avg_loss: 0.614568 +001530/063150, loss: 0.462114, avg_loss: 0.613972 +001535/063150, loss: 0.514221, avg_loss: 0.613483 +001540/063150, loss: 0.531858, avg_loss: 0.612844 
+001545/063150, loss: 0.456753, avg_loss: 0.612303 +001550/063150, loss: 0.461109, avg_loss: 0.611709 +001555/063150, loss: 0.595768, avg_loss: 0.611489 +001560/063150, loss: 0.481895, avg_loss: 0.611150 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1560/63150: {'accuracy': 0.7752293577981652} +001565/063150, loss: 0.421822, avg_loss: 0.610683 +001570/063150, loss: 0.454964, avg_loss: 0.610350 +001575/063150, loss: 0.365948, avg_loss: 0.609845 +001580/063150, loss: 0.445292, avg_loss: 0.609434 +001585/063150, loss: 0.476309, avg_loss: 0.608933 +001590/063150, loss: 0.493601, avg_loss: 0.608611 +001595/063150, loss: 0.593845, avg_loss: 0.608162 +001600/063150, loss: 0.369373, avg_loss: 0.607726 +001605/063150, loss: 0.587719, avg_loss: 0.607384 +001610/063150, loss: 0.492731, avg_loss: 0.606924 +001615/063150, loss: 0.502428, avg_loss: 0.606412 +001620/063150, loss: 0.537010, avg_loss: 0.606198 +001625/063150, loss: 0.566477, avg_loss: 0.605799 +001630/063150, loss: 0.476679, avg_loss: 0.605363 +001635/063150, loss: 0.475550, avg_loss: 0.604885 +001640/063150, loss: 0.349373, avg_loss: 0.604407 +001645/063150, loss: 0.377903, avg_loss: 0.603836 +001650/063150, loss: 0.411657, avg_loss: 0.603554 +001655/063150, loss: 0.392540, avg_loss: 0.602957 +001660/063150, loss: 0.529764, avg_loss: 0.602509 +001665/063150, loss: 0.566372, avg_loss: 0.602353 +001670/063150, loss: 0.450151, avg_loss: 0.601893 +001675/063150, loss: 0.472094, avg_loss: 0.601507 +001680/063150, loss: 0.535753, avg_loss: 0.601047 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1680/63150: {'accuracy': 0.7798165137614679} +001685/063150, loss: 0.471460, avg_loss: 0.600612 +001690/063150, loss: 0.621786, avg_loss: 0.600232 +001695/063150, loss: 0.503521, avg_loss: 0.599750 +001700/063150, loss: 0.539293, avg_loss: 0.599262 +001705/063150, loss: 0.464657, avg_loss: 0.598757 +001710/063150, loss: 0.486265, avg_loss: 0.598326 +001715/063150, loss: 0.533688, avg_loss: 0.597988 +001720/063150, loss: 0.421272, avg_loss: 0.597682 +001725/063150, loss: 0.376846, avg_loss: 0.597393 +001730/063150, loss: 0.492756, avg_loss: 0.596847 +001735/063150, loss: 0.332472, avg_loss: 0.596607 +001740/063150, loss: 0.479696, avg_loss: 0.596175 +001745/063150, loss: 0.427999, avg_loss: 0.595669 +001750/063150, loss: 0.527891, avg_loss: 0.595335 +001755/063150, loss: 0.379955, avg_loss: 0.594892 +001760/063150, loss: 0.599617, avg_loss: 0.594601 +001765/063150, loss: 0.576731, avg_loss: 0.594364 +001770/063150, loss: 0.566746, avg_loss: 0.594008 +001775/063150, loss: 0.606173, avg_loss: 0.593482 +001780/063150, loss: 0.411125, avg_loss: 0.593049 +001785/063150, loss: 0.559351, avg_loss: 0.592689 +001790/063150, loss: 0.489662, avg_loss: 0.592523 +001795/063150, loss: 0.438231, avg_loss: 0.592393 +001800/063150, loss: 0.439730, avg_loss: 0.591988 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1800/63150: {'accuracy': 0.7821100917431193} +001805/063150, loss: 0.457642, avg_loss: 0.591447 +001810/063150, loss: 0.474457, avg_loss: 0.591127 +001815/063150, loss: 0.446978, avg_loss: 0.590701 +001820/063150, loss: 0.360235, avg_loss: 0.590146 +001825/063150, loss: 0.522787, avg_loss: 0.589913 +001830/063150, loss: 0.420828, avg_loss: 0.589691 +001835/063150, loss: 0.576784, avg_loss: 0.589292 +001840/063150, loss: 0.455002, avg_loss: 0.588905 
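Every eval_step=120 optimizer steps the log switches to a dev pass over the 872 SST-2 validation sentences and prints an accuracy dict. A minimal sketch of such an evaluation pass (assuming a PyTorch DataLoader of tokenized batches; evaluate_dev and eval_loader are illustrative names, not from the repository):

import torch

# Illustrative sketch: dev-set accuracy in the form printed above.
@torch.no_grad()
def evaluate_dev(model, eval_loader):
    model.eval()
    correct = total = 0
    for batch in eval_loader:                 # per-device eval batch size = 32, as logged
        labels = batch.pop("labels")
        preds = model(**batch).logits.argmax(dim=-1)
        correct += (preds == labels).sum().item()
        total += labels.numel()
    model.train()
    return {"accuracy": correct / total}      # e.g. {'accuracy': 0.7821100917431193}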
+001845/063150, loss: 0.463318, avg_loss: 0.588360 +001850/063150, loss: 0.538891, avg_loss: 0.588076 +001855/063150, loss: 0.442177, avg_loss: 0.587478 +001860/063150, loss: 0.598307, avg_loss: 0.587336 +001865/063150, loss: 0.594749, avg_loss: 0.587073 +001870/063150, loss: 0.438782, avg_loss: 0.586785 +001875/063150, loss: 0.397961, avg_loss: 0.586557 +001880/063150, loss: 0.396614, avg_loss: 0.586146 +001885/063150, loss: 0.398376, avg_loss: 0.585721 +001890/063150, loss: 0.330936, avg_loss: 0.585311 +001895/063150, loss: 0.270622, avg_loss: 0.584947 +001900/063150, loss: 0.645473, avg_loss: 0.584804 +001905/063150, loss: 0.466560, avg_loss: 0.584304 +001910/063150, loss: 0.390421, avg_loss: 0.583849 +001915/063150, loss: 0.428555, avg_loss: 0.583479 +001920/063150, loss: 0.465866, avg_loss: 0.582975 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 1920/63150: {'accuracy': 0.7775229357798165} +001925/063150, loss: 0.476524, avg_loss: 0.582740 +001930/063150, loss: 0.404989, avg_loss: 0.582187 +001935/063150, loss: 0.563856, avg_loss: 0.581866 +001940/063150, loss: 0.461561, avg_loss: 0.581485 +001945/063150, loss: 0.463858, avg_loss: 0.581072 +001950/063150, loss: 0.476309, avg_loss: 0.580758 +001955/063150, loss: 0.419500, avg_loss: 0.580453 +001960/063150, loss: 0.425861, avg_loss: 0.580091 +001965/063150, loss: 0.608536, avg_loss: 0.579938 +001970/063150, loss: 0.318991, avg_loss: 0.579540 +001975/063150, loss: 0.358694, avg_loss: 0.579218 +001980/063150, loss: 0.287941, avg_loss: 0.578818 +001985/063150, loss: 0.434204, avg_loss: 0.578520 +001990/063150, loss: 0.598541, avg_loss: 0.578185 +001995/063150, loss: 0.411554, avg_loss: 0.577992 +002000/063150, loss: 0.554369, avg_loss: 0.577653 +002005/063150, loss: 0.444473, avg_loss: 0.577298 +002010/063150, loss: 0.464811, avg_loss: 0.576906 +002015/063150, loss: 0.500570, avg_loss: 0.576662 +002020/063150, loss: 0.327007, avg_loss: 0.576124 +002025/063150, loss: 0.424227, avg_loss: 0.575725 +002030/063150, loss: 0.331482, avg_loss: 0.575304 +002035/063150, loss: 0.371761, avg_loss: 0.575046 +002040/063150, loss: 0.376259, avg_loss: 0.574788 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 0, step 2040/63150: {'accuracy': 0.7993119266055045} +002045/063150, loss: 0.360487, avg_loss: 0.574369 +002050/063150, loss: 0.474432, avg_loss: 0.573938 +002055/063150, loss: 0.277416, avg_loss: 0.573426 +002060/063150, loss: 0.405094, avg_loss: 0.572981 +002065/063150, loss: 0.437620, avg_loss: 0.572760 +002070/063150, loss: 0.336487, avg_loss: 0.572440 +002075/063150, loss: 0.419630, avg_loss: 0.572151 +002080/063150, loss: 0.321566, avg_loss: 0.571714 +002085/063150, loss: 0.615108, avg_loss: 0.571479 +002090/063150, loss: 0.541786, avg_loss: 0.571437 +002095/063150, loss: 0.357214, avg_loss: 0.570996 +002100/063150, loss: 0.414228, avg_loss: 0.570739 +002105/063150, loss: 0.466367, avg_loss: 0.570469 +002110/063150, loss: 0.284697, avg_loss: 0.569925 +002115/063150, loss: 0.632372, avg_loss: 0.569449 +002120/063150, loss: 0.567448, avg_loss: 0.569101 +002125/063150, loss: 0.301064, avg_loss: 0.568673 +002130/063150, loss: 0.466331, avg_loss: 0.568429 +002135/063150, loss: 0.318399, avg_loss: 0.567998 +002140/063150, loss: 0.457193, avg_loss: 0.567648 +002145/063150, loss: 0.402014, avg_loss: 0.567166 +002150/063150, loss: 0.565275, avg_loss: 0.566827 +002155/063150, loss: 0.353403, avg_loss: 0.566410 +002160/063150, loss: 
0.412094, avg_loss: 0.566103 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2160/63150: {'accuracy': 0.7993119266055045} +002165/063150, loss: 0.530349, avg_loss: 0.565792 +002170/063150, loss: 0.482883, avg_loss: 0.565460 +002175/063150, loss: 0.410007, avg_loss: 0.565040 +002180/063150, loss: 0.475971, avg_loss: 0.564747 +002185/063150, loss: 0.532206, avg_loss: 0.564487 +002190/063150, loss: 0.375374, avg_loss: 0.563967 +002195/063150, loss: 0.367553, avg_loss: 0.563401 +002200/063150, loss: 0.443151, avg_loss: 0.563005 +002205/063150, loss: 0.410225, avg_loss: 0.562641 +002210/063150, loss: 0.489100, avg_loss: 0.562480 +002215/063150, loss: 0.289569, avg_loss: 0.562061 +002220/063150, loss: 0.304693, avg_loss: 0.561644 +002225/063150, loss: 0.318733, avg_loss: 0.561267 +002230/063150, loss: 0.599303, avg_loss: 0.560921 +002235/063150, loss: 0.554891, avg_loss: 0.560554 +002240/063150, loss: 0.523050, avg_loss: 0.560261 +002245/063150, loss: 0.541603, avg_loss: 0.560032 +002250/063150, loss: 0.339924, avg_loss: 0.559689 +002255/063150, loss: 0.333566, avg_loss: 0.559446 +002260/063150, loss: 0.387396, avg_loss: 0.559056 +002265/063150, loss: 0.391576, avg_loss: 0.558933 +002270/063150, loss: 0.431360, avg_loss: 0.558750 +002275/063150, loss: 0.467025, avg_loss: 0.558452 +002280/063150, loss: 0.440957, avg_loss: 0.557994 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2280/63150: {'accuracy': 0.801605504587156} +002285/063150, loss: 0.334400, avg_loss: 0.557650 +002290/063150, loss: 0.362084, avg_loss: 0.557309 +002295/063150, loss: 0.456802, avg_loss: 0.557083 +002300/063150, loss: 0.487686, avg_loss: 0.556850 +002305/063150, loss: 0.195270, avg_loss: 0.556443 +002310/063150, loss: 0.320818, avg_loss: 0.556251 +002315/063150, loss: 0.297004, avg_loss: 0.555843 +002320/063150, loss: 0.271812, avg_loss: 0.555569 +002325/063150, loss: 0.534525, avg_loss: 0.555352 +002330/063150, loss: 0.517542, avg_loss: 0.555115 +002335/063150, loss: 0.236243, avg_loss: 0.554895 +002340/063150, loss: 0.503995, avg_loss: 0.554608 +002345/063150, loss: 0.534254, avg_loss: 0.554290 +002350/063150, loss: 0.401686, avg_loss: 0.554065 +002355/063150, loss: 0.400046, avg_loss: 0.553681 +002360/063150, loss: 0.337936, avg_loss: 0.553515 +002365/063150, loss: 0.360080, avg_loss: 0.553164 +002370/063150, loss: 0.282468, avg_loss: 0.552726 +002375/063150, loss: 0.511618, avg_loss: 0.552363 +002380/063150, loss: 0.393131, avg_loss: 0.552091 +002385/063150, loss: 0.312490, avg_loss: 0.551643 +002390/063150, loss: 0.433064, avg_loss: 0.551285 +002395/063150, loss: 0.403099, avg_loss: 0.550956 +002400/063150, loss: 0.510655, avg_loss: 0.550642 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2400/63150: {'accuracy': 0.8096330275229358} +002405/063150, loss: 0.359091, avg_loss: 0.550244 +002410/063150, loss: 0.381485, avg_loss: 0.550031 +002415/063150, loss: 0.355571, avg_loss: 0.549686 +002420/063150, loss: 0.296953, avg_loss: 0.549320 +002425/063150, loss: 0.291162, avg_loss: 0.548987 +002430/063150, loss: 0.388165, avg_loss: 0.548686 +002435/063150, loss: 0.431476, avg_loss: 0.548367 +002440/063150, loss: 0.324961, avg_loss: 0.548020 +002445/063150, loss: 0.248220, avg_loss: 0.547515 +002450/063150, loss: 0.399220, avg_loss: 0.547195 +002455/063150, loss: 0.300820, avg_loss: 0.546632 +002460/063150, loss: 0.412463, 
avg_loss: 0.546405 +002465/063150, loss: 0.343831, avg_loss: 0.546263 +002470/063150, loss: 0.325924, avg_loss: 0.546023 +002475/063150, loss: 0.294374, avg_loss: 0.545837 +002480/063150, loss: 0.249588, avg_loss: 0.545485 +002485/063150, loss: 0.373238, avg_loss: 0.545193 +002490/063150, loss: 0.418461, avg_loss: 0.544885 +002495/063150, loss: 0.604065, avg_loss: 0.544701 +002500/063150, loss: 0.383909, avg_loss: 0.544381 +002505/063150, loss: 0.553466, avg_loss: 0.544107 +002510/063150, loss: 0.362100, avg_loss: 0.543708 +002515/063150, loss: 0.401723, avg_loss: 0.543476 +002520/063150, loss: 0.420690, avg_loss: 0.543165 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2520/63150: {'accuracy': 0.8084862385321101} +002525/063150, loss: 0.322749, avg_loss: 0.542914 +002530/063150, loss: 0.446834, avg_loss: 0.542709 +002535/063150, loss: 0.247090, avg_loss: 0.542372 +002540/063150, loss: 0.217799, avg_loss: 0.541945 +002545/063150, loss: 0.416651, avg_loss: 0.541675 +002550/063150, loss: 0.296513, avg_loss: 0.541274 +002555/063150, loss: 0.326615, avg_loss: 0.540952 +002560/063150, loss: 0.416657, avg_loss: 0.540617 +002565/063150, loss: 0.353575, avg_loss: 0.540476 +002570/063150, loss: 0.208168, avg_loss: 0.540278 +002575/063150, loss: 0.510936, avg_loss: 0.539868 +002580/063150, loss: 0.405255, avg_loss: 0.539554 +002585/063150, loss: 0.572215, avg_loss: 0.539296 +002590/063150, loss: 0.581574, avg_loss: 0.539011 +002595/063150, loss: 0.320257, avg_loss: 0.538764 +002600/063150, loss: 0.536942, avg_loss: 0.538634 +002605/063150, loss: 0.352522, avg_loss: 0.538339 +002610/063150, loss: 0.361156, avg_loss: 0.538046 +002615/063150, loss: 0.265065, avg_loss: 0.537737 +002620/063150, loss: 0.390367, avg_loss: 0.537376 +002625/063150, loss: 0.417925, avg_loss: 0.537277 +002630/063150, loss: 0.294712, avg_loss: 0.536958 +002635/063150, loss: 0.376154, avg_loss: 0.536549 +002640/063150, loss: 0.456254, avg_loss: 0.536370 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2640/63150: {'accuracy': 0.819954128440367} +002645/063150, loss: 0.255370, avg_loss: 0.536081 +002650/063150, loss: 0.320479, avg_loss: 0.535787 +002655/063150, loss: 0.377732, avg_loss: 0.535443 +002660/063150, loss: 0.252942, avg_loss: 0.535109 +002665/063150, loss: 0.396996, avg_loss: 0.534829 +002670/063150, loss: 0.362296, avg_loss: 0.534474 +002675/063150, loss: 0.425987, avg_loss: 0.534196 +002680/063150, loss: 0.378053, avg_loss: 0.533962 +002685/063150, loss: 0.348865, avg_loss: 0.533647 +002690/063150, loss: 0.442365, avg_loss: 0.533353 +002695/063150, loss: 0.272227, avg_loss: 0.533050 +002700/063150, loss: 0.342094, avg_loss: 0.532640 +002705/063150, loss: 0.345610, avg_loss: 0.532358 +002710/063150, loss: 0.360603, avg_loss: 0.532106 +002715/063150, loss: 0.485757, avg_loss: 0.531911 +002720/063150, loss: 0.406185, avg_loss: 0.531572 +002725/063150, loss: 0.324953, avg_loss: 0.531308 +002730/063150, loss: 0.280637, avg_loss: 0.531046 +002735/063150, loss: 0.210205, avg_loss: 0.530774 +002740/063150, loss: 0.203700, avg_loss: 0.530357 +002745/063150, loss: 0.260636, avg_loss: 0.530014 +002750/063150, loss: 0.429950, avg_loss: 0.529847 +002755/063150, loss: 0.711760, avg_loss: 0.529698 +002760/063150, loss: 0.410009, avg_loss: 0.529382 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2760/63150: {'accuracy': 
0.8142201834862385} +002765/063150, loss: 0.331034, avg_loss: 0.529057 +002770/063150, loss: 0.261881, avg_loss: 0.528702 +002775/063150, loss: 0.418627, avg_loss: 0.528421 +002780/063150, loss: 0.285539, avg_loss: 0.528087 +002785/063150, loss: 0.410718, avg_loss: 0.527839 +002790/063150, loss: 0.275710, avg_loss: 0.527568 +002795/063150, loss: 0.596097, avg_loss: 0.527231 +002800/063150, loss: 0.462545, avg_loss: 0.526970 +002805/063150, loss: 0.427958, avg_loss: 0.526764 +002810/063150, loss: 0.438083, avg_loss: 0.526567 +002815/063150, loss: 0.441787, avg_loss: 0.526307 +002820/063150, loss: 0.442998, avg_loss: 0.526128 +002825/063150, loss: 0.377224, avg_loss: 0.525880 +002830/063150, loss: 0.289482, avg_loss: 0.525707 +002835/063150, loss: 0.354323, avg_loss: 0.525438 +002840/063150, loss: 0.356212, avg_loss: 0.525252 +002845/063150, loss: 0.291039, avg_loss: 0.524944 +002850/063150, loss: 0.384137, avg_loss: 0.524755 +002855/063150, loss: 0.543154, avg_loss: 0.524567 +002860/063150, loss: 0.318191, avg_loss: 0.524278 +002865/063150, loss: 0.224320, avg_loss: 0.523993 +002870/063150, loss: 0.429259, avg_loss: 0.523760 +002875/063150, loss: 0.394798, avg_loss: 0.523539 +002880/063150, loss: 0.289711, avg_loss: 0.523362 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 2880/63150: {'accuracy': 0.819954128440367} +002885/063150, loss: 0.423884, avg_loss: 0.523056 +002890/063150, loss: 0.294128, avg_loss: 0.522815 +002895/063150, loss: 0.219022, avg_loss: 0.522560 +002900/063150, loss: 0.478649, avg_loss: 0.522309 +002905/063150, loss: 0.404638, avg_loss: 0.522083 +002910/063150, loss: 0.420214, avg_loss: 0.521777 +002915/063150, loss: 0.392127, avg_loss: 0.521563 +002920/063150, loss: 0.239749, avg_loss: 0.521218 +002925/063150, loss: 0.458743, avg_loss: 0.520987 +002930/063150, loss: 0.178244, avg_loss: 0.520637 +002935/063150, loss: 0.507570, avg_loss: 0.520337 +002940/063150, loss: 0.430135, avg_loss: 0.520116 +002945/063150, loss: 0.311901, avg_loss: 0.519821 +002950/063150, loss: 0.385186, avg_loss: 0.519534 +002955/063150, loss: 0.530890, avg_loss: 0.519227 +002960/063150, loss: 0.282289, avg_loss: 0.518936 +002965/063150, loss: 0.295678, avg_loss: 0.518628 +002970/063150, loss: 0.287114, avg_loss: 0.518313 +002975/063150, loss: 0.177844, avg_loss: 0.517955 +002980/063150, loss: 0.492239, avg_loss: 0.517671 +002985/063150, loss: 0.366426, avg_loss: 0.517487 +002990/063150, loss: 0.343064, avg_loss: 0.517248 +002995/063150, loss: 0.319809, avg_loss: 0.516827 +003000/063150, loss: 0.431824, avg_loss: 0.516586 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3000/63150: {'accuracy': 0.8245412844036697} +003005/063150, loss: 0.533326, avg_loss: 0.516348 +003010/063150, loss: 0.299756, avg_loss: 0.516209 +003015/063150, loss: 0.238364, avg_loss: 0.516007 +003020/063150, loss: 0.465790, avg_loss: 0.515813 +003025/063150, loss: 0.468388, avg_loss: 0.515642 +003030/063150, loss: 0.334590, avg_loss: 0.515271 +003035/063150, loss: 0.352698, avg_loss: 0.515002 +003040/063150, loss: 0.394218, avg_loss: 0.514811 +003045/063150, loss: 0.400462, avg_loss: 0.514603 +003050/063150, loss: 0.174596, avg_loss: 0.514233 +003055/063150, loss: 0.380684, avg_loss: 0.513951 +003060/063150, loss: 0.443890, avg_loss: 0.513707 +003065/063150, loss: 0.350919, avg_loss: 0.513352 +003070/063150, loss: 0.367730, avg_loss: 0.513119 +003075/063150, loss: 0.303897, avg_loss: 0.512880 
+003080/063150, loss: 0.339126, avg_loss: 0.512622 +003085/063150, loss: 0.172532, avg_loss: 0.512337 +003090/063150, loss: 0.296662, avg_loss: 0.511963 +003095/063150, loss: 0.265311, avg_loss: 0.511643 +003100/063150, loss: 0.547116, avg_loss: 0.511435 +003105/063150, loss: 0.486903, avg_loss: 0.511267 +003110/063150, loss: 0.357828, avg_loss: 0.511004 +003115/063150, loss: 0.288122, avg_loss: 0.510716 +003120/063150, loss: 0.169984, avg_loss: 0.510339 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3120/63150: {'accuracy': 0.8291284403669725} +003125/063150, loss: 0.391413, avg_loss: 0.510143 +003130/063150, loss: 0.410817, avg_loss: 0.509878 +003135/063150, loss: 0.227384, avg_loss: 0.509497 +003140/063150, loss: 0.245751, avg_loss: 0.509240 +003145/063150, loss: 0.329388, avg_loss: 0.509190 +003150/063150, loss: 0.517729, avg_loss: 0.508962 +003155/063150, loss: 0.410280, avg_loss: 0.508690 +003160/063150, loss: 0.255345, avg_loss: 0.508490 +003165/063150, loss: 0.467873, avg_loss: 0.508244 +003170/063150, loss: 0.609531, avg_loss: 0.508100 +003175/063150, loss: 0.207893, avg_loss: 0.507832 +003180/063150, loss: 0.461324, avg_loss: 0.507690 +003185/063150, loss: 0.293929, avg_loss: 0.507442 +003190/063150, loss: 0.342324, avg_loss: 0.507173 +003195/063150, loss: 0.364676, avg_loss: 0.506903 +003200/063150, loss: 0.273666, avg_loss: 0.506640 +003205/063150, loss: 0.239925, avg_loss: 0.506290 +003210/063150, loss: 0.255106, avg_loss: 0.506002 +003215/063150, loss: 0.288060, avg_loss: 0.505724 +003220/063150, loss: 0.405391, avg_loss: 0.505464 +003225/063150, loss: 0.269861, avg_loss: 0.505234 +003230/063150, loss: 0.193510, avg_loss: 0.504942 +003235/063150, loss: 0.433163, avg_loss: 0.504727 +003240/063150, loss: 0.211067, avg_loss: 0.504363 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3240/63150: {'accuracy': 0.8291284403669725} +003245/063150, loss: 0.484763, avg_loss: 0.504067 +003250/063150, loss: 0.291922, avg_loss: 0.503777 +003255/063150, loss: 0.494730, avg_loss: 0.503512 +003260/063150, loss: 0.255081, avg_loss: 0.503354 +003265/063150, loss: 0.481848, avg_loss: 0.503102 +003270/063150, loss: 0.417441, avg_loss: 0.502899 +003275/063150, loss: 0.200572, avg_loss: 0.502584 +003280/063150, loss: 0.168463, avg_loss: 0.502487 +003285/063150, loss: 0.210876, avg_loss: 0.502319 +003290/063150, loss: 0.287843, avg_loss: 0.501946 +003295/063150, loss: 0.181558, avg_loss: 0.501571 +003300/063150, loss: 0.472590, avg_loss: 0.501398 +003305/063150, loss: 0.146369, avg_loss: 0.501337 +003310/063150, loss: 0.283992, avg_loss: 0.501068 +003315/063150, loss: 0.236916, avg_loss: 0.500815 +003320/063150, loss: 0.384485, avg_loss: 0.500522 +003325/063150, loss: 0.403200, avg_loss: 0.500360 +003330/063150, loss: 0.283864, avg_loss: 0.500092 +003335/063150, loss: 0.410497, avg_loss: 0.499943 +003340/063150, loss: 0.318582, avg_loss: 0.499752 +003345/063150, loss: 0.466494, avg_loss: 0.499531 +003350/063150, loss: 0.343706, avg_loss: 0.499330 +003355/063150, loss: 0.322908, avg_loss: 0.499076 +003360/063150, loss: 0.609332, avg_loss: 0.498971 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3360/63150: {'accuracy': 0.8176605504587156} +003365/063150, loss: 0.348086, avg_loss: 0.498690 +003370/063150, loss: 0.363859, avg_loss: 0.498392 +003375/063150, loss: 0.370918, avg_loss: 0.498175 
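The run header near the top (Distributed environment: NO, Num processes: 1, Device: cuda, Mixed precision type: fp16) reads like the state summary that Hugging Face accelerate prints for a single-process fp16 run. A minimal setup sketch under that assumption (the repository's actual wiring may differ):

from accelerate import Accelerator

# Illustrative sketch, assuming the run uses Hugging Face accelerate for fp16 on one GPU.
def setup_accelerator(model, optimizer, train_loader, eval_loader):
    accelerator = Accelerator(mixed_precision="fp16")   # older accelerate releases used fp16=True
    print(accelerator.state)                            # environment / processes / device / precision lines
    prepared = accelerator.prepare(model, optimizer, train_loader, eval_loader)
    return (accelerator, *prepared)                      # use accelerator.backward(loss) in the loop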
+003380/063150, loss: 0.189442, avg_loss: 0.497851 +003385/063150, loss: 0.250529, avg_loss: 0.497506 +003390/063150, loss: 0.350957, avg_loss: 0.497272 +003395/063150, loss: 0.291620, avg_loss: 0.497034 +003400/063150, loss: 0.283865, avg_loss: 0.496783 +003405/063150, loss: 0.355665, avg_loss: 0.496519 +003410/063150, loss: 0.156031, avg_loss: 0.496212 +003415/063150, loss: 0.326745, avg_loss: 0.495975 +003420/063150, loss: 0.273052, avg_loss: 0.495651 +003425/063150, loss: 0.648556, avg_loss: 0.495533 +003430/063150, loss: 0.430986, avg_loss: 0.495275 +003435/063150, loss: 0.338636, avg_loss: 0.494997 +003440/063150, loss: 0.266915, avg_loss: 0.494701 +003445/063150, loss: 0.241296, avg_loss: 0.494522 +003450/063150, loss: 0.416952, avg_loss: 0.494355 +003455/063150, loss: 0.210606, avg_loss: 0.494159 +003460/063150, loss: 0.300003, avg_loss: 0.493918 +003465/063150, loss: 0.188219, avg_loss: 0.493744 +003470/063150, loss: 0.333949, avg_loss: 0.493513 +003475/063150, loss: 0.575965, avg_loss: 0.493269 +003480/063150, loss: 0.398163, avg_loss: 0.493014 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3480/63150: {'accuracy': 0.8245412844036697} +003485/063150, loss: 0.329946, avg_loss: 0.492715 +003490/063150, loss: 0.276797, avg_loss: 0.492574 +003495/063150, loss: 0.271636, avg_loss: 0.492447 +003500/063150, loss: 0.318183, avg_loss: 0.492189 +003505/063150, loss: 0.287863, avg_loss: 0.491960 +003510/063150, loss: 0.241224, avg_loss: 0.491692 +003515/063150, loss: 0.248229, avg_loss: 0.491466 +003520/063150, loss: 0.203357, avg_loss: 0.491085 +003525/063150, loss: 0.334120, avg_loss: 0.490832 +003530/063150, loss: 0.816127, avg_loss: 0.490780 +003535/063150, loss: 0.374384, avg_loss: 0.490629 +003540/063150, loss: 0.251941, avg_loss: 0.490391 +003545/063150, loss: 0.275866, avg_loss: 0.490227 +003550/063150, loss: 0.276895, avg_loss: 0.489930 +003555/063150, loss: 0.383942, avg_loss: 0.489838 +003560/063150, loss: 0.227589, avg_loss: 0.489613 +003565/063150, loss: 0.407125, avg_loss: 0.489610 +003570/063150, loss: 0.258336, avg_loss: 0.489305 +003575/063150, loss: 0.380267, avg_loss: 0.489088 +003580/063150, loss: 0.297157, avg_loss: 0.488990 +003585/063150, loss: 0.397631, avg_loss: 0.488827 +003590/063150, loss: 0.203452, avg_loss: 0.488615 +003595/063150, loss: 0.439270, avg_loss: 0.488440 +003600/063150, loss: 0.205019, avg_loss: 0.488162 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3600/63150: {'accuracy': 0.8084862385321101} +003605/063150, loss: 0.189155, avg_loss: 0.487951 +003610/063150, loss: 0.229313, avg_loss: 0.487661 +003615/063150, loss: 0.370952, avg_loss: 0.487494 +003620/063150, loss: 0.205126, avg_loss: 0.487224 +003625/063150, loss: 0.437848, avg_loss: 0.487051 +003630/063150, loss: 0.383554, avg_loss: 0.486768 +003635/063150, loss: 0.427163, avg_loss: 0.486577 +003640/063150, loss: 0.425560, avg_loss: 0.486343 +003645/063150, loss: 0.226355, avg_loss: 0.486214 +003650/063150, loss: 0.568268, avg_loss: 0.486013 +003655/063150, loss: 0.340532, avg_loss: 0.485834 +003660/063150, loss: 0.338089, avg_loss: 0.485783 +003665/063150, loss: 0.175585, avg_loss: 0.485615 +003670/063150, loss: 0.401298, avg_loss: 0.485494 +003675/063150, loss: 0.354530, avg_loss: 0.485397 +003680/063150, loss: 0.398257, avg_loss: 0.485189 +003685/063150, loss: 0.413964, avg_loss: 0.484986 +003690/063150, loss: 0.495796, avg_loss: 0.484827 +003695/063150, loss: 
0.500072, avg_loss: 0.484619 +003700/063150, loss: 0.432035, avg_loss: 0.484414 +003705/063150, loss: 0.179378, avg_loss: 0.484087 +003710/063150, loss: 0.238655, avg_loss: 0.483893 +003715/063150, loss: 0.514123, avg_loss: 0.483672 +003720/063150, loss: 0.344935, avg_loss: 0.483506 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3720/63150: {'accuracy': 0.8279816513761468} +003725/063150, loss: 0.218731, avg_loss: 0.483263 +003730/063150, loss: 0.280191, avg_loss: 0.483006 +003735/063150, loss: 0.286088, avg_loss: 0.482821 +003740/063150, loss: 0.322779, avg_loss: 0.482655 +003745/063150, loss: 0.337575, avg_loss: 0.482444 +003750/063150, loss: 0.224053, avg_loss: 0.482281 +003755/063150, loss: 0.178751, avg_loss: 0.481987 +003760/063150, loss: 0.198184, avg_loss: 0.481745 +003765/063150, loss: 0.360390, avg_loss: 0.481527 +003770/063150, loss: 0.153401, avg_loss: 0.481235 +003775/063150, loss: 0.308784, avg_loss: 0.480957 +003780/063150, loss: 0.147440, avg_loss: 0.480649 +003785/063150, loss: 0.244711, avg_loss: 0.480516 +003790/063150, loss: 0.159410, avg_loss: 0.480247 +003795/063150, loss: 0.300179, avg_loss: 0.480061 +003800/063150, loss: 0.349719, avg_loss: 0.479742 +003805/063150, loss: 0.333127, avg_loss: 0.479573 +003810/063150, loss: 0.512503, avg_loss: 0.479473 +003815/063150, loss: 0.316065, avg_loss: 0.479315 +003820/063150, loss: 0.394462, avg_loss: 0.479157 +003825/063150, loss: 0.229573, avg_loss: 0.478914 +003830/063150, loss: 0.257653, avg_loss: 0.478606 +003835/063150, loss: 0.181622, avg_loss: 0.478397 +003840/063150, loss: 0.276539, avg_loss: 0.478220 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3840/63150: {'accuracy': 0.823394495412844} +003845/063150, loss: 0.205788, avg_loss: 0.478044 +003850/063150, loss: 0.269425, avg_loss: 0.477838 +003855/063150, loss: 0.313539, avg_loss: 0.477612 +003860/063150, loss: 0.481410, avg_loss: 0.477505 +003865/063150, loss: 0.326335, avg_loss: 0.477307 +003870/063150, loss: 0.277892, avg_loss: 0.477151 +003875/063150, loss: 0.485127, avg_loss: 0.476976 +003880/063150, loss: 0.130770, avg_loss: 0.476737 +003885/063150, loss: 0.357989, avg_loss: 0.476603 +003890/063150, loss: 0.414372, avg_loss: 0.476369 +003895/063150, loss: 0.511294, avg_loss: 0.476176 +003900/063150, loss: 0.281438, avg_loss: 0.475922 +003905/063150, loss: 0.522367, avg_loss: 0.475730 +003910/063150, loss: 0.193221, avg_loss: 0.475454 +003915/063150, loss: 0.312487, avg_loss: 0.475274 +003920/063150, loss: 0.246824, avg_loss: 0.475143 +003925/063150, loss: 0.424371, avg_loss: 0.474956 +003930/063150, loss: 0.191638, avg_loss: 0.474817 +003935/063150, loss: 0.328832, avg_loss: 0.474663 +003940/063150, loss: 0.167447, avg_loss: 0.474381 +003945/063150, loss: 0.192950, avg_loss: 0.474199 +003950/063150, loss: 0.143713, avg_loss: 0.474006 +003955/063150, loss: 0.312656, avg_loss: 0.473838 +003960/063150, loss: 0.180376, avg_loss: 0.473684 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 3960/63150: {'accuracy': 0.8142201834862385} +003965/063150, loss: 0.245379, avg_loss: 0.473425 +003970/063150, loss: 0.414509, avg_loss: 0.473334 +003975/063150, loss: 0.573777, avg_loss: 0.473283 +003980/063150, loss: 0.431763, avg_loss: 0.473142 +003985/063150, loss: 0.336310, avg_loss: 0.472953 +003990/063150, loss: 0.506686, avg_loss: 0.472871 +003995/063150, loss: 0.242396, 
avg_loss: 0.472633 +004000/063150, loss: 0.257115, avg_loss: 0.472442 +004005/063150, loss: 0.308776, avg_loss: 0.472229 +004010/063150, loss: 0.200311, avg_loss: 0.472067 +004015/063150, loss: 0.298820, avg_loss: 0.471853 +004020/063150, loss: 0.227573, avg_loss: 0.471579 +004025/063150, loss: 0.319509, avg_loss: 0.471440 +004030/063150, loss: 0.606248, avg_loss: 0.471350 +004035/063150, loss: 0.578137, avg_loss: 0.471285 +004040/063150, loss: 0.344481, avg_loss: 0.471059 +004045/063150, loss: 0.162110, avg_loss: 0.470830 +004050/063150, loss: 0.299887, avg_loss: 0.470668 +004055/063150, loss: 0.321251, avg_loss: 0.470464 +004060/063150, loss: 0.298908, avg_loss: 0.470328 +004065/063150, loss: 0.316801, avg_loss: 0.470166 +004070/063150, loss: 0.286965, avg_loss: 0.470026 +004075/063150, loss: 0.245791, avg_loss: 0.469788 +004080/063150, loss: 0.433150, avg_loss: 0.469643 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 4080/63150: {'accuracy': 0.8188073394495413} +004085/063150, loss: 0.402995, avg_loss: 0.469398 +004090/063150, loss: 0.283430, avg_loss: 0.469245 +004095/063150, loss: 0.270191, avg_loss: 0.469029 +004100/063150, loss: 0.261287, avg_loss: 0.468912 +004105/063150, loss: 0.254643, avg_loss: 0.468666 +004110/063150, loss: 0.412092, avg_loss: 0.468509 +004115/063150, loss: 0.427600, avg_loss: 0.468354 +004120/063150, loss: 0.224203, avg_loss: 0.468168 +004125/063150, loss: 0.184526, avg_loss: 0.467871 +004130/063150, loss: 0.336193, avg_loss: 0.467726 +004135/063150, loss: 0.524905, avg_loss: 0.467570 +004140/063150, loss: 0.208997, avg_loss: 0.467373 +004145/063150, loss: 0.213856, avg_loss: 0.467190 +004150/063150, loss: 0.441607, avg_loss: 0.467003 +004155/063150, loss: 0.515599, avg_loss: 0.466883 +004160/063150, loss: 0.283904, avg_loss: 0.466775 +004165/063150, loss: 0.197917, avg_loss: 0.466519 +004170/063150, loss: 0.310240, avg_loss: 0.466277 +004175/063150, loss: 0.189847, avg_loss: 0.466058 +004180/063150, loss: 0.162403, avg_loss: 0.465853 +004185/063150, loss: 0.368976, avg_loss: 0.465715 +004190/063150, loss: 0.149773, avg_loss: 0.465442 +004195/063150, loss: 0.170256, avg_loss: 0.465309 +004200/063150, loss: 0.327217, avg_loss: 0.465107 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 1, step 4200/63150: {'accuracy': 0.8279816513761468} +004205/063150, loss: 0.534513, avg_loss: 0.464948 +004210/063150, loss: 0.280758, avg_loss: 0.464745 +004215/063150, loss: 0.419127, avg_loss: 0.464535 +004220/063150, loss: 0.263336, avg_loss: 0.464373 +004225/063150, loss: 0.216963, avg_loss: 0.464180 +004230/063150, loss: 0.202848, avg_loss: 0.463952 +004235/063150, loss: 0.252699, avg_loss: 0.463746 +004240/063150, loss: 0.280739, avg_loss: 0.463539 +004245/063150, loss: 0.243150, avg_loss: 0.463346 +004250/063150, loss: 0.222552, avg_loss: 0.463098 +004255/063150, loss: 0.158287, avg_loss: 0.462887 +004260/063150, loss: 0.394647, avg_loss: 0.462672 +004265/063150, loss: 0.295818, avg_loss: 0.462479 +004270/063150, loss: 0.453301, avg_loss: 0.462292 +004275/063150, loss: 0.273588, avg_loss: 0.462118 +004280/063150, loss: 0.347643, avg_loss: 0.461983 +004285/063150, loss: 0.333519, avg_loss: 0.461748 +004290/063150, loss: 0.097076, avg_loss: 0.461539 +004295/063150, loss: 0.366240, avg_loss: 0.461389 +004300/063150, loss: 0.298476, avg_loss: 0.461151 +004305/063150, loss: 0.467995, avg_loss: 0.460991 +004310/063150, loss: 0.200606, avg_loss: 0.460794 
+004315/063150, loss: 0.245891, avg_loss: 0.460592 +004320/063150, loss: 0.358560, avg_loss: 0.460404 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 4320/63150: {'accuracy': 0.8302752293577982} +004325/063150, loss: 0.204014, avg_loss: 0.460226 +004330/063150, loss: 0.197800, avg_loss: 0.460075 +004335/063150, loss: 0.227778, avg_loss: 0.459917 +004340/063150, loss: 0.349984, avg_loss: 0.459709 +004345/063150, loss: 0.303346, avg_loss: 0.459513 +004350/063150, loss: 0.409578, avg_loss: 0.459373 +004355/063150, loss: 0.295931, avg_loss: 0.459159 +004360/063150, loss: 0.304503, avg_loss: 0.458994 +004365/063150, loss: 0.150120, avg_loss: 0.458815 +004370/063150, loss: 0.402723, avg_loss: 0.458620 +004375/063150, loss: 0.281013, avg_loss: 0.458396 +004380/063150, loss: 0.358963, avg_loss: 0.458200 +004385/063150, loss: 0.284151, avg_loss: 0.458011 +004390/063150, loss: 0.591740, avg_loss: 0.457902 +004395/063150, loss: 0.235476, avg_loss: 0.457620 +004400/063150, loss: 0.387758, avg_loss: 0.457485 +004405/063150, loss: 0.532757, avg_loss: 0.457315 +004410/063150, loss: 0.239686, avg_loss: 0.457112 +004415/063150, loss: 0.276296, avg_loss: 0.456937 +004420/063150, loss: 0.229210, avg_loss: 0.456741 +004425/063150, loss: 0.433873, avg_loss: 0.456632 +004430/063150, loss: 0.210645, avg_loss: 0.456422 +004435/063150, loss: 0.238904, avg_loss: 0.456262 +004440/063150, loss: 0.187905, avg_loss: 0.456049 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 4440/63150: {'accuracy': 0.8371559633027523} +004445/063150, loss: 0.487801, avg_loss: 0.455872 +004450/063150, loss: 0.245235, avg_loss: 0.455642 +004455/063150, loss: 0.208553, avg_loss: 0.455426 +004460/063150, loss: 0.215836, avg_loss: 0.455196 +004465/063150, loss: 0.178831, avg_loss: 0.455039 +004470/063150, loss: 0.450658, avg_loss: 0.454869 +004475/063150, loss: 0.105808, avg_loss: 0.454571 +004480/063150, loss: 0.325373, avg_loss: 0.454372 +004485/063150, loss: 0.287978, avg_loss: 0.454114 +004490/063150, loss: 0.241827, avg_loss: 0.453863 +004495/063150, loss: 0.312952, avg_loss: 0.453664 +004500/063150, loss: 0.278380, avg_loss: 0.453492 +004505/063150, loss: 0.210514, avg_loss: 0.453240 +004510/063150, loss: 0.364019, avg_loss: 0.453080 +004515/063150, loss: 0.503646, avg_loss: 0.453042 +004520/063150, loss: 0.317447, avg_loss: 0.452801 +004525/063150, loss: 0.251832, avg_loss: 0.452619 +004530/063150, loss: 0.153919, avg_loss: 0.452398 +004535/063150, loss: 0.239300, avg_loss: 0.452199 +004540/063150, loss: 0.300717, avg_loss: 0.451941 +004545/063150, loss: 0.258976, avg_loss: 0.451722 +004550/063150, loss: 0.370137, avg_loss: 0.451493 +004555/063150, loss: 0.324233, avg_loss: 0.451366 +004560/063150, loss: 0.174304, avg_loss: 0.451143 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 4560/63150: {'accuracy': 0.8027522935779816} +004565/063150, loss: 0.263388, avg_loss: 0.450991 +004570/063150, loss: 0.153013, avg_loss: 0.450785 +004575/063150, loss: 0.285290, avg_loss: 0.450510 +004580/063150, loss: 0.049658, avg_loss: 0.450311 +004585/063150, loss: 0.401840, avg_loss: 0.450169 +004590/063150, loss: 0.418982, avg_loss: 0.450130 +004595/063150, loss: 0.198632, avg_loss: 0.449935 +004600/063150, loss: 0.458129, avg_loss: 0.449748 +004605/063150, loss: 0.417927, avg_loss: 0.449541 +004610/063150, loss: 0.203784, avg_loss: 0.449399 
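The Namespace sets early_stop=True with early_stop_metric='accuracy', so the run presumably tracks the best dev accuracy seen at these eval points and halts once it stops improving; the patience is not visible in this log. A minimal tracker of that kind (the class name and the patience value are assumptions):

# Illustrative early-stopping tracker, not taken from the repository; patience=10 is an assumed value.
class EarlyStopper:
    def __init__(self, patience=10):
        self.best = float("-inf")
        self.patience = patience
        self.bad_evals = 0

    def update(self, accuracy):
        """Return True when training should stop."""
        if accuracy > self.best:
            self.best = accuracy        # a real run would also checkpoint to output_dir here
            self.bad_evals = 0
            return False
        self.bad_evals += 1
        return self.bad_evals >= self.patience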
+004615/063150, loss: 0.186797, avg_loss: 0.449177 +004620/063150, loss: 0.146794, avg_loss: 0.449031 +004625/063150, loss: 0.146629, avg_loss: 0.448768 +004630/063150, loss: 0.333879, avg_loss: 0.448581 +004635/063150, loss: 0.261697, avg_loss: 0.448384 +004640/063150, loss: 0.289962, avg_loss: 0.448185 +004645/063150, loss: 0.289506, avg_loss: 0.447938 +004650/063150, loss: 0.339763, avg_loss: 0.447763 +004655/063150, loss: 0.183156, avg_loss: 0.447539 +004660/063150, loss: 0.454979, avg_loss: 0.447429 +004665/063150, loss: 0.425277, avg_loss: 0.447261 +004670/063150, loss: 0.166446, avg_loss: 0.447027 +004675/063150, loss: 0.427591, avg_loss: 0.446802 +004680/063150, loss: 0.432299, avg_loss: 0.446649 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 4680/63150: {'accuracy': 0.8394495412844036} +004685/063150, loss: 0.238948, avg_loss: 0.446484 +004690/063150, loss: 0.627134, avg_loss: 0.446360 +004695/063150, loss: 0.271760, avg_loss: 0.446131 +004700/063150, loss: 0.209785, avg_loss: 0.445915 +004705/063150, loss: 0.203467, avg_loss: 0.445696 +004710/063150, loss: 0.474783, avg_loss: 0.445573 +004715/063150, loss: 0.420667, avg_loss: 0.445427 +004720/063150, loss: 0.195672, avg_loss: 0.445222 +004725/063150, loss: 0.321105, avg_loss: 0.445016 +004730/063150, loss: 0.539818, avg_loss: 0.444896 +004735/063150, loss: 0.223064, avg_loss: 0.444749 +004740/063150, loss: 0.395136, avg_loss: 0.444562 +004745/063150, loss: 0.198465, avg_loss: 0.444423 +004750/063150, loss: 0.278540, avg_loss: 0.444251 +004755/063150, loss: 0.349364, avg_loss: 0.444091 +004760/063150, loss: 0.229996, avg_loss: 0.443912 +004765/063150, loss: 0.360927, avg_loss: 0.443755 +004770/063150, loss: 0.201597, avg_loss: 0.443511 +004775/063150, loss: 0.275852, avg_loss: 0.443293 +004780/063150, loss: 0.365218, avg_loss: 0.443081 +004785/063150, loss: 0.312531, avg_loss: 0.442895 +004790/063150, loss: 0.205163, avg_loss: 0.442677 +004795/063150, loss: 0.198468, avg_loss: 0.442459 +004800/063150, loss: 0.316452, avg_loss: 0.442294 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 4800/63150: {'accuracy': 0.8428899082568807} +004805/063150, loss: 0.199581, avg_loss: 0.442073 +004810/063150, loss: 0.278382, avg_loss: 0.441916 +004815/063150, loss: 0.286555, avg_loss: 0.441730 +004820/063150, loss: 0.227525, avg_loss: 0.441465 +004825/063150, loss: 0.103403, avg_loss: 0.441270 +004830/063150, loss: 0.153055, avg_loss: 0.441054 +004835/063150, loss: 0.270689, avg_loss: 0.440880 +004840/063150, loss: 0.230690, avg_loss: 0.440697 +004845/063150, loss: 0.290590, avg_loss: 0.440567 +004850/063150, loss: 0.168618, avg_loss: 0.440329 +004855/063150, loss: 0.341406, avg_loss: 0.440188 +004860/063150, loss: 0.285294, avg_loss: 0.440123 +004865/063150, loss: 0.309046, avg_loss: 0.439895 +004870/063150, loss: 0.381336, avg_loss: 0.439703 +004875/063150, loss: 0.290316, avg_loss: 0.439567 +004880/063150, loss: 0.149874, avg_loss: 0.439405 +004885/063150, loss: 0.328731, avg_loss: 0.439294 +004890/063150, loss: 0.154876, avg_loss: 0.439079 +004895/063150, loss: 0.409986, avg_loss: 0.438900 +004900/063150, loss: 0.389197, avg_loss: 0.438775 +004905/063150, loss: 0.285581, avg_loss: 0.438648 +004910/063150, loss: 0.250653, avg_loss: 0.438452 +004915/063150, loss: 0.210221, avg_loss: 0.438304 +004920/063150, loss: 0.244220, avg_loss: 0.438145 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous 
batch size per device = 32 +epoch 2, step 4920/63150: {'accuracy': 0.841743119266055} +004925/063150, loss: 0.216394, avg_loss: 0.437980 +004930/063150, loss: 0.448297, avg_loss: 0.437806 +004935/063150, loss: 0.372978, avg_loss: 0.437691 +004940/063150, loss: 0.189112, avg_loss: 0.437556 +004945/063150, loss: 0.400141, avg_loss: 0.437408 +004950/063150, loss: 0.246144, avg_loss: 0.437198 +004955/063150, loss: 0.415682, avg_loss: 0.437082 +004960/063150, loss: 0.267845, avg_loss: 0.436933 +004965/063150, loss: 0.191768, avg_loss: 0.436742 +004970/063150, loss: 0.292821, avg_loss: 0.436566 +004975/063150, loss: 0.302429, avg_loss: 0.436411 +004980/063150, loss: 0.297605, avg_loss: 0.436181 +004985/063150, loss: 0.461785, avg_loss: 0.435991 +004990/063150, loss: 0.198792, avg_loss: 0.435716 +004995/063150, loss: 0.503127, avg_loss: 0.435565 +005000/063150, loss: 0.429058, avg_loss: 0.435420 +005005/063150, loss: 0.300414, avg_loss: 0.435222 +005010/063150, loss: 0.288963, avg_loss: 0.435010 +005015/063150, loss: 0.238046, avg_loss: 0.434812 +005020/063150, loss: 0.263563, avg_loss: 0.434594 +005025/063150, loss: 0.256921, avg_loss: 0.434476 +005030/063150, loss: 0.147477, avg_loss: 0.434275 +005035/063150, loss: 0.394888, avg_loss: 0.434118 +005040/063150, loss: 0.294512, avg_loss: 0.433977 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5040/63150: {'accuracy': 0.8474770642201835} +005045/063150, loss: 0.286427, avg_loss: 0.433767 +005050/063150, loss: 0.365111, avg_loss: 0.433589 +005055/063150, loss: 0.264320, avg_loss: 0.433527 +005060/063150, loss: 0.165543, avg_loss: 0.433324 +005065/063150, loss: 0.223556, avg_loss: 0.433158 +005070/063150, loss: 0.285883, avg_loss: 0.432927 +005075/063150, loss: 0.114102, avg_loss: 0.432806 +005080/063150, loss: 0.281807, avg_loss: 0.432717 +005085/063150, loss: 0.189452, avg_loss: 0.432565 +005090/063150, loss: 0.424987, avg_loss: 0.432412 +005095/063150, loss: 0.285115, avg_loss: 0.432226 +005100/063150, loss: 0.424671, avg_loss: 0.432093 +005105/063150, loss: 0.252718, avg_loss: 0.431929 +005110/063150, loss: 0.233115, avg_loss: 0.431757 +005115/063150, loss: 0.284833, avg_loss: 0.431530 +005120/063150, loss: 0.302364, avg_loss: 0.431387 +005125/063150, loss: 0.226655, avg_loss: 0.431239 +005130/063150, loss: 0.587139, avg_loss: 0.431109 +005135/063150, loss: 0.137232, avg_loss: 0.430907 +005140/063150, loss: 0.383301, avg_loss: 0.430851 +005145/063150, loss: 0.310483, avg_loss: 0.430627 +005150/063150, loss: 0.170012, avg_loss: 0.430463 +005155/063150, loss: 0.165516, avg_loss: 0.430218 +005160/063150, loss: 0.222842, avg_loss: 0.430030 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5160/63150: {'accuracy': 0.8463302752293578} +005165/063150, loss: 0.238556, avg_loss: 0.429955 +005170/063150, loss: 0.465534, avg_loss: 0.429805 +005175/063150, loss: 0.270709, avg_loss: 0.429676 +005180/063150, loss: 0.161318, avg_loss: 0.429460 +005185/063150, loss: 0.235425, avg_loss: 0.429273 +005190/063150, loss: 0.214050, avg_loss: 0.429125 +005195/063150, loss: 0.371211, avg_loss: 0.429032 +005200/063150, loss: 0.305828, avg_loss: 0.428797 +005205/063150, loss: 0.262175, avg_loss: 0.428669 +005210/063150, loss: 0.165639, avg_loss: 0.428471 +005215/063150, loss: 0.287391, avg_loss: 0.428338 +005220/063150, loss: 0.165492, avg_loss: 0.428091 +005225/063150, loss: 0.211443, avg_loss: 0.427916 +005230/063150, loss: 0.269998, 
avg_loss: 0.427763 +005235/063150, loss: 0.255843, avg_loss: 0.427645 +005240/063150, loss: 0.229471, avg_loss: 0.427557 +005245/063150, loss: 0.088537, avg_loss: 0.427369 +005250/063150, loss: 0.264156, avg_loss: 0.427197 +005255/063150, loss: 0.191877, avg_loss: 0.426997 +005260/063150, loss: 0.126016, avg_loss: 0.426827 +005265/063150, loss: 0.335275, avg_loss: 0.426693 +005270/063150, loss: 0.229536, avg_loss: 0.426524 +005275/063150, loss: 0.235785, avg_loss: 0.426385 +005280/063150, loss: 0.276160, avg_loss: 0.426165 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5280/63150: {'accuracy': 0.8325688073394495} +005285/063150, loss: 0.280916, avg_loss: 0.425973 +005290/063150, loss: 0.270638, avg_loss: 0.425829 +005295/063150, loss: 0.174957, avg_loss: 0.425669 +005300/063150, loss: 0.116287, avg_loss: 0.425525 +005305/063150, loss: 0.313111, avg_loss: 0.425400 +005310/063150, loss: 0.334110, avg_loss: 0.425252 +005315/063150, loss: 0.349562, avg_loss: 0.425126 +005320/063150, loss: 0.283849, avg_loss: 0.425032 +005325/063150, loss: 0.204222, avg_loss: 0.424851 +005330/063150, loss: 0.261867, avg_loss: 0.424644 +005335/063150, loss: 0.343752, avg_loss: 0.424509 +005340/063150, loss: 0.355949, avg_loss: 0.424404 +005345/063150, loss: 0.262267, avg_loss: 0.424234 +005350/063150, loss: 0.245239, avg_loss: 0.424023 +005355/063150, loss: 0.124010, avg_loss: 0.423851 +005360/063150, loss: 0.318031, avg_loss: 0.423722 +005365/063150, loss: 0.179093, avg_loss: 0.423529 +005370/063150, loss: 0.369743, avg_loss: 0.423343 +005375/063150, loss: 0.411769, avg_loss: 0.423239 +005380/063150, loss: 0.172745, avg_loss: 0.423072 +005385/063150, loss: 0.395810, avg_loss: 0.422913 +005390/063150, loss: 0.281775, avg_loss: 0.422846 +005395/063150, loss: 0.159977, avg_loss: 0.422638 +005400/063150, loss: 0.245816, avg_loss: 0.422545 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5400/63150: {'accuracy': 0.8474770642201835} +005405/063150, loss: 0.222701, avg_loss: 0.422430 +005410/063150, loss: 0.411298, avg_loss: 0.422258 +005415/063150, loss: 0.189946, avg_loss: 0.422122 +005420/063150, loss: 0.125218, avg_loss: 0.421945 +005425/063150, loss: 0.296084, avg_loss: 0.421884 +005430/063150, loss: 0.181411, avg_loss: 0.421677 +005435/063150, loss: 0.342984, avg_loss: 0.421520 +005440/063150, loss: 0.138109, avg_loss: 0.421338 +005445/063150, loss: 0.298496, avg_loss: 0.421234 +005450/063150, loss: 0.126139, avg_loss: 0.421085 +005455/063150, loss: 0.335512, avg_loss: 0.420977 +005460/063150, loss: 0.315063, avg_loss: 0.420867 +005465/063150, loss: 0.271297, avg_loss: 0.420714 +005470/063150, loss: 0.173331, avg_loss: 0.420556 +005475/063150, loss: 0.132915, avg_loss: 0.420399 +005480/063150, loss: 0.444826, avg_loss: 0.420278 +005485/063150, loss: 0.247393, avg_loss: 0.420095 +005490/063150, loss: 0.144030, avg_loss: 0.419938 +005495/063150, loss: 0.251728, avg_loss: 0.419799 +005500/063150, loss: 0.282356, avg_loss: 0.419636 +005505/063150, loss: 0.526442, avg_loss: 0.419553 +005510/063150, loss: 0.246073, avg_loss: 0.419412 +005515/063150, loss: 0.308111, avg_loss: 0.419253 +005520/063150, loss: 0.310621, avg_loss: 0.419104 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5520/63150: {'accuracy': 0.8486238532110092} +005525/063150, loss: 0.310869, avg_loss: 0.418925 +005530/063150, loss: 0.256039, avg_loss: 
0.418742 +005535/063150, loss: 0.245214, avg_loss: 0.418653 +005540/063150, loss: 0.375845, avg_loss: 0.418556 +005545/063150, loss: 0.386721, avg_loss: 0.418405 +005550/063150, loss: 0.076456, avg_loss: 0.418188 +005555/063150, loss: 0.301503, avg_loss: 0.418126 +005560/063150, loss: 0.111197, avg_loss: 0.417964 +005565/063150, loss: 0.360656, avg_loss: 0.417825 +005570/063150, loss: 0.236154, avg_loss: 0.417703 +005575/063150, loss: 0.489955, avg_loss: 0.417533 +005580/063150, loss: 0.419160, avg_loss: 0.417395 +005585/063150, loss: 0.252846, avg_loss: 0.417191 +005590/063150, loss: 0.378713, avg_loss: 0.417089 +005595/063150, loss: 0.242426, avg_loss: 0.416918 +005600/063150, loss: 0.101006, avg_loss: 0.416771 +005605/063150, loss: 0.310748, avg_loss: 0.416665 +005610/063150, loss: 0.314502, avg_loss: 0.416532 +005615/063150, loss: 0.134294, avg_loss: 0.416373 +005620/063150, loss: 0.111358, avg_loss: 0.416124 +005625/063150, loss: 0.200313, avg_loss: 0.415975 +005630/063150, loss: 0.152119, avg_loss: 0.415853 +005635/063150, loss: 0.359868, avg_loss: 0.415691 +005640/063150, loss: 0.263532, avg_loss: 0.415543 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5640/63150: {'accuracy': 0.8509174311926605} +005645/063150, loss: 0.255680, avg_loss: 0.415401 +005650/063150, loss: 0.219280, avg_loss: 0.415199 +005655/063150, loss: 0.137261, avg_loss: 0.415025 +005660/063150, loss: 0.106693, avg_loss: 0.414909 +005665/063150, loss: 0.247748, avg_loss: 0.414795 +005670/063150, loss: 0.222624, avg_loss: 0.414654 +005675/063150, loss: 0.142155, avg_loss: 0.414477 +005680/063150, loss: 0.356671, avg_loss: 0.414298 +005685/063150, loss: 0.185805, avg_loss: 0.414131 +005690/063150, loss: 0.119556, avg_loss: 0.413944 +005695/063150, loss: 0.093402, avg_loss: 0.413751 +005700/063150, loss: 0.103428, avg_loss: 0.413550 +005705/063150, loss: 0.191362, avg_loss: 0.413359 +005710/063150, loss: 0.354794, avg_loss: 0.413163 +005715/063150, loss: 0.263408, avg_loss: 0.413012 +005720/063150, loss: 0.077534, avg_loss: 0.412811 +005725/063150, loss: 0.080398, avg_loss: 0.412594 +005730/063150, loss: 0.265768, avg_loss: 0.412435 +005735/063150, loss: 0.271794, avg_loss: 0.412280 +005740/063150, loss: 0.082713, avg_loss: 0.412085 +005745/063150, loss: 0.280030, avg_loss: 0.411907 +005750/063150, loss: 0.088473, avg_loss: 0.411682 +005755/063150, loss: 0.081587, avg_loss: 0.411499 +005760/063150, loss: 0.171882, avg_loss: 0.411279 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5760/63150: {'accuracy': 0.8474770642201835} +005765/063150, loss: 0.416375, avg_loss: 0.411165 +005770/063150, loss: 0.269925, avg_loss: 0.410981 +005775/063150, loss: 0.289832, avg_loss: 0.410822 +005780/063150, loss: 0.107895, avg_loss: 0.410649 +005785/063150, loss: 0.414067, avg_loss: 0.410539 +005790/063150, loss: 0.227315, avg_loss: 0.410407 +005795/063150, loss: 0.405493, avg_loss: 0.410256 +005800/063150, loss: 0.191451, avg_loss: 0.410080 +005805/063150, loss: 0.242881, avg_loss: 0.409952 +005810/063150, loss: 0.411584, avg_loss: 0.409858 +005815/063150, loss: 0.193257, avg_loss: 0.409707 +005820/063150, loss: 0.174544, avg_loss: 0.409572 +005825/063150, loss: 0.309917, avg_loss: 0.409427 +005830/063150, loss: 0.180061, avg_loss: 0.409250 +005835/063150, loss: 0.419392, avg_loss: 0.409187 +005840/063150, loss: 0.117432, avg_loss: 0.408997 +005845/063150, loss: 0.359205, avg_loss: 0.408869 
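
The avg_loss column above falls far more slowly than the per-record loss (the averages sit near 0.41-0.42 while individual losses range from roughly 0.08 to 0.49), which is consistent with a cumulative mean over every optimization step since the start of training rather than a short moving window. A minimal sketch of that bookkeeping, assuming a cumulative mean and using illustrative names not taken from the actual training script:

    def log_losses(step_losses, total_steps=63150, print_every=5):
        # Reproduce the "loss: ..., avg_loss: ..." records, assuming avg_loss
        # is the cumulative mean of all per-step losses seen so far.
        running_sum = 0.0
        for step, loss in enumerate(step_losses, start=1):
            running_sum += loss
            avg_loss = running_sum / step
            if step % print_every == 0:
                print(f"{step:06d}/{total_steps:06d}, loss: {loss:.6f}, avg_loss: {avg_loss:.6f}")

    # Illustrative per-step losses, not values from this run:
    log_losses([0.70, 0.69, 0.68, 0.66, 0.65, 0.62, 0.60, 0.58, 0.55, 0.52])

An exponential moving average would track the recent 0.1-0.4 losses much more closely than the ~0.41 values logged here, which is why a cumulative mean is the assumed interpretation.
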
+005850/063150, loss: 0.146162, avg_loss: 0.408702 +005855/063150, loss: 0.412644, avg_loss: 0.408624 +005860/063150, loss: 0.294692, avg_loss: 0.408494 +005865/063150, loss: 0.162512, avg_loss: 0.408370 +005870/063150, loss: 0.212495, avg_loss: 0.408199 +005875/063150, loss: 0.463353, avg_loss: 0.408061 +005880/063150, loss: 0.250324, avg_loss: 0.407958 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 5880/63150: {'accuracy': 0.8451834862385321} +005885/063150, loss: 0.373464, avg_loss: 0.407824 +005890/063150, loss: 0.064047, avg_loss: 0.407624 +005895/063150, loss: 0.139490, avg_loss: 0.407460 +005900/063150, loss: 0.152840, avg_loss: 0.407295 +005905/063150, loss: 0.198378, avg_loss: 0.407160 +005910/063150, loss: 0.096751, avg_loss: 0.407004 +005915/063150, loss: 0.259202, avg_loss: 0.406893 +005920/063150, loss: 0.214933, avg_loss: 0.406752 +005925/063150, loss: 0.333430, avg_loss: 0.406673 +005930/063150, loss: 0.246953, avg_loss: 0.406552 +005935/063150, loss: 0.163623, avg_loss: 0.406404 +005940/063150, loss: 0.180992, avg_loss: 0.406240 +005945/063150, loss: 0.252869, avg_loss: 0.406076 +005950/063150, loss: 0.334405, avg_loss: 0.405930 +005955/063150, loss: 0.406325, avg_loss: 0.405770 +005960/063150, loss: 0.252790, avg_loss: 0.405649 +005965/063150, loss: 0.133011, avg_loss: 0.405508 +005970/063150, loss: 0.165406, avg_loss: 0.405336 +005975/063150, loss: 0.296072, avg_loss: 0.405239 +005980/063150, loss: 0.252766, avg_loss: 0.405104 +005985/063150, loss: 0.248758, avg_loss: 0.404941 +005990/063150, loss: 0.183379, avg_loss: 0.404778 +005995/063150, loss: 0.332075, avg_loss: 0.404645 +006000/063150, loss: 0.190178, avg_loss: 0.404505 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 6000/63150: {'accuracy': 0.8577981651376146} +006005/063150, loss: 0.164176, avg_loss: 0.404329 +006010/063150, loss: 0.250037, avg_loss: 0.404126 +006015/063150, loss: 0.147910, avg_loss: 0.404039 +006020/063150, loss: 0.293327, avg_loss: 0.403912 +006025/063150, loss: 0.296977, avg_loss: 0.403782 +006030/063150, loss: 0.122955, avg_loss: 0.403677 +006035/063150, loss: 0.173654, avg_loss: 0.403527 +006040/063150, loss: 0.306564, avg_loss: 0.403507 +006045/063150, loss: 0.277148, avg_loss: 0.403381 +006050/063150, loss: 0.324062, avg_loss: 0.403264 +006055/063150, loss: 0.247942, avg_loss: 0.403152 +006060/063150, loss: 0.338937, avg_loss: 0.403021 +006065/063150, loss: 0.197391, avg_loss: 0.402836 +006070/063150, loss: 0.294998, avg_loss: 0.402756 +006075/063150, loss: 0.267503, avg_loss: 0.402596 +006080/063150, loss: 0.218786, avg_loss: 0.402422 +006085/063150, loss: 0.365590, avg_loss: 0.402392 +006090/063150, loss: 0.310873, avg_loss: 0.402275 +006095/063150, loss: 0.220314, avg_loss: 0.402125 +006100/063150, loss: 0.257281, avg_loss: 0.401961 +006105/063150, loss: 0.306239, avg_loss: 0.401818 +006110/063150, loss: 0.172037, avg_loss: 0.401694 +006115/063150, loss: 0.128923, avg_loss: 0.401562 +006120/063150, loss: 0.309872, avg_loss: 0.401441 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 6120/63150: {'accuracy': 0.8440366972477065} +006125/063150, loss: 0.161966, avg_loss: 0.401201 +006130/063150, loss: 0.205157, avg_loss: 0.401070 +006135/063150, loss: 0.314302, avg_loss: 0.400951 +006140/063150, loss: 0.435671, avg_loss: 0.400825 +006145/063150, loss: 0.269398, avg_loss: 0.400691 
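
Across the last few evaluation blocks the dev accuracy moves non-monotonically (roughly 0.845, 0.858, 0.844, 0.852, 0.860), so the useful quantity to carry forward is the best accuracy seen so far. A small sketch of that bookkeeping with a patience-style stopping rule; the patience value, class name, and stopping criterion are illustrative assumptions and are not read from the training script or visible in this part of the log:

    class BestMetricTracker:
        # Track the best dev accuracy across periodic evaluations and signal when
        # training has gone `patience` evaluations without improvement.
        def __init__(self, patience=10):
            self.best = float("-inf")
            self.patience = patience
            self.bad_evals = 0

        def update(self, accuracy):
            if accuracy > self.best:
                self.best = accuracy
                self.bad_evals = 0
                return True          # a caller would typically save a checkpoint here
            self.bad_evals += 1
            return False

        @property
        def should_stop(self):
            return self.bad_evals >= self.patience

    # Feeding the five most recent dev accuracies above:
    tracker = BestMetricTracker(patience=10)
    for acc in (0.8452, 0.8578, 0.8440, 0.8521, 0.8601):
        tracker.update(acc)
    print(tracker.best, tracker.should_stop)   # 0.8601 False
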
+006150/063150, loss: 0.577739, avg_loss: 0.400623 +006155/063150, loss: 0.200470, avg_loss: 0.400501 +006160/063150, loss: 0.365542, avg_loss: 0.400437 +006165/063150, loss: 0.475577, avg_loss: 0.400395 +006170/063150, loss: 0.176630, avg_loss: 0.400208 +006175/063150, loss: 0.274264, avg_loss: 0.400035 +006180/063150, loss: 0.244795, avg_loss: 0.399907 +006185/063150, loss: 0.361576, avg_loss: 0.399846 +006190/063150, loss: 0.191777, avg_loss: 0.399653 +006195/063150, loss: 0.688448, avg_loss: 0.399682 +006200/063150, loss: 0.361912, avg_loss: 0.399552 +006205/063150, loss: 0.314092, avg_loss: 0.399485 +006210/063150, loss: 0.307010, avg_loss: 0.399336 +006215/063150, loss: 0.127505, avg_loss: 0.399155 +006220/063150, loss: 0.185172, avg_loss: 0.399012 +006225/063150, loss: 0.328700, avg_loss: 0.398899 +006230/063150, loss: 0.240604, avg_loss: 0.398797 +006235/063150, loss: 0.333758, avg_loss: 0.398680 +006240/063150, loss: 0.228076, avg_loss: 0.398525 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 2, step 6240/63150: {'accuracy': 0.8520642201834863} +006245/063150, loss: 0.162701, avg_loss: 0.398416 +006250/063150, loss: 0.210361, avg_loss: 0.398263 +006255/063150, loss: 0.208135, avg_loss: 0.398196 +006260/063150, loss: 0.085145, avg_loss: 0.398034 +006265/063150, loss: 0.188135, avg_loss: 0.397893 +006270/063150, loss: 0.495432, avg_loss: 0.397817 +006275/063150, loss: 0.326975, avg_loss: 0.397740 +006280/063150, loss: 0.179111, avg_loss: 0.397617 +006285/063150, loss: 0.246767, avg_loss: 0.397447 +006290/063150, loss: 0.171531, avg_loss: 0.397311 +006295/063150, loss: 0.418926, avg_loss: 0.397200 +006300/063150, loss: 0.260764, avg_loss: 0.397097 +006305/063150, loss: 0.243057, avg_loss: 0.396923 +006310/063150, loss: 0.201061, avg_loss: 0.396799 +006315/063150, loss: 0.118235, avg_loss: 0.396668 +006320/063150, loss: 0.056191, avg_loss: 0.396527 +006325/063150, loss: 0.221699, avg_loss: 0.396330 +006330/063150, loss: 0.318177, avg_loss: 0.396179 +006335/063150, loss: 0.375150, avg_loss: 0.396131 +006340/063150, loss: 0.267562, avg_loss: 0.396028 +006345/063150, loss: 0.237121, avg_loss: 0.395841 +006350/063150, loss: 0.216224, avg_loss: 0.395679 +006355/063150, loss: 0.214655, avg_loss: 0.395543 +006360/063150, loss: 0.077958, avg_loss: 0.395376 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 6360/63150: {'accuracy': 0.8600917431192661} +006365/063150, loss: 0.326005, avg_loss: 0.395259 +006370/063150, loss: 0.341760, avg_loss: 0.395103 +006375/063150, loss: 0.220842, avg_loss: 0.394972 +006380/063150, loss: 0.145620, avg_loss: 0.394820 +006385/063150, loss: 0.249074, avg_loss: 0.394666 +006390/063150, loss: 0.275174, avg_loss: 0.394557 +006395/063150, loss: 0.315834, avg_loss: 0.394397 +006400/063150, loss: 0.279352, avg_loss: 0.394320 +006405/063150, loss: 0.128621, avg_loss: 0.394134 +006410/063150, loss: 0.124633, avg_loss: 0.394017 +006415/063150, loss: 0.106450, avg_loss: 0.393873 +006420/063150, loss: 0.171203, avg_loss: 0.393712 +006425/063150, loss: 0.282090, avg_loss: 0.393565 +006430/063150, loss: 0.530319, avg_loss: 0.393476 +006435/063150, loss: 0.118593, avg_loss: 0.393310 +006440/063150, loss: 0.232819, avg_loss: 0.393136 +006445/063150, loss: 0.038217, avg_loss: 0.393001 +006450/063150, loss: 0.043214, avg_loss: 0.392823 +006455/063150, loss: 0.226650, avg_loss: 0.392666 +006460/063150, loss: 0.179457, avg_loss: 0.392524 +006465/063150, loss: 
0.193297, avg_loss: 0.392434 +006470/063150, loss: 0.154268, avg_loss: 0.392331 +006475/063150, loss: 0.241645, avg_loss: 0.392187 +006480/063150, loss: 0.245890, avg_loss: 0.392046 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 6480/63150: {'accuracy': 0.8600917431192661} +006485/063150, loss: 0.201324, avg_loss: 0.391934 +006490/063150, loss: 0.074267, avg_loss: 0.391731 +006495/063150, loss: 0.175214, avg_loss: 0.391588 +006500/063150, loss: 0.275347, avg_loss: 0.391462 +006505/063150, loss: 0.226976, avg_loss: 0.391336 +006510/063150, loss: 0.198437, avg_loss: 0.391155 +006515/063150, loss: 0.171466, avg_loss: 0.391018 +006520/063150, loss: 0.086039, avg_loss: 0.390852 +006525/063150, loss: 0.162816, avg_loss: 0.390692 +006530/063150, loss: 0.104518, avg_loss: 0.390587 +006535/063150, loss: 0.145672, avg_loss: 0.390452 +006540/063150, loss: 0.284854, avg_loss: 0.390359 +006545/063150, loss: 0.146214, avg_loss: 0.390216 +006550/063150, loss: 0.152664, avg_loss: 0.390071 +006555/063150, loss: 0.324599, avg_loss: 0.389960 +006560/063150, loss: 0.080889, avg_loss: 0.389772 +006565/063150, loss: 0.106667, avg_loss: 0.389588 +006570/063150, loss: 0.109226, avg_loss: 0.389452 +006575/063150, loss: 0.138286, avg_loss: 0.389298 +006580/063150, loss: 0.213736, avg_loss: 0.389130 +006585/063150, loss: 0.161279, avg_loss: 0.389022 +006590/063150, loss: 0.142323, avg_loss: 0.388865 +006595/063150, loss: 0.178648, avg_loss: 0.388785 +006600/063150, loss: 0.120253, avg_loss: 0.388600 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 6600/63150: {'accuracy': 0.8474770642201835} +006605/063150, loss: 0.169081, avg_loss: 0.388442 +006610/063150, loss: 0.187862, avg_loss: 0.388272 +006615/063150, loss: 0.353255, avg_loss: 0.388141 +006620/063150, loss: 0.169815, avg_loss: 0.387964 +006625/063150, loss: 0.212186, avg_loss: 0.387770 +006630/063150, loss: 0.193822, avg_loss: 0.387653 +006635/063150, loss: 0.064196, avg_loss: 0.387461 +006640/063150, loss: 0.372069, avg_loss: 0.387341 +006645/063150, loss: 0.165802, avg_loss: 0.387227 +006650/063150, loss: 0.066028, avg_loss: 0.387041 +006655/063150, loss: 0.161452, avg_loss: 0.386947 +006660/063150, loss: 0.046122, avg_loss: 0.386777 +006665/063150, loss: 0.153000, avg_loss: 0.386616 +006670/063150, loss: 0.084513, avg_loss: 0.386477 +006675/063150, loss: 0.133759, avg_loss: 0.386326 +006680/063150, loss: 0.136979, avg_loss: 0.386149 +006685/063150, loss: 0.066092, avg_loss: 0.385967 +006690/063150, loss: 0.225653, avg_loss: 0.385854 +006695/063150, loss: 0.220390, avg_loss: 0.385727 +006700/063150, loss: 0.300616, avg_loss: 0.385581 +006705/063150, loss: 0.129006, avg_loss: 0.385406 +006710/063150, loss: 0.161372, avg_loss: 0.385229 +006715/063150, loss: 0.258934, avg_loss: 0.385087 +006720/063150, loss: 0.244111, avg_loss: 0.384945 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 6720/63150: {'accuracy': 0.856651376146789} +006725/063150, loss: 0.036864, avg_loss: 0.384814 +006730/063150, loss: 0.443971, avg_loss: 0.384680 +006735/063150, loss: 0.057744, avg_loss: 0.384503 +006740/063150, loss: 0.313349, avg_loss: 0.384390 +006745/063150, loss: 0.265652, avg_loss: 0.384227 +006750/063150, loss: 0.276788, avg_loss: 0.384119 +006755/063150, loss: 0.108872, avg_loss: 0.383934 +006760/063150, loss: 0.057640, avg_loss: 0.383787 +006765/063150, loss: 0.105560, 
avg_loss: 0.383644 +006770/063150, loss: 0.093812, avg_loss: 0.383464 +006775/063150, loss: 0.384844, avg_loss: 0.383349 +006780/063150, loss: 0.149802, avg_loss: 0.383238 +006785/063150, loss: 0.075268, avg_loss: 0.383104 +006790/063150, loss: 0.133740, avg_loss: 0.382931 +006795/063150, loss: 0.231310, avg_loss: 0.382738 +006800/063150, loss: 0.131188, avg_loss: 0.382557 +006805/063150, loss: 0.049656, avg_loss: 0.382455 +006810/063150, loss: 0.219370, avg_loss: 0.382292 +006815/063150, loss: 0.140352, avg_loss: 0.382165 +006820/063150, loss: 0.278672, avg_loss: 0.382028 +006825/063150, loss: 0.243699, avg_loss: 0.381963 +006830/063150, loss: 0.121104, avg_loss: 0.381795 +006835/063150, loss: 0.248670, avg_loss: 0.381691 +006840/063150, loss: 0.170353, avg_loss: 0.381565 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 6840/63150: {'accuracy': 0.856651376146789} +006845/063150, loss: 0.065253, avg_loss: 0.381404 +006850/063150, loss: 0.199071, avg_loss: 0.381298 +006855/063150, loss: 0.311507, avg_loss: 0.381169 +006860/063150, loss: 0.148344, avg_loss: 0.381056 +006865/063150, loss: 0.229173, avg_loss: 0.380947 +006870/063150, loss: 0.164160, avg_loss: 0.380796 +006875/063150, loss: 0.204610, avg_loss: 0.380681 +006880/063150, loss: 0.256000, avg_loss: 0.380574 +006885/063150, loss: 0.200000, avg_loss: 0.380402 +006890/063150, loss: 0.464455, avg_loss: 0.380340 +006895/063150, loss: 0.164461, avg_loss: 0.380247 +006900/063150, loss: 0.221622, avg_loss: 0.380123 +006905/063150, loss: 0.136482, avg_loss: 0.379972 +006910/063150, loss: 0.122820, avg_loss: 0.379833 +006915/063150, loss: 0.266474, avg_loss: 0.379692 +006920/063150, loss: 0.173864, avg_loss: 0.379587 +006925/063150, loss: 0.168391, avg_loss: 0.379472 +006930/063150, loss: 0.115412, avg_loss: 0.379343 +006935/063150, loss: 0.157241, avg_loss: 0.379223 +006940/063150, loss: 0.296901, avg_loss: 0.379160 +006945/063150, loss: 0.209306, avg_loss: 0.379049 +006950/063150, loss: 0.244068, avg_loss: 0.378916 +006955/063150, loss: 0.093027, avg_loss: 0.378778 +006960/063150, loss: 0.159567, avg_loss: 0.378629 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 6960/63150: {'accuracy': 0.8589449541284404} +006965/063150, loss: 0.089753, avg_loss: 0.378511 +006970/063150, loss: 0.171301, avg_loss: 0.378378 +006975/063150, loss: 0.186785, avg_loss: 0.378223 +006980/063150, loss: 0.052185, avg_loss: 0.378052 +006985/063150, loss: 0.260831, avg_loss: 0.377963 +006990/063150, loss: 0.182956, avg_loss: 0.377833 +006995/063150, loss: 0.101096, avg_loss: 0.377710 +007000/063150, loss: 0.424678, avg_loss: 0.377606 +007005/063150, loss: 0.094043, avg_loss: 0.377463 +007010/063150, loss: 0.195814, avg_loss: 0.377289 +007015/063150, loss: 0.220344, avg_loss: 0.377167 +007020/063150, loss: 0.309708, avg_loss: 0.377044 +007025/063150, loss: 0.289091, avg_loss: 0.376925 +007030/063150, loss: 0.161398, avg_loss: 0.376872 +007035/063150, loss: 0.358189, avg_loss: 0.376792 +007040/063150, loss: 0.349344, avg_loss: 0.376711 +007045/063150, loss: 0.151659, avg_loss: 0.376567 +007050/063150, loss: 0.096523, avg_loss: 0.376454 +007055/063150, loss: 0.386761, avg_loss: 0.376361 +007060/063150, loss: 0.141258, avg_loss: 0.376228 +007065/063150, loss: 0.065083, avg_loss: 0.376047 +007070/063150, loss: 0.106668, avg_loss: 0.375880 +007075/063150, loss: 0.325593, avg_loss: 0.375737 +007080/063150, loss: 0.196084, avg_loss: 0.375609 
+***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7080/63150: {'accuracy': 0.8497706422018348} +007085/063150, loss: 0.214628, avg_loss: 0.375500 +007090/063150, loss: 0.084896, avg_loss: 0.375335 +007095/063150, loss: 0.245339, avg_loss: 0.375244 +007100/063150, loss: 0.306698, avg_loss: 0.375148 +007105/063150, loss: 0.210685, avg_loss: 0.375031 +007110/063150, loss: 0.070772, avg_loss: 0.374859 +007115/063150, loss: 0.326716, avg_loss: 0.374715 +007120/063150, loss: 0.117397, avg_loss: 0.374620 +007125/063150, loss: 0.152656, avg_loss: 0.374572 +007130/063150, loss: 0.260402, avg_loss: 0.374427 +007135/063150, loss: 0.292273, avg_loss: 0.374326 +007140/063150, loss: 0.162608, avg_loss: 0.374213 +007145/063150, loss: 0.275778, avg_loss: 0.374116 +007150/063150, loss: 0.223852, avg_loss: 0.373976 +007155/063150, loss: 0.100900, avg_loss: 0.373863 +007160/063150, loss: 0.049277, avg_loss: 0.373735 +007165/063150, loss: 0.141038, avg_loss: 0.373606 +007170/063150, loss: 0.448460, avg_loss: 0.373503 +007175/063150, loss: 0.087170, avg_loss: 0.373345 +007180/063150, loss: 0.164786, avg_loss: 0.373226 +007185/063150, loss: 0.139608, avg_loss: 0.373087 +007190/063150, loss: 0.188226, avg_loss: 0.372999 +007195/063150, loss: 0.159784, avg_loss: 0.372856 +007200/063150, loss: 0.400001, avg_loss: 0.372734 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7200/63150: {'accuracy': 0.8623853211009175} +007205/063150, loss: 0.205842, avg_loss: 0.372661 +007210/063150, loss: 0.371919, avg_loss: 0.372530 +007215/063150, loss: 0.170688, avg_loss: 0.372412 +007220/063150, loss: 0.118036, avg_loss: 0.372321 +007225/063150, loss: 0.167954, avg_loss: 0.372173 +007230/063150, loss: 0.239164, avg_loss: 0.372046 +007235/063150, loss: 0.053277, avg_loss: 0.371897 +007240/063150, loss: 0.304795, avg_loss: 0.371779 +007245/063150, loss: 0.115691, avg_loss: 0.371625 +007250/063150, loss: 0.077418, avg_loss: 0.371502 +007255/063150, loss: 0.289505, avg_loss: 0.371377 +007260/063150, loss: 0.593990, avg_loss: 0.371345 +007265/063150, loss: 0.067507, avg_loss: 0.371207 +007270/063150, loss: 0.400890, avg_loss: 0.371084 +007275/063150, loss: 0.374084, avg_loss: 0.371018 +007280/063150, loss: 0.174338, avg_loss: 0.370893 +007285/063150, loss: 0.221011, avg_loss: 0.370754 +007290/063150, loss: 0.219682, avg_loss: 0.370742 +007295/063150, loss: 0.246324, avg_loss: 0.370629 +007300/063150, loss: 0.142836, avg_loss: 0.370506 +007305/063150, loss: 0.145212, avg_loss: 0.370399 +007310/063150, loss: 0.207463, avg_loss: 0.370305 +007315/063150, loss: 0.081330, avg_loss: 0.370185 +007320/063150, loss: 0.143124, avg_loss: 0.370080 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7320/63150: {'accuracy': 0.8681192660550459} +007325/063150, loss: 0.159430, avg_loss: 0.369978 +007330/063150, loss: 0.366371, avg_loss: 0.369850 +007335/063150, loss: 0.266461, avg_loss: 0.369731 +007340/063150, loss: 0.159506, avg_loss: 0.369570 +007345/063150, loss: 0.208134, avg_loss: 0.369405 +007350/063150, loss: 0.217419, avg_loss: 0.369265 +007355/063150, loss: 0.166781, avg_loss: 0.369124 +007360/063150, loss: 0.214333, avg_loss: 0.368970 +007365/063150, loss: 0.150210, avg_loss: 0.368803 +007370/063150, loss: 0.063424, avg_loss: 0.368624 +007375/063150, loss: 0.135044, avg_loss: 0.368499 +007380/063150, loss: 0.279078, avg_loss: 0.368433 
+007385/063150, loss: 0.304123, avg_loss: 0.368338 +007390/063150, loss: 0.363484, avg_loss: 0.368269 +007395/063150, loss: 0.149212, avg_loss: 0.368144 +007400/063150, loss: 0.347053, avg_loss: 0.368109 +007405/063150, loss: 0.185472, avg_loss: 0.368013 +007410/063150, loss: 0.138774, avg_loss: 0.367915 +007415/063150, loss: 0.265823, avg_loss: 0.367792 +007420/063150, loss: 0.139445, avg_loss: 0.367688 +007425/063150, loss: 0.222034, avg_loss: 0.367562 +007430/063150, loss: 0.162439, avg_loss: 0.367416 +007435/063150, loss: 0.156976, avg_loss: 0.367278 +007440/063150, loss: 0.070721, avg_loss: 0.367158 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7440/63150: {'accuracy': 0.8589449541284404} +007445/063150, loss: 0.291048, avg_loss: 0.367024 +007450/063150, loss: 0.282264, avg_loss: 0.366928 +007455/063150, loss: 0.101688, avg_loss: 0.366750 +007460/063150, loss: 0.325055, avg_loss: 0.366644 +007465/063150, loss: 0.094004, avg_loss: 0.366516 +007470/063150, loss: 0.064246, avg_loss: 0.366395 +007475/063150, loss: 0.180423, avg_loss: 0.366292 +007480/063150, loss: 0.252848, avg_loss: 0.366215 +007485/063150, loss: 0.302171, avg_loss: 0.366137 +007490/063150, loss: 0.181653, avg_loss: 0.366021 +007495/063150, loss: 0.250473, avg_loss: 0.365896 +007500/063150, loss: 0.180313, avg_loss: 0.365773 +007505/063150, loss: 0.153185, avg_loss: 0.365665 +007510/063150, loss: 0.061817, avg_loss: 0.365510 +007515/063150, loss: 0.066394, avg_loss: 0.365351 +007520/063150, loss: 0.127214, avg_loss: 0.365183 +007525/063150, loss: 0.247178, avg_loss: 0.365057 +007530/063150, loss: 0.190868, avg_loss: 0.364947 +007535/063150, loss: 0.098464, avg_loss: 0.364943 +007540/063150, loss: 0.101409, avg_loss: 0.364868 +007545/063150, loss: 0.171416, avg_loss: 0.364827 +007550/063150, loss: 0.122503, avg_loss: 0.364686 +007555/063150, loss: 0.093141, avg_loss: 0.364563 +007560/063150, loss: 0.233811, avg_loss: 0.364472 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7560/63150: {'accuracy': 0.8486238532110092} +007565/063150, loss: 0.387827, avg_loss: 0.364357 +007570/063150, loss: 0.340059, avg_loss: 0.364275 +007575/063150, loss: 0.172777, avg_loss: 0.364146 +007580/063150, loss: 0.249109, avg_loss: 0.364051 +007585/063150, loss: 0.066603, avg_loss: 0.363914 +007590/063150, loss: 0.091821, avg_loss: 0.363800 +007595/063150, loss: 0.472011, avg_loss: 0.363687 +007600/063150, loss: 0.096104, avg_loss: 0.363552 +007605/063150, loss: 0.304848, avg_loss: 0.363427 +007610/063150, loss: 0.032090, avg_loss: 0.363336 +007615/063150, loss: 0.299640, avg_loss: 0.363234 +007620/063150, loss: 0.192684, avg_loss: 0.363119 +007625/063150, loss: 0.090347, avg_loss: 0.363025 +007630/063150, loss: 0.168044, avg_loss: 0.362927 +007635/063150, loss: 0.183112, avg_loss: 0.362855 +007640/063150, loss: 0.176601, avg_loss: 0.362768 +007645/063150, loss: 0.134122, avg_loss: 0.362639 +007650/063150, loss: 0.295006, avg_loss: 0.362518 +007655/063150, loss: 0.342236, avg_loss: 0.362391 +007660/063150, loss: 0.068680, avg_loss: 0.362236 +007665/063150, loss: 0.176887, avg_loss: 0.362105 +007670/063150, loss: 0.211862, avg_loss: 0.361995 +007675/063150, loss: 0.236408, avg_loss: 0.361873 +007680/063150, loss: 0.267936, avg_loss: 0.361792 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7680/63150: {'accuracy': 0.8681192660550459} 
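
Each "***** Running dev evaluation *****" block reports accuracy over the same 872 dev examples, evaluated every 120 optimization steps. A hedged sketch of what one such evaluation pass typically looks like for a transformers-style sequence classifier; `model` and `eval_dataloader` are assumed stand-ins, and the batch layout (input_ids, attention_mask, token_type_ids, labels) and the `.logits` output attribute are assumptions about the run, not code taken from it:

    import torch

    def evaluate(model, eval_dataloader, device="cpu"):
        # Accuracy over the dev set, mirroring the periodic evaluation blocks above.
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for batch in eval_dataloader:
                input_ids, attention_mask, token_type_ids, labels = (t.to(device) for t in batch)
                outputs = model(input_ids=input_ids,
                                attention_mask=attention_mask,
                                token_type_ids=token_type_ids)
                preds = outputs.logits.argmax(dim=-1)   # assumes a HuggingFace-style output object
                correct += (preds == labels).sum().item()
                total += labels.numel()
        model.train()
        return {"accuracy": correct / total}

The returned dict matches the {'accuracy': ...} form printed after each evaluation block.
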
+007685/063150, loss: 0.379450, avg_loss: 0.361709 +007690/063150, loss: 0.208368, avg_loss: 0.361575 +007695/063150, loss: 0.226497, avg_loss: 0.361504 +007700/063150, loss: 0.145290, avg_loss: 0.361415 +007705/063150, loss: 0.329823, avg_loss: 0.361326 +007710/063150, loss: 0.297184, avg_loss: 0.361214 +007715/063150, loss: 0.115789, avg_loss: 0.361070 +007720/063150, loss: 0.238246, avg_loss: 0.360940 +007725/063150, loss: 0.182013, avg_loss: 0.360804 +007730/063150, loss: 0.131330, avg_loss: 0.360726 +007735/063150, loss: 0.206145, avg_loss: 0.360629 +007740/063150, loss: 0.204966, avg_loss: 0.360515 +007745/063150, loss: 0.329338, avg_loss: 0.360433 +007750/063150, loss: 0.135681, avg_loss: 0.360353 +007755/063150, loss: 0.178485, avg_loss: 0.360234 +007760/063150, loss: 0.253994, avg_loss: 0.360109 +007765/063150, loss: 0.127027, avg_loss: 0.359984 +007770/063150, loss: 0.073020, avg_loss: 0.359853 +007775/063150, loss: 0.153367, avg_loss: 0.359712 +007780/063150, loss: 0.141749, avg_loss: 0.359581 +007785/063150, loss: 0.306362, avg_loss: 0.359478 +007790/063150, loss: 0.159246, avg_loss: 0.359375 +007795/063150, loss: 0.050078, avg_loss: 0.359285 +007800/063150, loss: 0.101789, avg_loss: 0.359178 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7800/63150: {'accuracy': 0.8635321100917431} +007805/063150, loss: 0.147650, avg_loss: 0.359076 +007810/063150, loss: 0.123085, avg_loss: 0.358959 +007815/063150, loss: 0.184683, avg_loss: 0.358826 +007820/063150, loss: 0.328215, avg_loss: 0.358728 +007825/063150, loss: 0.397313, avg_loss: 0.358629 +007830/063150, loss: 0.236270, avg_loss: 0.358483 +007835/063150, loss: 0.133265, avg_loss: 0.358416 +007840/063150, loss: 0.257372, avg_loss: 0.358342 +007845/063150, loss: 0.222322, avg_loss: 0.358252 +007850/063150, loss: 0.120965, avg_loss: 0.358161 +007855/063150, loss: 0.279963, avg_loss: 0.358045 +007860/063150, loss: 0.241000, avg_loss: 0.357964 +007865/063150, loss: 0.079851, avg_loss: 0.357837 +007870/063150, loss: 0.086943, avg_loss: 0.357680 +007875/063150, loss: 0.216156, avg_loss: 0.357582 +007880/063150, loss: 0.215117, avg_loss: 0.357500 +007885/063150, loss: 0.121829, avg_loss: 0.357354 +007890/063150, loss: 0.255474, avg_loss: 0.357230 +007895/063150, loss: 0.058267, avg_loss: 0.357102 +007900/063150, loss: 0.258265, avg_loss: 0.356981 +007905/063150, loss: 0.452999, avg_loss: 0.356941 +007910/063150, loss: 0.087743, avg_loss: 0.356853 +007915/063150, loss: 0.200449, avg_loss: 0.356731 +007920/063150, loss: 0.285426, avg_loss: 0.356625 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 7920/63150: {'accuracy': 0.8463302752293578} +007925/063150, loss: 0.171869, avg_loss: 0.356542 +007930/063150, loss: 0.108217, avg_loss: 0.356442 +007935/063150, loss: 0.183361, avg_loss: 0.356301 +007940/063150, loss: 0.265317, avg_loss: 0.356199 +007945/063150, loss: 0.290357, avg_loss: 0.356094 +007950/063150, loss: 0.365331, avg_loss: 0.356041 +007955/063150, loss: 0.257229, avg_loss: 0.355963 +007960/063150, loss: 0.144841, avg_loss: 0.355828 +007965/063150, loss: 0.315796, avg_loss: 0.355744 +007970/063150, loss: 0.263026, avg_loss: 0.355706 +007975/063150, loss: 0.215960, avg_loss: 0.355612 +007980/063150, loss: 0.192837, avg_loss: 0.355490 +007985/063150, loss: 0.241060, avg_loss: 0.355400 +007990/063150, loss: 0.288449, avg_loss: 0.355329 +007995/063150, loss: 0.121776, avg_loss: 0.355215 +008000/063150, loss: 
0.276262, avg_loss: 0.355171 +008005/063150, loss: 0.250153, avg_loss: 0.355081 +008010/063150, loss: 0.215816, avg_loss: 0.354997 +008015/063150, loss: 0.131414, avg_loss: 0.354868 +008020/063150, loss: 0.038290, avg_loss: 0.354781 +008025/063150, loss: 0.213567, avg_loss: 0.354672 +008030/063150, loss: 0.156903, avg_loss: 0.354577 +008035/063150, loss: 0.171680, avg_loss: 0.354470 +008040/063150, loss: 0.119041, avg_loss: 0.354356 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 8040/63150: {'accuracy': 0.856651376146789} +008045/063150, loss: 0.200204, avg_loss: 0.354263 +008050/063150, loss: 0.223657, avg_loss: 0.354146 +008055/063150, loss: 0.074261, avg_loss: 0.353990 +008060/063150, loss: 0.207721, avg_loss: 0.353882 +008065/063150, loss: 0.094057, avg_loss: 0.353756 +008070/063150, loss: 0.218744, avg_loss: 0.353649 +008075/063150, loss: 0.450161, avg_loss: 0.353593 +008080/063150, loss: 0.168045, avg_loss: 0.353503 +008085/063150, loss: 0.160139, avg_loss: 0.353385 +008090/063150, loss: 0.216971, avg_loss: 0.353274 +008095/063150, loss: 0.185560, avg_loss: 0.353181 +008100/063150, loss: 0.137113, avg_loss: 0.353088 +008105/063150, loss: 0.361699, avg_loss: 0.353014 +008110/063150, loss: 0.320956, avg_loss: 0.352922 +008115/063150, loss: 0.184971, avg_loss: 0.352812 +008120/063150, loss: 0.158169, avg_loss: 0.352692 +008125/063150, loss: 0.102878, avg_loss: 0.352590 +008130/063150, loss: 0.232930, avg_loss: 0.352512 +008135/063150, loss: 0.099422, avg_loss: 0.352398 +008140/063150, loss: 0.245228, avg_loss: 0.352328 +008145/063150, loss: 0.186783, avg_loss: 0.352220 +008150/063150, loss: 0.140948, avg_loss: 0.352148 +008155/063150, loss: 0.074818, avg_loss: 0.352032 +008160/063150, loss: 0.158935, avg_loss: 0.351906 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 8160/63150: {'accuracy': 0.8623853211009175} +008165/063150, loss: 0.275411, avg_loss: 0.351809 +008170/063150, loss: 0.254610, avg_loss: 0.351745 +008175/063150, loss: 0.151243, avg_loss: 0.351631 +008180/063150, loss: 0.325123, avg_loss: 0.351526 +008185/063150, loss: 0.155033, avg_loss: 0.351390 +008190/063150, loss: 0.236445, avg_loss: 0.351255 +008195/063150, loss: 0.240897, avg_loss: 0.351177 +008200/063150, loss: 0.151847, avg_loss: 0.351050 +008205/063150, loss: 0.046601, avg_loss: 0.350900 +008210/063150, loss: 0.400382, avg_loss: 0.350804 +008215/063150, loss: 0.203522, avg_loss: 0.350694 +008220/063150, loss: 0.119064, avg_loss: 0.350599 +008225/063150, loss: 0.146290, avg_loss: 0.350478 +008230/063150, loss: 0.119271, avg_loss: 0.350347 +008235/063150, loss: 0.184588, avg_loss: 0.350238 +008240/063150, loss: 0.120894, avg_loss: 0.350164 +008245/063150, loss: 0.367861, avg_loss: 0.350074 +008250/063150, loss: 0.212892, avg_loss: 0.349942 +008255/063150, loss: 0.300112, avg_loss: 0.349903 +008260/063150, loss: 0.178464, avg_loss: 0.349785 +008265/063150, loss: 0.083778, avg_loss: 0.349713 +008270/063150, loss: 0.279997, avg_loss: 0.349610 +008275/063150, loss: 0.151504, avg_loss: 0.349524 +008280/063150, loss: 0.347327, avg_loss: 0.349461 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 8280/63150: {'accuracy': 0.8360091743119266} +008285/063150, loss: 0.035563, avg_loss: 0.349342 +008290/063150, loss: 0.283749, avg_loss: 0.349258 +008295/063150, loss: 0.221612, avg_loss: 0.349166 +008300/063150, loss: 0.115435, 
avg_loss: 0.349053 +008305/063150, loss: 0.087390, avg_loss: 0.348955 +008310/063150, loss: 0.299136, avg_loss: 0.348848 +008315/063150, loss: 0.187080, avg_loss: 0.348738 +008320/063150, loss: 0.327208, avg_loss: 0.348686 +008325/063150, loss: 0.110752, avg_loss: 0.348574 +008330/063150, loss: 0.125973, avg_loss: 0.348480 +008335/063150, loss: 0.316209, avg_loss: 0.348443 +008340/063150, loss: 0.128039, avg_loss: 0.348354 +008345/063150, loss: 0.185242, avg_loss: 0.348262 +008350/063150, loss: 0.242111, avg_loss: 0.348189 +008355/063150, loss: 0.132835, avg_loss: 0.348116 +008360/063150, loss: 0.245911, avg_loss: 0.348020 +008365/063150, loss: 0.167025, avg_loss: 0.347888 +008370/063150, loss: 0.152295, avg_loss: 0.347808 +008375/063150, loss: 0.176811, avg_loss: 0.347728 +008380/063150, loss: 0.142865, avg_loss: 0.347633 +008385/063150, loss: 0.155957, avg_loss: 0.347538 +008390/063150, loss: 0.133290, avg_loss: 0.347445 +008395/063150, loss: 0.183931, avg_loss: 0.347336 +008400/063150, loss: 0.114982, avg_loss: 0.347229 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 3, step 8400/63150: {'accuracy': 0.8577981651376146} +008405/063150, loss: 0.147639, avg_loss: 0.347163 +008410/063150, loss: 0.086868, avg_loss: 0.347044 +008415/063150, loss: 0.283607, avg_loss: 0.346969 +008420/063150, loss: 0.040499, avg_loss: 0.346836 +008425/063150, loss: 0.173972, avg_loss: 0.346727 +008430/063150, loss: 0.290508, avg_loss: 0.346612 +008435/063150, loss: 0.206648, avg_loss: 0.346473 +008440/063150, loss: 0.042936, avg_loss: 0.346341 +008445/063150, loss: 0.017667, avg_loss: 0.346210 +008450/063150, loss: 0.353954, avg_loss: 0.346095 +008455/063150, loss: 0.214215, avg_loss: 0.345992 +008460/063150, loss: 0.294760, avg_loss: 0.345875 +008465/063150, loss: 0.126303, avg_loss: 0.345781 +008470/063150, loss: 0.113326, avg_loss: 0.345660 +008475/063150, loss: 0.210505, avg_loss: 0.345569 +008480/063150, loss: 0.168250, avg_loss: 0.345483 +008485/063150, loss: 0.239621, avg_loss: 0.345375 +008490/063150, loss: 0.213532, avg_loss: 0.345257 +008495/063150, loss: 0.106317, avg_loss: 0.345151 +008500/063150, loss: 0.060797, avg_loss: 0.345046 +008505/063150, loss: 0.087356, avg_loss: 0.344930 +008510/063150, loss: 0.160134, avg_loss: 0.344825 +008515/063150, loss: 0.115659, avg_loss: 0.344714 +008520/063150, loss: 0.108328, avg_loss: 0.344600 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 8520/63150: {'accuracy': 0.8577981651376146} +008525/063150, loss: 0.050164, avg_loss: 0.344474 +008530/063150, loss: 0.187589, avg_loss: 0.344358 +008535/063150, loss: 0.250972, avg_loss: 0.344280 +008540/063150, loss: 0.084973, avg_loss: 0.344166 +008545/063150, loss: 0.051566, avg_loss: 0.344045 +008550/063150, loss: 0.089198, avg_loss: 0.343942 +008555/063150, loss: 0.131200, avg_loss: 0.343840 +008560/063150, loss: 0.234564, avg_loss: 0.343739 +008565/063150, loss: 0.188884, avg_loss: 0.343613 +008570/063150, loss: 0.275349, avg_loss: 0.343504 +008575/063150, loss: 0.050353, avg_loss: 0.343384 +008580/063150, loss: 0.110323, avg_loss: 0.343284 +008585/063150, loss: 0.129442, avg_loss: 0.343166 +008590/063150, loss: 0.146756, avg_loss: 0.343053 +008595/063150, loss: 0.186370, avg_loss: 0.342992 +008600/063150, loss: 0.246563, avg_loss: 0.342882 +008605/063150, loss: 0.062764, avg_loss: 0.342748 +008610/063150, loss: 0.140477, avg_loss: 0.342607 +008615/063150, loss: 0.111829, avg_loss: 0.342470 
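
The epoch label in the evaluation headers advances from 2 to 3 between steps 6240 and 6360, and from 3 to 4 between steps 8400 and 8520, which fits 2105 optimization steps per epoch and 63150 / 2105 = 30 epochs overall. A quick check of that arithmetic, assuming the logged epoch is simply completed_steps // steps_per_epoch:

    TOTAL_STEPS = 63150
    STEPS_PER_EPOCH = 2105               # implied by the epoch boundaries in this log
    assert TOTAL_STEPS // STEPS_PER_EPOCH == 30

    def epoch_of(step, steps_per_epoch=STEPS_PER_EPOCH):
        # 0-based epoch index as it appears in the evaluation headers
        return step // steps_per_epoch

    # Spot-checks against the labels visible above:
    assert epoch_of(6240) == 2 and epoch_of(6360) == 3
    assert epoch_of(8400) == 3 and epoch_of(8520) == 4
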
+008620/063150, loss: 0.467207, avg_loss: 0.342416 +008625/063150, loss: 0.071900, avg_loss: 0.342311 +008630/063150, loss: 0.013013, avg_loss: 0.342184 +008635/063150, loss: 0.076351, avg_loss: 0.342077 +008640/063150, loss: 0.142043, avg_loss: 0.342019 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 8640/63150: {'accuracy': 0.8635321100917431} +008645/063150, loss: 0.172252, avg_loss: 0.341914 +008650/063150, loss: 0.129360, avg_loss: 0.341811 +008655/063150, loss: 0.192541, avg_loss: 0.341758 +008660/063150, loss: 0.098014, avg_loss: 0.341627 +008665/063150, loss: 0.251760, avg_loss: 0.341511 +008670/063150, loss: 0.070898, avg_loss: 0.341368 +008675/063150, loss: 0.107639, avg_loss: 0.341274 +008680/063150, loss: 0.283636, avg_loss: 0.341217 +008685/063150, loss: 0.185459, avg_loss: 0.341122 +008690/063150, loss: 0.280005, avg_loss: 0.341037 +008695/063150, loss: 0.153768, avg_loss: 0.340936 +008700/063150, loss: 0.093415, avg_loss: 0.340851 +008705/063150, loss: 0.155181, avg_loss: 0.340751 +008710/063150, loss: 0.302868, avg_loss: 0.340637 +008715/063150, loss: 0.105698, avg_loss: 0.340513 +008720/063150, loss: 0.065154, avg_loss: 0.340395 +008725/063150, loss: 0.115314, avg_loss: 0.340282 +008730/063150, loss: 0.182537, avg_loss: 0.340183 +008735/063150, loss: 0.157342, avg_loss: 0.340132 +008740/063150, loss: 0.366330, avg_loss: 0.340044 +008745/063150, loss: 0.275384, avg_loss: 0.339933 +008750/063150, loss: 0.207979, avg_loss: 0.339871 +008755/063150, loss: 0.096898, avg_loss: 0.339764 +008760/063150, loss: 0.088852, avg_loss: 0.339648 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 8760/63150: {'accuracy': 0.8646788990825688} +008765/063150, loss: 0.039967, avg_loss: 0.339504 +008770/063150, loss: 0.033317, avg_loss: 0.339377 +008775/063150, loss: 0.151095, avg_loss: 0.339251 +008780/063150, loss: 0.043551, avg_loss: 0.339143 +008785/063150, loss: 0.065352, avg_loss: 0.339048 +008790/063150, loss: 0.393764, avg_loss: 0.338969 +008795/063150, loss: 0.088041, avg_loss: 0.338866 +008800/063150, loss: 0.243336, avg_loss: 0.338827 +008805/063150, loss: 0.313326, avg_loss: 0.338732 +008810/063150, loss: 0.199689, avg_loss: 0.338640 +008815/063150, loss: 0.063370, avg_loss: 0.338503 +008820/063150, loss: 0.406386, avg_loss: 0.338435 +008825/063150, loss: 0.054040, avg_loss: 0.338328 +008830/063150, loss: 0.132369, avg_loss: 0.338201 +008835/063150, loss: 0.104322, avg_loss: 0.338061 +008840/063150, loss: 0.183163, avg_loss: 0.337956 +008845/063150, loss: 0.200131, avg_loss: 0.337822 +008850/063150, loss: 0.288721, avg_loss: 0.337746 +008855/063150, loss: 0.098022, avg_loss: 0.337621 +008860/063150, loss: 0.091157, avg_loss: 0.337541 +008865/063150, loss: 0.196395, avg_loss: 0.337457 +008870/063150, loss: 0.277938, avg_loss: 0.337379 +008875/063150, loss: 0.175176, avg_loss: 0.337301 +008880/063150, loss: 0.063588, avg_loss: 0.337179 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 8880/63150: {'accuracy': 0.841743119266055} +008885/063150, loss: 0.231603, avg_loss: 0.337113 +008890/063150, loss: 0.131049, avg_loss: 0.337016 +008895/063150, loss: 0.226012, avg_loss: 0.336904 +008900/063150, loss: 0.282586, avg_loss: 0.336808 +008905/063150, loss: 0.044403, avg_loss: 0.336709 +008910/063150, loss: 0.086191, avg_loss: 0.336598 +008915/063150, loss: 0.114874, avg_loss: 0.336480 +008920/063150, 
loss: 0.143575, avg_loss: 0.336356 +008925/063150, loss: 0.186634, avg_loss: 0.336223 +008930/063150, loss: 0.210297, avg_loss: 0.336126 +008935/063150, loss: 0.136710, avg_loss: 0.335991 +008940/063150, loss: 0.086504, avg_loss: 0.335873 +008945/063150, loss: 0.116755, avg_loss: 0.335793 +008950/063150, loss: 0.096619, avg_loss: 0.335689 +008955/063150, loss: 0.318345, avg_loss: 0.335614 +008960/063150, loss: 0.179386, avg_loss: 0.335521 +008965/063150, loss: 0.184041, avg_loss: 0.335417 +008970/063150, loss: 0.111617, avg_loss: 0.335339 +008975/063150, loss: 0.229850, avg_loss: 0.335223 +008980/063150, loss: 0.045274, avg_loss: 0.335106 +008985/063150, loss: 0.053271, avg_loss: 0.334977 +008990/063150, loss: 0.097683, avg_loss: 0.334923 +008995/063150, loss: 0.057590, avg_loss: 0.334814 +009000/063150, loss: 0.051548, avg_loss: 0.334668 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9000/63150: {'accuracy': 0.8509174311926605} +009005/063150, loss: 0.072405, avg_loss: 0.334557 +009010/063150, loss: 0.256491, avg_loss: 0.334438 +009015/063150, loss: 0.127623, avg_loss: 0.334343 +009020/063150, loss: 0.112166, avg_loss: 0.334256 +009025/063150, loss: 0.388839, avg_loss: 0.334250 +009030/063150, loss: 0.152723, avg_loss: 0.334124 +009035/063150, loss: 0.086404, avg_loss: 0.333987 +009040/063150, loss: 0.094675, avg_loss: 0.333876 +009045/063150, loss: 0.291428, avg_loss: 0.333776 +009050/063150, loss: 0.127347, avg_loss: 0.333691 +009055/063150, loss: 0.146506, avg_loss: 0.333582 +009060/063150, loss: 0.288586, avg_loss: 0.333477 +009065/063150, loss: 0.049420, avg_loss: 0.333370 +009070/063150, loss: 0.137911, avg_loss: 0.333326 +009075/063150, loss: 0.235950, avg_loss: 0.333232 +009080/063150, loss: 0.218558, avg_loss: 0.333156 +009085/063150, loss: 0.051511, avg_loss: 0.333040 +009090/063150, loss: 0.150964, avg_loss: 0.332983 +009095/063150, loss: 0.244306, avg_loss: 0.332926 +009100/063150, loss: 0.182292, avg_loss: 0.332813 +009105/063150, loss: 0.113472, avg_loss: 0.332685 +009110/063150, loss: 0.116689, avg_loss: 0.332550 +009115/063150, loss: 0.086916, avg_loss: 0.332421 +009120/063150, loss: 0.110995, avg_loss: 0.332337 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9120/63150: {'accuracy': 0.8646788990825688} +009125/063150, loss: 0.115181, avg_loss: 0.332256 +009130/063150, loss: 0.081418, avg_loss: 0.332164 +009135/063150, loss: 0.142157, avg_loss: 0.332047 +009140/063150, loss: 0.122091, avg_loss: 0.331966 +009145/063150, loss: 0.127238, avg_loss: 0.331915 +009150/063150, loss: 0.218985, avg_loss: 0.331838 +009155/063150, loss: 0.210979, avg_loss: 0.331741 +009160/063150, loss: 0.114556, avg_loss: 0.331630 +009165/063150, loss: 0.332905, avg_loss: 0.331561 +009170/063150, loss: 0.162467, avg_loss: 0.331473 +009175/063150, loss: 0.244894, avg_loss: 0.331355 +009180/063150, loss: 0.301605, avg_loss: 0.331267 +009185/063150, loss: 0.064814, avg_loss: 0.331177 +009190/063150, loss: 0.126476, avg_loss: 0.331073 +009195/063150, loss: 0.114781, avg_loss: 0.331011 +009200/063150, loss: 0.195647, avg_loss: 0.330949 +009205/063150, loss: 0.069879, avg_loss: 0.330830 +009210/063150, loss: 0.127997, avg_loss: 0.330740 +009215/063150, loss: 0.140868, avg_loss: 0.330648 +009220/063150, loss: 0.127070, avg_loss: 0.330551 +009225/063150, loss: 0.134876, avg_loss: 0.330465 +009230/063150, loss: 0.133600, avg_loss: 0.330359 +009235/063150, loss: 0.086346, 
avg_loss: 0.330253 +009240/063150, loss: 0.154050, avg_loss: 0.330127 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9240/63150: {'accuracy': 0.856651376146789} +009245/063150, loss: 0.330200, avg_loss: 0.330061 +009250/063150, loss: 0.161128, avg_loss: 0.329989 +009255/063150, loss: 0.025313, avg_loss: 0.329887 +009260/063150, loss: 0.046100, avg_loss: 0.329787 +009265/063150, loss: 0.093881, avg_loss: 0.329675 +009270/063150, loss: 0.194773, avg_loss: 0.329576 +009275/063150, loss: 0.155308, avg_loss: 0.329494 +009280/063150, loss: 0.200168, avg_loss: 0.329395 +009285/063150, loss: 0.241702, avg_loss: 0.329300 +009290/063150, loss: 0.397542, avg_loss: 0.329242 +009295/063150, loss: 0.070779, avg_loss: 0.329115 +009300/063150, loss: 0.329174, avg_loss: 0.329066 +009305/063150, loss: 0.240871, avg_loss: 0.328965 +009310/063150, loss: 0.155293, avg_loss: 0.328869 +009315/063150, loss: 0.103249, avg_loss: 0.328774 +009320/063150, loss: 0.129871, avg_loss: 0.328678 +009325/063150, loss: 0.146412, avg_loss: 0.328575 +009330/063150, loss: 0.282041, avg_loss: 0.328496 +009335/063150, loss: 0.076443, avg_loss: 0.328371 +009340/063150, loss: 0.060455, avg_loss: 0.328275 +009345/063150, loss: 0.345695, avg_loss: 0.328202 +009350/063150, loss: 0.019107, avg_loss: 0.328093 +009355/063150, loss: 0.409689, avg_loss: 0.328034 +009360/063150, loss: 0.179435, avg_loss: 0.327944 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9360/63150: {'accuracy': 0.8497706422018348} +009365/063150, loss: 0.131612, avg_loss: 0.327855 +009370/063150, loss: 0.409107, avg_loss: 0.327829 +009375/063150, loss: 0.175976, avg_loss: 0.327767 +009380/063150, loss: 0.035823, avg_loss: 0.327645 +009385/063150, loss: 0.078898, avg_loss: 0.327523 +009390/063150, loss: 0.330065, avg_loss: 0.327442 +009395/063150, loss: 0.100009, avg_loss: 0.327328 +009400/063150, loss: 0.131736, avg_loss: 0.327240 +009405/063150, loss: 0.340834, avg_loss: 0.327158 +009410/063150, loss: 0.119877, avg_loss: 0.327054 +009415/063150, loss: 0.188490, avg_loss: 0.326971 +009420/063150, loss: 0.249844, avg_loss: 0.326881 +009425/063150, loss: 0.054020, avg_loss: 0.326750 +009430/063150, loss: 0.056281, avg_loss: 0.326632 +009435/063150, loss: 0.115801, avg_loss: 0.326518 +009440/063150, loss: 0.201743, avg_loss: 0.326434 +009445/063150, loss: 0.161759, avg_loss: 0.326388 +009450/063150, loss: 0.403007, avg_loss: 0.326298 +009455/063150, loss: 0.317176, avg_loss: 0.326218 +009460/063150, loss: 0.130350, avg_loss: 0.326098 +009465/063150, loss: 0.132910, avg_loss: 0.326026 +009470/063150, loss: 0.209864, avg_loss: 0.325948 +009475/063150, loss: 0.256295, avg_loss: 0.325856 +009480/063150, loss: 0.110435, avg_loss: 0.325784 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9480/63150: {'accuracy': 0.8600917431192661} +009485/063150, loss: 0.264439, avg_loss: 0.325694 +009490/063150, loss: 0.107025, avg_loss: 0.325623 +009495/063150, loss: 0.209733, avg_loss: 0.325540 +009500/063150, loss: 0.258116, avg_loss: 0.325466 +009505/063150, loss: 0.100382, avg_loss: 0.325367 +009510/063150, loss: 0.317764, avg_loss: 0.325284 +009515/063150, loss: 0.066474, avg_loss: 0.325208 +009520/063150, loss: 0.073232, avg_loss: 0.325119 +009525/063150, loss: 0.168614, avg_loss: 0.325040 +009530/063150, loss: 0.113010, avg_loss: 0.324960 +009535/063150, loss: 0.186931, avg_loss: 
0.324860 +009540/063150, loss: 0.340502, avg_loss: 0.324810 +009545/063150, loss: 0.128846, avg_loss: 0.324693 +009550/063150, loss: 0.085882, avg_loss: 0.324606 +009555/063150, loss: 0.057050, avg_loss: 0.324522 +009560/063150, loss: 0.309533, avg_loss: 0.324449 +009565/063150, loss: 0.069215, avg_loss: 0.324317 +009570/063150, loss: 0.137346, avg_loss: 0.324231 +009575/063150, loss: 0.157991, avg_loss: 0.324128 +009580/063150, loss: 0.116291, avg_loss: 0.324084 +009585/063150, loss: 0.286094, avg_loss: 0.324007 +009590/063150, loss: 0.145891, avg_loss: 0.323914 +009595/063150, loss: 0.156629, avg_loss: 0.323814 +009600/063150, loss: 0.108405, avg_loss: 0.323716 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9600/63150: {'accuracy': 0.8577981651376146} +009605/063150, loss: 0.254737, avg_loss: 0.323630 +009610/063150, loss: 0.051464, avg_loss: 0.323535 +009615/063150, loss: 0.159589, avg_loss: 0.323416 +009620/063150, loss: 0.019581, avg_loss: 0.323297 +009625/063150, loss: 0.036223, avg_loss: 0.323190 +009630/063150, loss: 0.403626, avg_loss: 0.323095 +009635/063150, loss: 0.022795, avg_loss: 0.322986 +009640/063150, loss: 0.192581, avg_loss: 0.322888 +009645/063150, loss: 0.121482, avg_loss: 0.322799 +009650/063150, loss: 0.231140, avg_loss: 0.322690 +009655/063150, loss: 0.250417, avg_loss: 0.322617 +009660/063150, loss: 0.145530, avg_loss: 0.322524 +009665/063150, loss: 0.270132, avg_loss: 0.322447 +009670/063150, loss: 0.300896, avg_loss: 0.322375 +009675/063150, loss: 0.091067, avg_loss: 0.322294 +009680/063150, loss: 0.230012, avg_loss: 0.322266 +009685/063150, loss: 0.180229, avg_loss: 0.322192 +009690/063150, loss: 0.091564, avg_loss: 0.322111 +009695/063150, loss: 0.142479, avg_loss: 0.322034 +009700/063150, loss: 0.071125, avg_loss: 0.321986 +009705/063150, loss: 0.255455, avg_loss: 0.321945 +009710/063150, loss: 0.200662, avg_loss: 0.321898 +009715/063150, loss: 0.228818, avg_loss: 0.321867 +009720/063150, loss: 0.093207, avg_loss: 0.321789 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9720/63150: {'accuracy': 0.8635321100917431} +009725/063150, loss: 0.236168, avg_loss: 0.321718 +009730/063150, loss: 0.290908, avg_loss: 0.321659 +009735/063150, loss: 0.068633, avg_loss: 0.321553 +009740/063150, loss: 0.196139, avg_loss: 0.321484 +009745/063150, loss: 0.180268, avg_loss: 0.321377 +009750/063150, loss: 0.263648, avg_loss: 0.321287 +009755/063150, loss: 0.124460, avg_loss: 0.321172 +009760/063150, loss: 0.177976, avg_loss: 0.321106 +009765/063150, loss: 0.086089, avg_loss: 0.321022 +009770/063150, loss: 0.318503, avg_loss: 0.320934 +009775/063150, loss: 0.158861, avg_loss: 0.320868 +009780/063150, loss: 0.091184, avg_loss: 0.320775 +009785/063150, loss: 0.153121, avg_loss: 0.320711 +009790/063150, loss: 0.142132, avg_loss: 0.320627 +009795/063150, loss: 0.104426, avg_loss: 0.320552 +009800/063150, loss: 0.205529, avg_loss: 0.320519 +009805/063150, loss: 0.369492, avg_loss: 0.320441 +009810/063150, loss: 0.119451, avg_loss: 0.320372 +009815/063150, loss: 0.187308, avg_loss: 0.320299 +009820/063150, loss: 0.203859, avg_loss: 0.320223 +009825/063150, loss: 0.090713, avg_loss: 0.320126 +009830/063150, loss: 0.208456, avg_loss: 0.320024 +009835/063150, loss: 0.188099, avg_loss: 0.319928 +009840/063150, loss: 0.093535, avg_loss: 0.319827 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 
9840/63150: {'accuracy': 0.8635321100917431} +009845/063150, loss: 0.148858, avg_loss: 0.319732 +009850/063150, loss: 0.144453, avg_loss: 0.319622 +009855/063150, loss: 0.457362, avg_loss: 0.319537 +009860/063150, loss: 0.191123, avg_loss: 0.319456 +009865/063150, loss: 0.174435, avg_loss: 0.319388 +009870/063150, loss: 0.089505, avg_loss: 0.319283 +009875/063150, loss: 0.060738, avg_loss: 0.319209 +009880/063150, loss: 0.170642, avg_loss: 0.319123 +009885/063150, loss: 0.122117, avg_loss: 0.319043 +009890/063150, loss: 0.216683, avg_loss: 0.318962 +009895/063150, loss: 0.195684, avg_loss: 0.318884 +009900/063150, loss: 0.088859, avg_loss: 0.318782 +009905/063150, loss: 0.125137, avg_loss: 0.318671 +009910/063150, loss: 0.308539, avg_loss: 0.318625 +009915/063150, loss: 0.077769, avg_loss: 0.318536 +009920/063150, loss: 0.040005, avg_loss: 0.318465 +009925/063150, loss: 0.223298, avg_loss: 0.318394 +009930/063150, loss: 0.152579, avg_loss: 0.318328 +009935/063150, loss: 0.218028, avg_loss: 0.318286 +009940/063150, loss: 0.105295, avg_loss: 0.318181 +009945/063150, loss: 0.232413, avg_loss: 0.318109 +009950/063150, loss: 0.205662, avg_loss: 0.318001 +009955/063150, loss: 0.137507, avg_loss: 0.317907 +009960/063150, loss: 0.122517, avg_loss: 0.317822 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 9960/63150: {'accuracy': 0.8612385321100917} +009965/063150, loss: 0.192335, avg_loss: 0.317720 +009970/063150, loss: 0.090811, avg_loss: 0.317603 +009975/063150, loss: 0.228959, avg_loss: 0.317527 +009980/063150, loss: 0.056074, avg_loss: 0.317449 +009985/063150, loss: 0.045065, avg_loss: 0.317350 +009990/063150, loss: 0.094455, avg_loss: 0.317276 +009995/063150, loss: 0.301941, avg_loss: 0.317223 +010000/063150, loss: 0.189853, avg_loss: 0.317141 +010005/063150, loss: 0.178251, avg_loss: 0.317062 +010010/063150, loss: 0.093454, avg_loss: 0.316966 +010015/063150, loss: 0.127900, avg_loss: 0.316878 +010020/063150, loss: 0.175450, avg_loss: 0.316799 +010025/063150, loss: 0.043340, avg_loss: 0.316689 +010030/063150, loss: 0.264480, avg_loss: 0.316617 +010035/063150, loss: 0.348374, avg_loss: 0.316550 +010040/063150, loss: 0.096781, avg_loss: 0.316461 +010045/063150, loss: 0.116490, avg_loss: 0.316362 +010050/063150, loss: 0.302402, avg_loss: 0.316299 +010055/063150, loss: 0.070589, avg_loss: 0.316239 +010060/063150, loss: 0.075808, avg_loss: 0.316154 +010065/063150, loss: 0.093432, avg_loss: 0.316084 +010070/063150, loss: 0.253873, avg_loss: 0.315986 +010075/063150, loss: 0.135548, avg_loss: 0.315920 +010080/063150, loss: 0.072331, avg_loss: 0.315851 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 10080/63150: {'accuracy': 0.8520642201834863} +010085/063150, loss: 0.090528, avg_loss: 0.315767 +010090/063150, loss: 0.032806, avg_loss: 0.315690 +010095/063150, loss: 0.268787, avg_loss: 0.315619 +010100/063150, loss: 0.145613, avg_loss: 0.315566 +010105/063150, loss: 0.241086, avg_loss: 0.315461 +010110/063150, loss: 0.257949, avg_loss: 0.315409 +010115/063150, loss: 0.109835, avg_loss: 0.315305 +010120/063150, loss: 0.169312, avg_loss: 0.315238 +010125/063150, loss: 0.277694, avg_loss: 0.315159 +010130/063150, loss: 0.206227, avg_loss: 0.315092 +010135/063150, loss: 0.081554, avg_loss: 0.315028 +010140/063150, loss: 0.146481, avg_loss: 0.314942 +010145/063150, loss: 0.282927, avg_loss: 0.314873 +010150/063150, loss: 0.203866, avg_loss: 0.314798 +010155/063150, loss: 
0.077397, avg_loss: 0.314710 +010160/063150, loss: 0.236177, avg_loss: 0.314683 +010165/063150, loss: 0.431229, avg_loss: 0.314615 +010170/063150, loss: 0.065212, avg_loss: 0.314548 +010175/063150, loss: 0.094072, avg_loss: 0.314497 +010180/063150, loss: 0.149413, avg_loss: 0.314412 +010185/063150, loss: 0.106497, avg_loss: 0.314334 +010190/063150, loss: 0.029558, avg_loss: 0.314216 +010195/063150, loss: 0.073058, avg_loss: 0.314132 +010200/063150, loss: 0.194715, avg_loss: 0.314035 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 10200/63150: {'accuracy': 0.8600917431192661} +010205/063150, loss: 0.071776, avg_loss: 0.313966 +010210/063150, loss: 0.281974, avg_loss: 0.313912 +010215/063150, loss: 0.290942, avg_loss: 0.313837 +010220/063150, loss: 0.162581, avg_loss: 0.313782 +010225/063150, loss: 0.141001, avg_loss: 0.313676 +010230/063150, loss: 0.231695, avg_loss: 0.313605 +010235/063150, loss: 0.205768, avg_loss: 0.313548 +010240/063150, loss: 0.134883, avg_loss: 0.313459 +010245/063150, loss: 0.060985, avg_loss: 0.313381 +010250/063150, loss: 0.092142, avg_loss: 0.313316 +010255/063150, loss: 0.252459, avg_loss: 0.313259 +010260/063150, loss: 0.045346, avg_loss: 0.313156 +010265/063150, loss: 0.078167, avg_loss: 0.313053 +010270/063150, loss: 0.272497, avg_loss: 0.312978 +010275/063150, loss: 0.179398, avg_loss: 0.312891 +010280/063150, loss: 0.071036, avg_loss: 0.312842 +010285/063150, loss: 0.108504, avg_loss: 0.312752 +010290/063150, loss: 0.069757, avg_loss: 0.312676 +010295/063150, loss: 0.167666, avg_loss: 0.312604 +010300/063150, loss: 0.141145, avg_loss: 0.312548 +010305/063150, loss: 0.195818, avg_loss: 0.312477 +010310/063150, loss: 0.122474, avg_loss: 0.312400 +010315/063150, loss: 0.070946, avg_loss: 0.312290 +010320/063150, loss: 0.227125, avg_loss: 0.312206 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 10320/63150: {'accuracy': 0.8635321100917431} +010325/063150, loss: 0.091803, avg_loss: 0.312140 +010330/063150, loss: 0.057969, avg_loss: 0.312076 +010335/063150, loss: 0.215628, avg_loss: 0.312017 +010340/063150, loss: 0.147804, avg_loss: 0.311936 +010345/063150, loss: 0.085695, avg_loss: 0.311829 +010350/063150, loss: 0.170673, avg_loss: 0.311785 +010355/063150, loss: 0.056970, avg_loss: 0.311697 +010360/063150, loss: 0.156192, avg_loss: 0.311615 +010365/063150, loss: 0.077619, avg_loss: 0.311527 +010370/063150, loss: 0.240395, avg_loss: 0.311469 +010375/063150, loss: 0.038833, avg_loss: 0.311392 +010380/063150, loss: 0.045784, avg_loss: 0.311316 +010385/063150, loss: 0.192790, avg_loss: 0.311232 +010390/063150, loss: 0.241581, avg_loss: 0.311162 +010395/063150, loss: 0.180038, avg_loss: 0.311084 +010400/063150, loss: 0.111988, avg_loss: 0.311021 +010405/063150, loss: 0.072416, avg_loss: 0.310947 +010410/063150, loss: 0.034544, avg_loss: 0.310875 +010415/063150, loss: 0.167089, avg_loss: 0.310778 +010420/063150, loss: 0.080621, avg_loss: 0.310700 +010425/063150, loss: 0.168110, avg_loss: 0.310607 +010430/063150, loss: 0.125608, avg_loss: 0.310519 +010435/063150, loss: 0.176862, avg_loss: 0.310456 +010440/063150, loss: 0.169321, avg_loss: 0.310394 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 4, step 10440/63150: {'accuracy': 0.8463302752293578} +010445/063150, loss: 0.307146, avg_loss: 0.310384 +010450/063150, loss: 0.174915, avg_loss: 0.310321 +010455/063150, loss: 0.106211, 
avg_loss: 0.310265 +010460/063150, loss: 0.125601, avg_loss: 0.310202 +010465/063150, loss: 0.098977, avg_loss: 0.310099 +010470/063150, loss: 0.055487, avg_loss: 0.310007 +010475/063150, loss: 0.120468, avg_loss: 0.309905 +010480/063150, loss: 0.061571, avg_loss: 0.309816 +010485/063150, loss: 0.106210, avg_loss: 0.309754 +010490/063150, loss: 0.380060, avg_loss: 0.309686 +010495/063150, loss: 0.108737, avg_loss: 0.309608 +010500/063150, loss: 0.047445, avg_loss: 0.309550 +010505/063150, loss: 0.083730, avg_loss: 0.309448 +010510/063150, loss: 0.166087, avg_loss: 0.309363 +010515/063150, loss: 0.146377, avg_loss: 0.309281 +010520/063150, loss: 0.128919, avg_loss: 0.309214 +010525/063150, loss: 0.166360, avg_loss: 0.309147 +010530/063150, loss: 0.118950, avg_loss: 0.309042 +010535/063150, loss: 0.178382, avg_loss: 0.308967 +010540/063150, loss: 0.143099, avg_loss: 0.308861 +010545/063150, loss: 0.148934, avg_loss: 0.308756 +010550/063150, loss: 0.030103, avg_loss: 0.308631 +010555/063150, loss: 0.124762, avg_loss: 0.308541 +010560/063150, loss: 0.010532, avg_loss: 0.308428 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 10560/63150: {'accuracy': 0.8497706422018348} +010565/063150, loss: 0.078442, avg_loss: 0.308340 +010570/063150, loss: 0.120963, avg_loss: 0.308291 +010575/063150, loss: 0.018122, avg_loss: 0.308182 +010580/063150, loss: 0.144638, avg_loss: 0.308092 +010585/063150, loss: 0.053745, avg_loss: 0.308007 +010590/063150, loss: 0.035331, avg_loss: 0.307905 +010595/063150, loss: 0.245777, avg_loss: 0.307830 +010600/063150, loss: 0.102280, avg_loss: 0.307758 +010605/063150, loss: 0.060599, avg_loss: 0.307657 +010610/063150, loss: 0.125457, avg_loss: 0.307572 +010615/063150, loss: 0.099095, avg_loss: 0.307493 +010620/063150, loss: 0.038345, avg_loss: 0.307397 +010625/063150, loss: 0.067861, avg_loss: 0.307301 +010630/063150, loss: 0.070490, avg_loss: 0.307201 +010635/063150, loss: 0.239503, avg_loss: 0.307135 +010640/063150, loss: 0.120326, avg_loss: 0.307051 +010645/063150, loss: 0.246785, avg_loss: 0.306971 +010650/063150, loss: 0.038675, avg_loss: 0.306892 +010655/063150, loss: 0.068527, avg_loss: 0.306790 +010660/063150, loss: 0.254869, avg_loss: 0.306723 +010665/063150, loss: 0.203750, avg_loss: 0.306620 +010670/063150, loss: 0.018846, avg_loss: 0.306519 +010675/063150, loss: 0.131322, avg_loss: 0.306427 +010680/063150, loss: 0.117686, avg_loss: 0.306348 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 10680/63150: {'accuracy': 0.8474770642201835} +010685/063150, loss: 0.069627, avg_loss: 0.306263 +010690/063150, loss: 0.212331, avg_loss: 0.306176 +010695/063150, loss: 0.038811, avg_loss: 0.306085 +010700/063150, loss: 0.116324, avg_loss: 0.305989 +010705/063150, loss: 0.100974, avg_loss: 0.305895 +010710/063150, loss: 0.070316, avg_loss: 0.305811 +010715/063150, loss: 0.232882, avg_loss: 0.305779 +010720/063150, loss: 0.140272, avg_loss: 0.305699 +010725/063150, loss: 0.124733, avg_loss: 0.305623 +010730/063150, loss: 0.207910, avg_loss: 0.305551 +010735/063150, loss: 0.041805, avg_loss: 0.305436 +010740/063150, loss: 0.170655, avg_loss: 0.305348 +010745/063150, loss: 0.140702, avg_loss: 0.305259 +010750/063150, loss: 0.093360, avg_loss: 0.305162 +010755/063150, loss: 0.152758, avg_loss: 0.305071 +010760/063150, loss: 0.039450, avg_loss: 0.304973 +010765/063150, loss: 0.012937, avg_loss: 0.304870 +010770/063150, loss: 0.087834, avg_loss: 0.304771 
+010775/063150, loss: 0.195773, avg_loss: 0.304687 +010780/063150, loss: 0.070066, avg_loss: 0.304611 +010785/063150, loss: 0.043237, avg_loss: 0.304542 +010790/063150, loss: 0.160668, avg_loss: 0.304458 +010795/063150, loss: 0.156932, avg_loss: 0.304374 +010800/063150, loss: 0.041045, avg_loss: 0.304307 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 10800/63150: {'accuracy': 0.8555045871559633} +010805/063150, loss: 0.050182, avg_loss: 0.304247 +010810/063150, loss: 0.138973, avg_loss: 0.304186 +010815/063150, loss: 0.191587, avg_loss: 0.304111 +010820/063150, loss: 0.189376, avg_loss: 0.304039 +010825/063150, loss: 0.055514, avg_loss: 0.303962 +010830/063150, loss: 0.045113, avg_loss: 0.303869 +010835/063150, loss: 0.059702, avg_loss: 0.303764 +010840/063150, loss: 0.103249, avg_loss: 0.303651 +010845/063150, loss: 0.029940, avg_loss: 0.303568 +010850/063150, loss: 0.358446, avg_loss: 0.303513 +010855/063150, loss: 0.072046, avg_loss: 0.303428 +010860/063150, loss: 0.024368, avg_loss: 0.303352 +010865/063150, loss: 0.070819, avg_loss: 0.303265 +010870/063150, loss: 0.040140, avg_loss: 0.303187 +010875/063150, loss: 0.106851, avg_loss: 0.303090 +010880/063150, loss: 0.059044, avg_loss: 0.303011 +010885/063150, loss: 0.144637, avg_loss: 0.302922 +010890/063150, loss: 0.142176, avg_loss: 0.302833 +010895/063150, loss: 0.272884, avg_loss: 0.302771 +010900/063150, loss: 0.101391, avg_loss: 0.302730 +010905/063150, loss: 0.035028, avg_loss: 0.302627 +010910/063150, loss: 0.109430, avg_loss: 0.302554 +010915/063150, loss: 0.307841, avg_loss: 0.302511 +010920/063150, loss: 0.062385, avg_loss: 0.302424 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 10920/63150: {'accuracy': 0.8577981651376146} +010925/063150, loss: 0.258084, avg_loss: 0.302359 +010930/063150, loss: 0.130835, avg_loss: 0.302289 +010935/063150, loss: 0.046937, avg_loss: 0.302228 +010940/063150, loss: 0.074056, avg_loss: 0.302142 +010945/063150, loss: 0.079976, avg_loss: 0.302065 +010950/063150, loss: 0.213374, avg_loss: 0.301996 +010955/063150, loss: 0.134519, avg_loss: 0.301914 +010960/063150, loss: 0.048815, avg_loss: 0.301816 +010965/063150, loss: 0.084379, avg_loss: 0.301738 +010970/063150, loss: 0.077946, avg_loss: 0.301674 +010975/063150, loss: 0.217925, avg_loss: 0.301624 +010980/063150, loss: 0.104104, avg_loss: 0.301537 +010985/063150, loss: 0.102159, avg_loss: 0.301463 +010990/063150, loss: 0.023048, avg_loss: 0.301380 +010995/063150, loss: 0.087407, avg_loss: 0.301307 +011000/063150, loss: 0.072717, avg_loss: 0.301241 +011005/063150, loss: 0.154431, avg_loss: 0.301172 +011010/063150, loss: 0.075919, avg_loss: 0.301110 +011015/063150, loss: 0.047905, avg_loss: 0.301024 +011020/063150, loss: 0.150243, avg_loss: 0.300958 +011025/063150, loss: 0.175037, avg_loss: 0.300911 +011030/063150, loss: 0.065675, avg_loss: 0.300827 +011035/063150, loss: 0.075467, avg_loss: 0.300758 +011040/063150, loss: 0.383090, avg_loss: 0.300684 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11040/63150: {'accuracy': 0.8635321100917431} +011045/063150, loss: 0.064023, avg_loss: 0.300608 +011050/063150, loss: 0.054205, avg_loss: 0.300526 +011055/063150, loss: 0.184745, avg_loss: 0.300486 +011060/063150, loss: 0.139269, avg_loss: 0.300402 +011065/063150, loss: 0.100906, avg_loss: 0.300316 +011070/063150, loss: 0.044016, avg_loss: 0.300223 
+011075/063150, loss: 0.150750, avg_loss: 0.300161 +011080/063150, loss: 0.070051, avg_loss: 0.300081 +011085/063150, loss: 0.372006, avg_loss: 0.300011 +011090/063150, loss: 0.164667, avg_loss: 0.299933 +011095/063150, loss: 0.060243, avg_loss: 0.299841 +011100/063150, loss: 0.258733, avg_loss: 0.299795 +011105/063150, loss: 0.044230, avg_loss: 0.299732 +011110/063150, loss: 0.115914, avg_loss: 0.299654 +011115/063150, loss: 0.069076, avg_loss: 0.299555 +011120/063150, loss: 0.146748, avg_loss: 0.299479 +011125/063150, loss: 0.377297, avg_loss: 0.299429 +011130/063150, loss: 0.026127, avg_loss: 0.299348 +011135/063150, loss: 0.165699, avg_loss: 0.299297 +011140/063150, loss: 0.322973, avg_loss: 0.299238 +011145/063150, loss: 0.240229, avg_loss: 0.299185 +011150/063150, loss: 0.138315, avg_loss: 0.299081 +011155/063150, loss: 0.085248, avg_loss: 0.299017 +011160/063150, loss: 0.300645, avg_loss: 0.298951 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11160/63150: {'accuracy': 0.8635321100917431} +011165/063150, loss: 0.180261, avg_loss: 0.298876 +011170/063150, loss: 0.037220, avg_loss: 0.298770 +011175/063150, loss: 0.111140, avg_loss: 0.298685 +011180/063150, loss: 0.163392, avg_loss: 0.298612 +011185/063150, loss: 0.162369, avg_loss: 0.298522 +011190/063150, loss: 0.286023, avg_loss: 0.298468 +011195/063150, loss: 0.031226, avg_loss: 0.298378 +011200/063150, loss: 0.050550, avg_loss: 0.298291 +011205/063150, loss: 0.062056, avg_loss: 0.298229 +011210/063150, loss: 0.255696, avg_loss: 0.298149 +011215/063150, loss: 0.011775, avg_loss: 0.298050 +011220/063150, loss: 0.172581, avg_loss: 0.297959 +011225/063150, loss: 0.124258, avg_loss: 0.297887 +011230/063150, loss: 0.146640, avg_loss: 0.297816 +011235/063150, loss: 0.091208, avg_loss: 0.297743 +011240/063150, loss: 0.154959, avg_loss: 0.297661 +011245/063150, loss: 0.210192, avg_loss: 0.297603 +011250/063150, loss: 0.091186, avg_loss: 0.297531 +011255/063150, loss: 0.062088, avg_loss: 0.297441 +011260/063150, loss: 0.024434, avg_loss: 0.297362 +011265/063150, loss: 0.065718, avg_loss: 0.297265 +011270/063150, loss: 0.094349, avg_loss: 0.297168 +011275/063150, loss: 0.088033, avg_loss: 0.297088 +011280/063150, loss: 0.156095, avg_loss: 0.296992 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11280/63150: {'accuracy': 0.8577981651376146} +011285/063150, loss: 0.111182, avg_loss: 0.296924 +011290/063150, loss: 0.055021, avg_loss: 0.296857 +011295/063150, loss: 0.136403, avg_loss: 0.296798 +011300/063150, loss: 0.049305, avg_loss: 0.296700 +011305/063150, loss: 0.265470, avg_loss: 0.296633 +011310/063150, loss: 0.363443, avg_loss: 0.296573 +011315/063150, loss: 0.229934, avg_loss: 0.296525 +011320/063150, loss: 0.104641, avg_loss: 0.296435 +011325/063150, loss: 0.132260, avg_loss: 0.296342 +011330/063150, loss: 0.084565, avg_loss: 0.296259 +011335/063150, loss: 0.152346, avg_loss: 0.296201 +011340/063150, loss: 0.083349, avg_loss: 0.296136 +011345/063150, loss: 0.205813, avg_loss: 0.296079 +011350/063150, loss: 0.057890, avg_loss: 0.295998 +011355/063150, loss: 0.139605, avg_loss: 0.295903 +011360/063150, loss: 0.213980, avg_loss: 0.295835 +011365/063150, loss: 0.215851, avg_loss: 0.295757 +011370/063150, loss: 0.028619, avg_loss: 0.295660 +011375/063150, loss: 0.133684, avg_loss: 0.295582 +011380/063150, loss: 0.131321, avg_loss: 0.295491 +011385/063150, loss: 0.226065, avg_loss: 0.295455 +011390/063150, 
loss: 0.080510, avg_loss: 0.295374 +011395/063150, loss: 0.098390, avg_loss: 0.295291 +011400/063150, loss: 0.044907, avg_loss: 0.295197 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11400/63150: {'accuracy': 0.8394495412844036} +011405/063150, loss: 0.058795, avg_loss: 0.295125 +011410/063150, loss: 0.044944, avg_loss: 0.295026 +011415/063150, loss: 0.065715, avg_loss: 0.294930 +011420/063150, loss: 0.200236, avg_loss: 0.294863 +011425/063150, loss: 0.121049, avg_loss: 0.294801 +011430/063150, loss: 0.146016, avg_loss: 0.294722 +011435/063150, loss: 0.030630, avg_loss: 0.294655 +011440/063150, loss: 0.038620, avg_loss: 0.294554 +011445/063150, loss: 0.036397, avg_loss: 0.294467 +011450/063150, loss: 0.078964, avg_loss: 0.294375 +011455/063150, loss: 0.173694, avg_loss: 0.294289 +011460/063150, loss: 0.167719, avg_loss: 0.294198 +011465/063150, loss: 0.218025, avg_loss: 0.294115 +011470/063150, loss: 0.064786, avg_loss: 0.294022 +011475/063150, loss: 0.174319, avg_loss: 0.293966 +011480/063150, loss: 0.360695, avg_loss: 0.293900 +011485/063150, loss: 0.176150, avg_loss: 0.293822 +011490/063150, loss: 0.103830, avg_loss: 0.293749 +011495/063150, loss: 0.074414, avg_loss: 0.293685 +011500/063150, loss: 0.047651, avg_loss: 0.293580 +011505/063150, loss: 0.064202, avg_loss: 0.293524 +011510/063150, loss: 0.076031, avg_loss: 0.293455 +011515/063150, loss: 0.205897, avg_loss: 0.293372 +011520/063150, loss: 0.054321, avg_loss: 0.293290 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11520/63150: {'accuracy': 0.8405963302752294} +011525/063150, loss: 0.087383, avg_loss: 0.293240 +011530/063150, loss: 0.235280, avg_loss: 0.293222 +011535/063150, loss: 0.204239, avg_loss: 0.293153 +011540/063150, loss: 0.150483, avg_loss: 0.293101 +011545/063150, loss: 0.068807, avg_loss: 0.293034 +011550/063150, loss: 0.234452, avg_loss: 0.292963 +011555/063150, loss: 0.033685, avg_loss: 0.292882 +011560/063150, loss: 0.298000, avg_loss: 0.292804 +011565/063150, loss: 0.259819, avg_loss: 0.292732 +011570/063150, loss: 0.068553, avg_loss: 0.292664 +011575/063150, loss: 0.147556, avg_loss: 0.292595 +011580/063150, loss: 0.108045, avg_loss: 0.292555 +011585/063150, loss: 0.173416, avg_loss: 0.292507 +011590/063150, loss: 0.096568, avg_loss: 0.292415 +011595/063150, loss: 0.397567, avg_loss: 0.292399 +011600/063150, loss: 0.129087, avg_loss: 0.292319 +011605/063150, loss: 0.093456, avg_loss: 0.292240 +011610/063150, loss: 0.158300, avg_loss: 0.292181 +011615/063150, loss: 0.100482, avg_loss: 0.292100 +011620/063150, loss: 0.194647, avg_loss: 0.292030 +011625/063150, loss: 0.199176, avg_loss: 0.291964 +011630/063150, loss: 0.022553, avg_loss: 0.291895 +011635/063150, loss: 0.120800, avg_loss: 0.291820 +011640/063150, loss: 0.165404, avg_loss: 0.291768 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11640/63150: {'accuracy': 0.838302752293578} +011645/063150, loss: 0.062540, avg_loss: 0.291691 +011650/063150, loss: 0.248271, avg_loss: 0.291626 +011655/063150, loss: 0.133732, avg_loss: 0.291545 +011660/063150, loss: 0.074972, avg_loss: 0.291487 +011665/063150, loss: 0.217266, avg_loss: 0.291420 +011670/063150, loss: 0.042485, avg_loss: 0.291348 +011675/063150, loss: 0.045135, avg_loss: 0.291263 +011680/063150, loss: 0.046080, avg_loss: 0.291198 +011685/063150, loss: 0.064821, avg_loss: 0.291091 +011690/063150, loss: 
0.033320, avg_loss: 0.291017 +011695/063150, loss: 0.111138, avg_loss: 0.290942 +011700/063150, loss: 0.197394, avg_loss: 0.290858 +011705/063150, loss: 0.256846, avg_loss: 0.290788 +011710/063150, loss: 0.063811, avg_loss: 0.290721 +011715/063150, loss: 0.067020, avg_loss: 0.290629 +011720/063150, loss: 0.357743, avg_loss: 0.290568 +011725/063150, loss: 0.231975, avg_loss: 0.290528 +011730/063150, loss: 0.041162, avg_loss: 0.290438 +011735/063150, loss: 0.023695, avg_loss: 0.290366 +011740/063150, loss: 0.084688, avg_loss: 0.290321 +011745/063150, loss: 0.473796, avg_loss: 0.290274 +011750/063150, loss: 0.114563, avg_loss: 0.290184 +011755/063150, loss: 0.187628, avg_loss: 0.290132 +011760/063150, loss: 0.333238, avg_loss: 0.290077 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11760/63150: {'accuracy': 0.8532110091743119} +011765/063150, loss: 0.255383, avg_loss: 0.290005 +011770/063150, loss: 0.210848, avg_loss: 0.289965 +011775/063150, loss: 0.138689, avg_loss: 0.289895 +011780/063150, loss: 0.023046, avg_loss: 0.289823 +011785/063150, loss: 0.092461, avg_loss: 0.289749 +011790/063150, loss: 0.161388, avg_loss: 0.289696 +011795/063150, loss: 0.099379, avg_loss: 0.289604 +011800/063150, loss: 0.223064, avg_loss: 0.289555 +011805/063150, loss: 0.267299, avg_loss: 0.289505 +011810/063150, loss: 0.140722, avg_loss: 0.289426 +011815/063150, loss: 0.022494, avg_loss: 0.289328 +011820/063150, loss: 0.037473, avg_loss: 0.289279 +011825/063150, loss: 0.196594, avg_loss: 0.289243 +011830/063150, loss: 0.055268, avg_loss: 0.289167 +011835/063150, loss: 0.086641, avg_loss: 0.289107 +011840/063150, loss: 0.126851, avg_loss: 0.289030 +011845/063150, loss: 0.225183, avg_loss: 0.288967 +011850/063150, loss: 0.091154, avg_loss: 0.288902 +011855/063150, loss: 0.087773, avg_loss: 0.288823 +011860/063150, loss: 0.224470, avg_loss: 0.288785 +011865/063150, loss: 0.313331, avg_loss: 0.288740 +011870/063150, loss: 0.060008, avg_loss: 0.288671 +011875/063150, loss: 0.137095, avg_loss: 0.288643 +011880/063150, loss: 0.179460, avg_loss: 0.288602 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 11880/63150: {'accuracy': 0.8497706422018348} +011885/063150, loss: 0.128724, avg_loss: 0.288542 +011890/063150, loss: 0.190379, avg_loss: 0.288480 +011895/063150, loss: 0.119125, avg_loss: 0.288404 +011900/063150, loss: 0.054045, avg_loss: 0.288328 +011905/063150, loss: 0.158816, avg_loss: 0.288256 +011910/063150, loss: 0.148271, avg_loss: 0.288181 +011915/063150, loss: 0.103456, avg_loss: 0.288093 +011920/063150, loss: 0.050163, avg_loss: 0.288010 +011925/063150, loss: 0.153077, avg_loss: 0.287933 +011930/063150, loss: 0.209436, avg_loss: 0.287858 +011935/063150, loss: 0.035930, avg_loss: 0.287785 +011940/063150, loss: 0.097476, avg_loss: 0.287694 +011945/063150, loss: 0.022517, avg_loss: 0.287638 +011950/063150, loss: 0.187662, avg_loss: 0.287555 +011955/063150, loss: 0.050620, avg_loss: 0.287485 +011960/063150, loss: 0.044364, avg_loss: 0.287425 +011965/063150, loss: 0.169347, avg_loss: 0.287346 +011970/063150, loss: 0.094964, avg_loss: 0.287281 +011975/063150, loss: 0.082400, avg_loss: 0.287202 +011980/063150, loss: 0.083242, avg_loss: 0.287139 +011985/063150, loss: 0.061428, avg_loss: 0.287053 +011990/063150, loss: 0.377398, avg_loss: 0.287008 +011995/063150, loss: 0.114585, avg_loss: 0.286940 +012000/063150, loss: 0.155495, avg_loss: 0.286871 +***** Running dev evaluation ***** + Num 
examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 12000/63150: {'accuracy': 0.8532110091743119} +012005/063150, loss: 0.078167, avg_loss: 0.286791 +012010/063150, loss: 0.203096, avg_loss: 0.286753 +012015/063150, loss: 0.183604, avg_loss: 0.286713 +012020/063150, loss: 0.240165, avg_loss: 0.286683 +012025/063150, loss: 0.145528, avg_loss: 0.286635 +012030/063150, loss: 0.192909, avg_loss: 0.286586 +012035/063150, loss: 0.094191, avg_loss: 0.286521 +012040/063150, loss: 0.110136, avg_loss: 0.286466 +012045/063150, loss: 0.066443, avg_loss: 0.286398 +012050/063150, loss: 0.298872, avg_loss: 0.286342 +012055/063150, loss: 0.148360, avg_loss: 0.286268 +012060/063150, loss: 0.071920, avg_loss: 0.286208 +012065/063150, loss: 0.049196, avg_loss: 0.286142 +012070/063150, loss: 0.023247, avg_loss: 0.286057 +012075/063150, loss: 0.195157, avg_loss: 0.285993 +012080/063150, loss: 0.060874, avg_loss: 0.285932 +012085/063150, loss: 0.054214, avg_loss: 0.285873 +012090/063150, loss: 0.136288, avg_loss: 0.285814 +012095/063150, loss: 0.162098, avg_loss: 0.285760 +012100/063150, loss: 0.050607, avg_loss: 0.285707 +012105/063150, loss: 0.051216, avg_loss: 0.285658 +012110/063150, loss: 0.174317, avg_loss: 0.285592 +012115/063150, loss: 0.308638, avg_loss: 0.285543 +012120/063150, loss: 0.122276, avg_loss: 0.285468 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 12120/63150: {'accuracy': 0.8440366972477065} +012125/063150, loss: 0.187330, avg_loss: 0.285401 +012130/063150, loss: 0.312589, avg_loss: 0.285346 +012135/063150, loss: 0.103478, avg_loss: 0.285301 +012140/063150, loss: 0.013447, avg_loss: 0.285216 +012145/063150, loss: 0.138980, avg_loss: 0.285140 +012150/063150, loss: 0.041558, avg_loss: 0.285058 +012155/063150, loss: 0.225342, avg_loss: 0.285002 +012160/063150, loss: 0.129627, avg_loss: 0.284916 +012165/063150, loss: 0.030539, avg_loss: 0.284856 +012170/063150, loss: 0.014549, avg_loss: 0.284765 +012175/063150, loss: 0.388364, avg_loss: 0.284729 +012180/063150, loss: 0.199386, avg_loss: 0.284668 +012185/063150, loss: 0.270246, avg_loss: 0.284626 +012190/063150, loss: 0.239315, avg_loss: 0.284565 +012195/063150, loss: 0.174959, avg_loss: 0.284557 +012200/063150, loss: 0.155529, avg_loss: 0.284499 +012205/063150, loss: 0.221781, avg_loss: 0.284444 +012210/063150, loss: 0.046283, avg_loss: 0.284390 +012215/063150, loss: 0.099540, avg_loss: 0.284325 +012220/063150, loss: 0.016643, avg_loss: 0.284268 +012225/063150, loss: 0.040506, avg_loss: 0.284214 +012230/063150, loss: 0.018380, avg_loss: 0.284139 +012235/063150, loss: 0.071935, avg_loss: 0.284054 +012240/063150, loss: 0.067944, avg_loss: 0.283995 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 12240/63150: {'accuracy': 0.8497706422018348} +012245/063150, loss: 0.019915, avg_loss: 0.283949 +012250/063150, loss: 0.309860, avg_loss: 0.283886 +012255/063150, loss: 0.213824, avg_loss: 0.283836 +012260/063150, loss: 0.081780, avg_loss: 0.283773 +012265/063150, loss: 0.170630, avg_loss: 0.283700 +012270/063150, loss: 0.024879, avg_loss: 0.283634 +012275/063150, loss: 0.276173, avg_loss: 0.283585 +012280/063150, loss: 0.141190, avg_loss: 0.283528 +012285/063150, loss: 0.172848, avg_loss: 0.283464 +012290/063150, loss: 0.141190, avg_loss: 0.283390 +012295/063150, loss: 0.207016, avg_loss: 0.283330 +012300/063150, loss: 0.097402, avg_loss: 0.283271 +012305/063150, loss: 0.136080, avg_loss: 0.283214 
+012310/063150, loss: 0.199594, avg_loss: 0.283155 +012315/063150, loss: 0.288813, avg_loss: 0.283093 +012320/063150, loss: 0.135031, avg_loss: 0.283027 +012325/063150, loss: 0.084555, avg_loss: 0.282954 +012330/063150, loss: 0.117300, avg_loss: 0.282885 +012335/063150, loss: 0.312335, avg_loss: 0.282843 +012340/063150, loss: 0.167584, avg_loss: 0.282768 +012345/063150, loss: 0.078126, avg_loss: 0.282698 +012350/063150, loss: 0.087348, avg_loss: 0.282625 +012355/063150, loss: 0.209596, avg_loss: 0.282554 +012360/063150, loss: 0.101469, avg_loss: 0.282492 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 12360/63150: {'accuracy': 0.8405963302752294} +012365/063150, loss: 0.159936, avg_loss: 0.282443 +012370/063150, loss: 0.022752, avg_loss: 0.282420 +012375/063150, loss: 0.228275, avg_loss: 0.282351 +012380/063150, loss: 0.123931, avg_loss: 0.282297 +012385/063150, loss: 0.026388, avg_loss: 0.282220 +012390/063150, loss: 0.088277, avg_loss: 0.282133 +012395/063150, loss: 0.128585, avg_loss: 0.282073 +012400/063150, loss: 0.141453, avg_loss: 0.281997 +012405/063150, loss: 0.101637, avg_loss: 0.281938 +012410/063150, loss: 0.055244, avg_loss: 0.281858 +012415/063150, loss: 0.033181, avg_loss: 0.281779 +012420/063150, loss: 0.048105, avg_loss: 0.281723 +012425/063150, loss: 0.059283, avg_loss: 0.281687 +012430/063150, loss: 0.013903, avg_loss: 0.281627 +012435/063150, loss: 0.211168, avg_loss: 0.281573 +012440/063150, loss: 0.084332, avg_loss: 0.281500 +012445/063150, loss: 0.289767, avg_loss: 0.281452 +012450/063150, loss: 0.188534, avg_loss: 0.281404 +012455/063150, loss: 0.251635, avg_loss: 0.281345 +012460/063150, loss: 0.161930, avg_loss: 0.281305 +012465/063150, loss: 0.335953, avg_loss: 0.281260 +012470/063150, loss: 0.208025, avg_loss: 0.281224 +012475/063150, loss: 0.053871, avg_loss: 0.281155 +012480/063150, loss: 0.125879, avg_loss: 0.281101 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 12480/63150: {'accuracy': 0.8463302752293578} +012485/063150, loss: 0.050669, avg_loss: 0.281058 +012490/063150, loss: 0.255816, avg_loss: 0.281012 +012495/063150, loss: 0.018033, avg_loss: 0.280947 +012500/063150, loss: 0.237609, avg_loss: 0.280892 +012505/063150, loss: 0.047795, avg_loss: 0.280828 +012510/063150, loss: 0.197351, avg_loss: 0.280756 +012515/063150, loss: 0.105237, avg_loss: 0.280693 +012520/063150, loss: 0.144539, avg_loss: 0.280629 +012525/063150, loss: 0.073976, avg_loss: 0.280562 +012530/063150, loss: 0.467036, avg_loss: 0.280512 +012535/063150, loss: 0.138596, avg_loss: 0.280434 +012540/063150, loss: 0.130581, avg_loss: 0.280370 +012545/063150, loss: 0.164143, avg_loss: 0.280315 +012550/063150, loss: 0.230616, avg_loss: 0.280273 +012555/063150, loss: 0.246401, avg_loss: 0.280253 +012560/063150, loss: 0.105198, avg_loss: 0.280194 +012565/063150, loss: 0.270347, avg_loss: 0.280141 +012570/063150, loss: 0.256924, avg_loss: 0.280090 +012575/063150, loss: 0.047514, avg_loss: 0.280016 +012580/063150, loss: 0.056171, avg_loss: 0.279965 +012585/063150, loss: 0.143267, avg_loss: 0.279906 +012590/063150, loss: 0.043212, avg_loss: 0.279839 +012595/063150, loss: 0.019226, avg_loss: 0.279764 +012600/063150, loss: 0.270574, avg_loss: 0.279695 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 5, step 12600/63150: {'accuracy': 0.8589449541284404} +012605/063150, loss: 0.017577, avg_loss: 0.279607 
+012610/063150, loss: 0.155197, avg_loss: 0.279541 +012615/063150, loss: 0.046276, avg_loss: 0.279450 +012620/063150, loss: 0.070490, avg_loss: 0.279386 +012625/063150, loss: 0.265416, avg_loss: 0.279346 +012630/063150, loss: 0.024924, avg_loss: 0.279274 +012635/063150, loss: 0.093318, avg_loss: 0.279199 +012640/063150, loss: 0.088615, avg_loss: 0.279127 +012645/063150, loss: 0.181436, avg_loss: 0.279061 +012650/063150, loss: 0.059107, avg_loss: 0.278987 +012655/063150, loss: 0.160050, avg_loss: 0.278910 +012660/063150, loss: 0.097640, avg_loss: 0.278837 +012665/063150, loss: 0.118547, avg_loss: 0.278769 +012670/063150, loss: 0.048246, avg_loss: 0.278693 +012675/063150, loss: 0.041198, avg_loss: 0.278613 +012680/063150, loss: 0.119466, avg_loss: 0.278527 +012685/063150, loss: 0.173420, avg_loss: 0.278456 +012690/063150, loss: 0.088104, avg_loss: 0.278417 +012695/063150, loss: 0.065836, avg_loss: 0.278326 +012700/063150, loss: 0.084069, avg_loss: 0.278263 +012705/063150, loss: 0.096931, avg_loss: 0.278184 +012710/063150, loss: 0.110463, avg_loss: 0.278117 +012715/063150, loss: 0.021911, avg_loss: 0.278062 +012720/063150, loss: 0.060979, avg_loss: 0.278011 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 12720/63150: {'accuracy': 0.8543577981651376} +012725/063150, loss: 0.039972, avg_loss: 0.277944 +012730/063150, loss: 0.089546, avg_loss: 0.277874 +012735/063150, loss: 0.104067, avg_loss: 0.277801 +012740/063150, loss: 0.123020, avg_loss: 0.277766 +012745/063150, loss: 0.454353, avg_loss: 0.277713 +012750/063150, loss: 0.128797, avg_loss: 0.277638 +012755/063150, loss: 0.083241, avg_loss: 0.277574 +012760/063150, loss: 0.147388, avg_loss: 0.277509 +012765/063150, loss: 0.126672, avg_loss: 0.277428 +012770/063150, loss: 0.089539, avg_loss: 0.277366 +012775/063150, loss: 0.061422, avg_loss: 0.277287 +012780/063150, loss: 0.047593, avg_loss: 0.277216 +012785/063150, loss: 0.111838, avg_loss: 0.277134 +012790/063150, loss: 0.183344, avg_loss: 0.277058 +012795/063150, loss: 0.107863, avg_loss: 0.277014 +012800/063150, loss: 0.093615, avg_loss: 0.276946 +012805/063150, loss: 0.089627, avg_loss: 0.276874 +012810/063150, loss: 0.012900, avg_loss: 0.276793 +012815/063150, loss: 0.194428, avg_loss: 0.276725 +012820/063150, loss: 0.180725, avg_loss: 0.276672 +012825/063150, loss: 0.124272, avg_loss: 0.276606 +012830/063150, loss: 0.028520, avg_loss: 0.276532 +012835/063150, loss: 0.120147, avg_loss: 0.276459 +012840/063150, loss: 0.047961, avg_loss: 0.276381 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 12840/63150: {'accuracy': 0.8360091743119266} +012845/063150, loss: 0.298571, avg_loss: 0.276322 +012850/063150, loss: 0.115047, avg_loss: 0.276253 +012855/063150, loss: 0.118174, avg_loss: 0.276199 +012860/063150, loss: 0.107821, avg_loss: 0.276123 +012865/063150, loss: 0.034822, avg_loss: 0.276055 +012870/063150, loss: 0.042447, avg_loss: 0.275995 +012875/063150, loss: 0.201824, avg_loss: 0.275930 +012880/063150, loss: 0.109776, avg_loss: 0.275851 +012885/063150, loss: 0.105178, avg_loss: 0.275799 +012890/063150, loss: 0.035115, avg_loss: 0.275728 +012895/063150, loss: 0.122633, avg_loss: 0.275669 +012900/063150, loss: 0.056667, avg_loss: 0.275593 +012905/063150, loss: 0.117544, avg_loss: 0.275527 +012910/063150, loss: 0.061899, avg_loss: 0.275480 +012915/063150, loss: 0.106639, avg_loss: 0.275413 +012920/063150, loss: 0.121677, avg_loss: 0.275346 +012925/063150, 
loss: 0.019810, avg_loss: 0.275274 +012930/063150, loss: 0.202127, avg_loss: 0.275209 +012935/063150, loss: 0.173433, avg_loss: 0.275154 +012940/063150, loss: 0.221774, avg_loss: 0.275096 +012945/063150, loss: 0.280484, avg_loss: 0.275045 +012950/063150, loss: 0.130884, avg_loss: 0.274974 +012955/063150, loss: 0.271203, avg_loss: 0.274916 +012960/063150, loss: 0.063081, avg_loss: 0.274861 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 12960/63150: {'accuracy': 0.8371559633027523} +012965/063150, loss: 0.062341, avg_loss: 0.274783 +012970/063150, loss: 0.109526, avg_loss: 0.274700 +012975/063150, loss: 0.032035, avg_loss: 0.274647 +012980/063150, loss: 0.151826, avg_loss: 0.274578 +012985/063150, loss: 0.232828, avg_loss: 0.274515 +012990/063150, loss: 0.082368, avg_loss: 0.274460 +012995/063150, loss: 0.114868, avg_loss: 0.274422 +013000/063150, loss: 0.152240, avg_loss: 0.274367 +013005/063150, loss: 0.114849, avg_loss: 0.274305 +013010/063150, loss: 0.269775, avg_loss: 0.274245 +013015/063150, loss: 0.056767, avg_loss: 0.274164 +013020/063150, loss: 0.064866, avg_loss: 0.274128 +013025/063150, loss: 0.069140, avg_loss: 0.274061 +013030/063150, loss: 0.033224, avg_loss: 0.273980 +013035/063150, loss: 0.114303, avg_loss: 0.273930 +013040/063150, loss: 0.124206, avg_loss: 0.273858 +013045/063150, loss: 0.098769, avg_loss: 0.273780 +013050/063150, loss: 0.160771, avg_loss: 0.273715 +013055/063150, loss: 0.111488, avg_loss: 0.273662 +013060/063150, loss: 0.128473, avg_loss: 0.273587 +013065/063150, loss: 0.073546, avg_loss: 0.273526 +013070/063150, loss: 0.112468, avg_loss: 0.273459 +013075/063150, loss: 0.185894, avg_loss: 0.273410 +013080/063150, loss: 0.295514, avg_loss: 0.273359 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13080/63150: {'accuracy': 0.8486238532110092} +013085/063150, loss: 0.022749, avg_loss: 0.273272 +013090/063150, loss: 0.093160, avg_loss: 0.273207 +013095/063150, loss: 0.085173, avg_loss: 0.273130 +013100/063150, loss: 0.063070, avg_loss: 0.273056 +013105/063150, loss: 0.037746, avg_loss: 0.272982 +013110/063150, loss: 0.028997, avg_loss: 0.272902 +013115/063150, loss: 0.108188, avg_loss: 0.272864 +013120/063150, loss: 0.073325, avg_loss: 0.272808 +013125/063150, loss: 0.061102, avg_loss: 0.272766 +013130/063150, loss: 0.113964, avg_loss: 0.272722 +013135/063150, loss: 0.121411, avg_loss: 0.272648 +013140/063150, loss: 0.122367, avg_loss: 0.272577 +013145/063150, loss: 0.118462, avg_loss: 0.272531 +013150/063150, loss: 0.048148, avg_loss: 0.272448 +013155/063150, loss: 0.057264, avg_loss: 0.272376 +013160/063150, loss: 0.056291, avg_loss: 0.272317 +013165/063150, loss: 0.092015, avg_loss: 0.272259 +013170/063150, loss: 0.162923, avg_loss: 0.272207 +013175/063150, loss: 0.088421, avg_loss: 0.272124 +013180/063150, loss: 0.100824, avg_loss: 0.272072 +013185/063150, loss: 0.198496, avg_loss: 0.272020 +013190/063150, loss: 0.045203, avg_loss: 0.271968 +013195/063150, loss: 0.069659, avg_loss: 0.271895 +013200/063150, loss: 0.087967, avg_loss: 0.271830 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13200/63150: {'accuracy': 0.8543577981651376} +013205/063150, loss: 0.062170, avg_loss: 0.271765 +013210/063150, loss: 0.093041, avg_loss: 0.271720 +013215/063150, loss: 0.043924, avg_loss: 0.271663 +013220/063150, loss: 0.125447, avg_loss: 0.271591 +013225/063150, loss: 
0.258061, avg_loss: 0.271541 +013230/063150, loss: 0.169371, avg_loss: 0.271493 +013235/063150, loss: 0.141665, avg_loss: 0.271423 +013240/063150, loss: 0.110977, avg_loss: 0.271340 +013245/063150, loss: 0.113201, avg_loss: 0.271263 +013250/063150, loss: 0.068434, avg_loss: 0.271223 +013255/063150, loss: 0.249632, avg_loss: 0.271161 +013260/063150, loss: 0.080187, avg_loss: 0.271095 +013265/063150, loss: 0.046250, avg_loss: 0.271020 +013270/063150, loss: 0.035164, avg_loss: 0.270960 +013275/063150, loss: 0.295392, avg_loss: 0.270915 +013280/063150, loss: 0.023077, avg_loss: 0.270843 +013285/063150, loss: 0.122318, avg_loss: 0.270769 +013290/063150, loss: 0.020301, avg_loss: 0.270687 +013295/063150, loss: 0.174889, avg_loss: 0.270629 +013300/063150, loss: 0.147910, avg_loss: 0.270560 +013305/063150, loss: 0.019807, avg_loss: 0.270477 +013310/063150, loss: 0.008060, avg_loss: 0.270389 +013315/063150, loss: 0.039331, avg_loss: 0.270319 +013320/063150, loss: 0.038662, avg_loss: 0.270301 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13320/63150: {'accuracy': 0.8497706422018348} +013325/063150, loss: 0.029035, avg_loss: 0.270221 +013330/063150, loss: 0.300784, avg_loss: 0.270162 +013335/063150, loss: 0.031550, avg_loss: 0.270093 +013340/063150, loss: 0.125954, avg_loss: 0.270047 +013345/063150, loss: 0.192454, avg_loss: 0.269993 +013350/063150, loss: 0.107369, avg_loss: 0.269934 +013355/063150, loss: 0.081192, avg_loss: 0.269892 +013360/063150, loss: 0.371979, avg_loss: 0.269858 +013365/063150, loss: 0.079792, avg_loss: 0.269803 +013370/063150, loss: 0.173756, avg_loss: 0.269746 +013375/063150, loss: 0.375714, avg_loss: 0.269696 +013380/063150, loss: 0.168938, avg_loss: 0.269636 +013385/063150, loss: 0.178837, avg_loss: 0.269594 +013390/063150, loss: 0.076373, avg_loss: 0.269524 +013395/063150, loss: 0.138537, avg_loss: 0.269461 +013400/063150, loss: 0.080482, avg_loss: 0.269395 +013405/063150, loss: 0.109945, avg_loss: 0.269358 +013410/063150, loss: 0.070638, avg_loss: 0.269297 +013415/063150, loss: 0.129705, avg_loss: 0.269240 +013420/063150, loss: 0.257224, avg_loss: 0.269183 +013425/063150, loss: 0.026388, avg_loss: 0.269124 +013430/063150, loss: 0.129405, avg_loss: 0.269061 +013435/063150, loss: 0.060233, avg_loss: 0.269002 +013440/063150, loss: 0.191335, avg_loss: 0.268973 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13440/63150: {'accuracy': 0.8451834862385321} +013445/063150, loss: 0.094875, avg_loss: 0.268929 +013450/063150, loss: 0.043482, avg_loss: 0.268865 +013455/063150, loss: 0.096150, avg_loss: 0.268818 +013460/063150, loss: 0.031702, avg_loss: 0.268734 +013465/063150, loss: 0.208088, avg_loss: 0.268681 +013470/063150, loss: 0.070791, avg_loss: 0.268615 +013475/063150, loss: 0.072805, avg_loss: 0.268554 +013480/063150, loss: 0.298978, avg_loss: 0.268508 +013485/063150, loss: 0.232945, avg_loss: 0.268460 +013490/063150, loss: 0.155616, avg_loss: 0.268403 +013495/063150, loss: 0.154669, avg_loss: 0.268354 +013500/063150, loss: 0.076360, avg_loss: 0.268292 +013505/063150, loss: 0.042192, avg_loss: 0.268244 +013510/063150, loss: 0.112927, avg_loss: 0.268188 +013515/063150, loss: 0.019044, avg_loss: 0.268134 +013520/063150, loss: 0.195267, avg_loss: 0.268111 +013525/063150, loss: 0.116349, avg_loss: 0.268052 +013530/063150, loss: 0.076219, avg_loss: 0.267990 +013535/063150, loss: 0.056818, avg_loss: 0.267924 +013540/063150, loss: 0.071377, avg_loss: 
0.267865 +013545/063150, loss: 0.163268, avg_loss: 0.267803 +013550/063150, loss: 0.037107, avg_loss: 0.267723 +013555/063150, loss: 0.078334, avg_loss: 0.267651 +013560/063150, loss: 0.092340, avg_loss: 0.267588 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13560/63150: {'accuracy': 0.8463302752293578} +013565/063150, loss: 0.094319, avg_loss: 0.267545 +013570/063150, loss: 0.120061, avg_loss: 0.267476 +013575/063150, loss: 0.054833, avg_loss: 0.267426 +013580/063150, loss: 0.271442, avg_loss: 0.267368 +013585/063150, loss: 0.058013, avg_loss: 0.267313 +013590/063150, loss: 0.118900, avg_loss: 0.267276 +013595/063150, loss: 0.142767, avg_loss: 0.267212 +013600/063150, loss: 0.066206, avg_loss: 0.267183 +013605/063150, loss: 0.135607, avg_loss: 0.267127 +013610/063150, loss: 0.030110, avg_loss: 0.267060 +013615/063150, loss: 0.084123, avg_loss: 0.266999 +013620/063150, loss: 0.135131, avg_loss: 0.266948 +013625/063150, loss: 0.089114, avg_loss: 0.266882 +013630/063150, loss: 0.270881, avg_loss: 0.266860 +013635/063150, loss: 0.055905, avg_loss: 0.266795 +013640/063150, loss: 0.087505, avg_loss: 0.266715 +013645/063150, loss: 0.064347, avg_loss: 0.266675 +013650/063150, loss: 0.013942, avg_loss: 0.266625 +013655/063150, loss: 0.116575, avg_loss: 0.266570 +013660/063150, loss: 0.184201, avg_loss: 0.266531 +013665/063150, loss: 0.202388, avg_loss: 0.266484 +013670/063150, loss: 0.061368, avg_loss: 0.266427 +013675/063150, loss: 0.116518, avg_loss: 0.266383 +013680/063150, loss: 0.052227, avg_loss: 0.266326 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13680/63150: {'accuracy': 0.8474770642201835} +013685/063150, loss: 0.077557, avg_loss: 0.266278 +013690/063150, loss: 0.182886, avg_loss: 0.266220 +013695/063150, loss: 0.247576, avg_loss: 0.266172 +013700/063150, loss: 0.036649, avg_loss: 0.266120 +013705/063150, loss: 0.138115, avg_loss: 0.266070 +013710/063150, loss: 0.107699, avg_loss: 0.266012 +013715/063150, loss: 0.031566, avg_loss: 0.265960 +013720/063150, loss: 0.054828, avg_loss: 0.265891 +013725/063150, loss: 0.071758, avg_loss: 0.265828 +013730/063150, loss: 0.060742, avg_loss: 0.265781 +013735/063150, loss: 0.168752, avg_loss: 0.265723 +013740/063150, loss: 0.028218, avg_loss: 0.265656 +013745/063150, loss: 0.107161, avg_loss: 0.265585 +013750/063150, loss: 0.069972, avg_loss: 0.265522 +013755/063150, loss: 0.024637, avg_loss: 0.265466 +013760/063150, loss: 0.056660, avg_loss: 0.265396 +013765/063150, loss: 0.022972, avg_loss: 0.265322 +013770/063150, loss: 0.203705, avg_loss: 0.265296 +013775/063150, loss: 0.280965, avg_loss: 0.265247 +013780/063150, loss: 0.101777, avg_loss: 0.265190 +013785/063150, loss: 0.149071, avg_loss: 0.265143 +013790/063150, loss: 0.053666, avg_loss: 0.265105 +013795/063150, loss: 0.066703, avg_loss: 0.265040 +013800/063150, loss: 0.223569, avg_loss: 0.265025 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13800/63150: {'accuracy': 0.8268348623853211} +013805/063150, loss: 0.079790, avg_loss: 0.264975 +013810/063150, loss: 0.128095, avg_loss: 0.264909 +013815/063150, loss: 0.055525, avg_loss: 0.264866 +013820/063150, loss: 0.307931, avg_loss: 0.264832 +013825/063150, loss: 0.076600, avg_loss: 0.264770 +013830/063150, loss: 0.091545, avg_loss: 0.264711 +013835/063150, loss: 0.030899, avg_loss: 0.264676 +013840/063150, loss: 0.121103, avg_loss: 0.264630 
+013845/063150, loss: 0.084404, avg_loss: 0.264551 +013850/063150, loss: 0.097718, avg_loss: 0.264489 +013855/063150, loss: 0.187055, avg_loss: 0.264446 +013860/063150, loss: 0.138895, avg_loss: 0.264382 +013865/063150, loss: 0.105118, avg_loss: 0.264313 +013870/063150, loss: 0.172121, avg_loss: 0.264275 +013875/063150, loss: 0.087043, avg_loss: 0.264222 +013880/063150, loss: 0.165336, avg_loss: 0.264185 +013885/063150, loss: 0.045960, avg_loss: 0.264110 +013890/063150, loss: 0.270816, avg_loss: 0.264054 +013895/063150, loss: 0.072052, avg_loss: 0.263987 +013900/063150, loss: 0.175842, avg_loss: 0.263973 +013905/063150, loss: 0.050581, avg_loss: 0.263902 +013910/063150, loss: 0.084705, avg_loss: 0.263834 +013915/063150, loss: 0.152145, avg_loss: 0.263778 +013920/063150, loss: 0.051316, avg_loss: 0.263720 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 13920/63150: {'accuracy': 0.8497706422018348} +013925/063150, loss: 0.103797, avg_loss: 0.263661 +013930/063150, loss: 0.089867, avg_loss: 0.263589 +013935/063150, loss: 0.163807, avg_loss: 0.263540 +013940/063150, loss: 0.078997, avg_loss: 0.263480 +013945/063150, loss: 0.152408, avg_loss: 0.263419 +013950/063150, loss: 0.123994, avg_loss: 0.263358 +013955/063150, loss: 0.013176, avg_loss: 0.263298 +013960/063150, loss: 0.095018, avg_loss: 0.263246 +013965/063150, loss: 0.134163, avg_loss: 0.263174 +013970/063150, loss: 0.178191, avg_loss: 0.263127 +013975/063150, loss: 0.208208, avg_loss: 0.263086 +013980/063150, loss: 0.360650, avg_loss: 0.263034 +013985/063150, loss: 0.108444, avg_loss: 0.262983 +013990/063150, loss: 0.286343, avg_loss: 0.262923 +013995/063150, loss: 0.140673, avg_loss: 0.262886 +014000/063150, loss: 0.256444, avg_loss: 0.262837 +014005/063150, loss: 0.128041, avg_loss: 0.262775 +014010/063150, loss: 0.088349, avg_loss: 0.262733 +014015/063150, loss: 0.212553, avg_loss: 0.262692 +014020/063150, loss: 0.087990, avg_loss: 0.262636 +014025/063150, loss: 0.038597, avg_loss: 0.262564 +014030/063150, loss: 0.316874, avg_loss: 0.262512 +014035/063150, loss: 0.044449, avg_loss: 0.262449 +014040/063150, loss: 0.326197, avg_loss: 0.262405 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 14040/63150: {'accuracy': 0.8589449541284404} +014045/063150, loss: 0.177093, avg_loss: 0.262340 +014050/063150, loss: 0.036103, avg_loss: 0.262290 +014055/063150, loss: 0.060110, avg_loss: 0.262223 +014060/063150, loss: 0.056052, avg_loss: 0.262196 +014065/063150, loss: 0.046807, avg_loss: 0.262145 +014070/063150, loss: 0.028137, avg_loss: 0.262088 +014075/063150, loss: 0.231622, avg_loss: 0.262031 +014080/063150, loss: 0.051018, avg_loss: 0.261989 +014085/063150, loss: 0.016655, avg_loss: 0.261940 +014090/063150, loss: 0.140307, avg_loss: 0.261899 +014095/063150, loss: 0.029685, avg_loss: 0.261857 +014100/063150, loss: 0.094307, avg_loss: 0.261827 +014105/063150, loss: 0.138286, avg_loss: 0.261769 +014110/063150, loss: 0.051098, avg_loss: 0.261706 +014115/063150, loss: 0.172384, avg_loss: 0.261658 +014120/063150, loss: 0.149141, avg_loss: 0.261597 +014125/063150, loss: 0.092800, avg_loss: 0.261531 +014130/063150, loss: 0.067818, avg_loss: 0.261476 +014135/063150, loss: 0.094962, avg_loss: 0.261433 +014140/063150, loss: 0.045183, avg_loss: 0.261381 +014145/063150, loss: 0.085965, avg_loss: 0.261328 +014150/063150, loss: 0.166606, avg_loss: 0.261287 +014155/063150, loss: 0.145029, avg_loss: 0.261240 +014160/063150, 
loss: 0.060549, avg_loss: 0.261186 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 14160/63150: {'accuracy': 0.8474770642201835} +014165/063150, loss: 0.020250, avg_loss: 0.261122 +014170/063150, loss: 0.399914, avg_loss: 0.261074 +014175/063150, loss: 0.065335, avg_loss: 0.261008 +014180/063150, loss: 0.235548, avg_loss: 0.260967 +014185/063150, loss: 0.017201, avg_loss: 0.260909 +014190/063150, loss: 0.036110, avg_loss: 0.260864 +014195/063150, loss: 0.088286, avg_loss: 0.260820 +014200/063150, loss: 0.074259, avg_loss: 0.260758 +014205/063150, loss: 0.059955, avg_loss: 0.260701 +014210/063150, loss: 0.082869, avg_loss: 0.260642 +014215/063150, loss: 0.098786, avg_loss: 0.260595 +014220/063150, loss: 0.175388, avg_loss: 0.260569 +014225/063150, loss: 0.085989, avg_loss: 0.260520 +014230/063150, loss: 0.084125, avg_loss: 0.260462 +014235/063150, loss: 0.183629, avg_loss: 0.260432 +014240/063150, loss: 0.088892, avg_loss: 0.260388 +014245/063150, loss: 0.083819, avg_loss: 0.260324 +014250/063150, loss: 0.346467, avg_loss: 0.260294 +014255/063150, loss: 0.062989, avg_loss: 0.260247 +014260/063150, loss: 0.212593, avg_loss: 0.260204 +014265/063150, loss: 0.076411, avg_loss: 0.260172 +014270/063150, loss: 0.115249, avg_loss: 0.260107 +014275/063150, loss: 0.063950, avg_loss: 0.260053 +014280/063150, loss: 0.031637, avg_loss: 0.259986 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 14280/63150: {'accuracy': 0.8577981651376146} +014285/063150, loss: 0.054005, avg_loss: 0.259940 +014290/063150, loss: 0.089260, avg_loss: 0.259893 +014295/063150, loss: 0.066401, avg_loss: 0.259847 +014300/063150, loss: 0.137174, avg_loss: 0.259792 +014305/063150, loss: 0.101514, avg_loss: 0.259719 +014310/063150, loss: 0.063591, avg_loss: 0.259659 +014315/063150, loss: 0.020809, avg_loss: 0.259599 +014320/063150, loss: 0.178270, avg_loss: 0.259537 +014325/063150, loss: 0.039895, avg_loss: 0.259490 +014330/063150, loss: 0.154372, avg_loss: 0.259434 +014335/063150, loss: 0.200468, avg_loss: 0.259392 +014340/063150, loss: 0.037809, avg_loss: 0.259327 +014345/063150, loss: 0.067647, avg_loss: 0.259264 +014350/063150, loss: 0.145115, avg_loss: 0.259215 +014355/063150, loss: 0.026962, avg_loss: 0.259157 +014360/063150, loss: 0.121841, avg_loss: 0.259105 +014365/063150, loss: 0.022555, avg_loss: 0.259032 +014370/063150, loss: 0.047826, avg_loss: 0.258974 +014375/063150, loss: 0.010901, avg_loss: 0.258900 +014380/063150, loss: 0.096120, avg_loss: 0.258845 +014385/063150, loss: 0.292007, avg_loss: 0.258798 +014390/063150, loss: 0.206674, avg_loss: 0.258736 +014395/063150, loss: 0.073791, avg_loss: 0.258686 +014400/063150, loss: 0.034189, avg_loss: 0.258624 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 14400/63150: {'accuracy': 0.8612385321100917} +014405/063150, loss: 0.015206, avg_loss: 0.258570 +014410/063150, loss: 0.281437, avg_loss: 0.258538 +014415/063150, loss: 0.074194, avg_loss: 0.258499 +014420/063150, loss: 0.024112, avg_loss: 0.258438 +014425/063150, loss: 0.145962, avg_loss: 0.258382 +014430/063150, loss: 0.251093, avg_loss: 0.258355 +014435/063150, loss: 0.134923, avg_loss: 0.258307 +014440/063150, loss: 0.198847, avg_loss: 0.258301 +014445/063150, loss: 0.054991, avg_loss: 0.258246 +014450/063150, loss: 0.311306, avg_loss: 0.258196 +014455/063150, loss: 0.164875, avg_loss: 0.258138 +014460/063150, loss: 
0.065179, avg_loss: 0.258094 +014465/063150, loss: 0.084651, avg_loss: 0.258034 +014470/063150, loss: 0.168442, avg_loss: 0.257981 +014475/063150, loss: 0.034467, avg_loss: 0.257909 +014480/063150, loss: 0.147439, avg_loss: 0.257889 +014485/063150, loss: 0.043330, avg_loss: 0.257826 +014490/063150, loss: 0.119306, avg_loss: 0.257777 +014495/063150, loss: 0.118149, avg_loss: 0.257739 +014500/063150, loss: 0.148272, avg_loss: 0.257688 +014505/063150, loss: 0.217492, avg_loss: 0.257653 +014510/063150, loss: 0.050174, avg_loss: 0.257602 +014515/063150, loss: 0.059788, avg_loss: 0.257539 +014520/063150, loss: 0.049787, avg_loss: 0.257507 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 14520/63150: {'accuracy': 0.8623853211009175} +014525/063150, loss: 0.042498, avg_loss: 0.257449 +014530/063150, loss: 0.340567, avg_loss: 0.257409 +014535/063150, loss: 0.057205, avg_loss: 0.257355 +014540/063150, loss: 0.119171, avg_loss: 0.257304 +014545/063150, loss: 0.085505, avg_loss: 0.257271 +014550/063150, loss: 0.027120, avg_loss: 0.257223 +014555/063150, loss: 0.061823, avg_loss: 0.257154 +014560/063150, loss: 0.042163, avg_loss: 0.257104 +014565/063150, loss: 0.119290, avg_loss: 0.257054 +014570/063150, loss: 0.240499, avg_loss: 0.256998 +014575/063150, loss: 0.086652, avg_loss: 0.256939 +014580/063150, loss: 0.116210, avg_loss: 0.256870 +014585/063150, loss: 0.121100, avg_loss: 0.256828 +014590/063150, loss: 0.099862, avg_loss: 0.256778 +014595/063150, loss: 0.138586, avg_loss: 0.256736 +014600/063150, loss: 0.045343, avg_loss: 0.256679 +014605/063150, loss: 0.137912, avg_loss: 0.256620 +014610/063150, loss: 0.029011, avg_loss: 0.256575 +014615/063150, loss: 0.061517, avg_loss: 0.256512 +014620/063150, loss: 0.121189, avg_loss: 0.256477 +014625/063150, loss: 0.296102, avg_loss: 0.256448 +014630/063150, loss: 0.081778, avg_loss: 0.256400 +014635/063150, loss: 0.124910, avg_loss: 0.256357 +014640/063150, loss: 0.207077, avg_loss: 0.256319 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 6, step 14640/63150: {'accuracy': 0.8600917431192661} +014645/063150, loss: 0.058024, avg_loss: 0.256280 +014650/063150, loss: 0.150247, avg_loss: 0.256216 +014655/063150, loss: 0.110824, avg_loss: 0.256178 +014660/063150, loss: 0.292875, avg_loss: 0.256133 +014665/063150, loss: 0.233535, avg_loss: 0.256114 +014670/063150, loss: 0.087444, avg_loss: 0.256052 +014675/063150, loss: 0.154656, avg_loss: 0.256013 +014680/063150, loss: 0.079222, avg_loss: 0.255978 +014685/063150, loss: 0.084735, avg_loss: 0.255920 +014690/063150, loss: 0.019807, avg_loss: 0.255851 +014695/063150, loss: 0.126704, avg_loss: 0.255799 +014700/063150, loss: 0.077859, avg_loss: 0.255753 +014705/063150, loss: 0.037996, avg_loss: 0.255692 +014710/063150, loss: 0.184385, avg_loss: 0.255654 +014715/063150, loss: 0.103425, avg_loss: 0.255624 +014720/063150, loss: 0.072027, avg_loss: 0.255573 +014725/063150, loss: 0.065999, avg_loss: 0.255518 +014730/063150, loss: 0.084613, avg_loss: 0.255458 +014735/063150, loss: 0.173158, avg_loss: 0.255426 +014740/063150, loss: 0.042773, avg_loss: 0.255356 +014745/063150, loss: 0.023401, avg_loss: 0.255290 +014750/063150, loss: 0.270549, avg_loss: 0.255232 +014755/063150, loss: 0.028614, avg_loss: 0.255167 +014760/063150, loss: 0.100419, avg_loss: 0.255107 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 14760/63150: {'accuracy': 
0.8589449541284404} +014765/063150, loss: 0.047162, avg_loss: 0.255042 +014770/063150, loss: 0.048707, avg_loss: 0.254974 +014775/063150, loss: 0.037687, avg_loss: 0.254917 +014780/063150, loss: 0.064711, avg_loss: 0.254868 +014785/063150, loss: 0.031524, avg_loss: 0.254799 +014790/063150, loss: 0.098283, avg_loss: 0.254782 +014795/063150, loss: 0.025336, avg_loss: 0.254720 +014800/063150, loss: 0.153026, avg_loss: 0.254667 +014805/063150, loss: 0.027600, avg_loss: 0.254610 +014810/063150, loss: 0.023772, avg_loss: 0.254570 +014815/063150, loss: 0.049845, avg_loss: 0.254500 +014820/063150, loss: 0.028427, avg_loss: 0.254441 +014825/063150, loss: 0.020604, avg_loss: 0.254368 +014830/063150, loss: 0.033797, avg_loss: 0.254316 +014835/063150, loss: 0.254177, avg_loss: 0.254265 +014840/063150, loss: 0.279819, avg_loss: 0.254216 +014845/063150, loss: 0.209376, avg_loss: 0.254161 +014850/063150, loss: 0.090203, avg_loss: 0.254106 +014855/063150, loss: 0.221246, avg_loss: 0.254065 +014860/063150, loss: 0.069436, avg_loss: 0.254010 +014865/063150, loss: 0.033942, avg_loss: 0.253952 +014870/063150, loss: 0.131283, avg_loss: 0.253924 +014875/063150, loss: 0.097068, avg_loss: 0.253883 +014880/063150, loss: 0.051916, avg_loss: 0.253822 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 14880/63150: {'accuracy': 0.8520642201834863} +014885/063150, loss: 0.060921, avg_loss: 0.253770 +014890/063150, loss: 0.022754, avg_loss: 0.253715 +014895/063150, loss: 0.016856, avg_loss: 0.253650 +014900/063150, loss: 0.169625, avg_loss: 0.253610 +014905/063150, loss: 0.086245, avg_loss: 0.253552 +014910/063150, loss: 0.038238, avg_loss: 0.253482 +014915/063150, loss: 0.013177, avg_loss: 0.253405 +014920/063150, loss: 0.256956, avg_loss: 0.253349 +014925/063150, loss: 0.032279, avg_loss: 0.253299 +014930/063150, loss: 0.087344, avg_loss: 0.253249 +014935/063150, loss: 0.010722, avg_loss: 0.253183 +014940/063150, loss: 0.052754, avg_loss: 0.253118 +014945/063150, loss: 0.006596, avg_loss: 0.253055 +014950/063150, loss: 0.127233, avg_loss: 0.252994 +014955/063150, loss: 0.063616, avg_loss: 0.252931 +014960/063150, loss: 0.227392, avg_loss: 0.252893 +014965/063150, loss: 0.062876, avg_loss: 0.252826 +014970/063150, loss: 0.187272, avg_loss: 0.252771 +014975/063150, loss: 0.181508, avg_loss: 0.252717 +014980/063150, loss: 0.264755, avg_loss: 0.252685 +014985/063150, loss: 0.067823, avg_loss: 0.252659 +014990/063150, loss: 0.117201, avg_loss: 0.252611 +014995/063150, loss: 0.067068, avg_loss: 0.252555 +015000/063150, loss: 0.101558, avg_loss: 0.252500 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15000/63150: {'accuracy': 0.8509174311926605} +015005/063150, loss: 0.057630, avg_loss: 0.252457 +015010/063150, loss: 0.037018, avg_loss: 0.252391 +015015/063150, loss: 0.283105, avg_loss: 0.252355 +015020/063150, loss: 0.119350, avg_loss: 0.252311 +015025/063150, loss: 0.099779, avg_loss: 0.252251 +015030/063150, loss: 0.039832, avg_loss: 0.252202 +015035/063150, loss: 0.086444, avg_loss: 0.252146 +015040/063150, loss: 0.149655, avg_loss: 0.252112 +015045/063150, loss: 0.079242, avg_loss: 0.252051 +015050/063150, loss: 0.082632, avg_loss: 0.251981 +015055/063150, loss: 0.166733, avg_loss: 0.251936 +015060/063150, loss: 0.041102, avg_loss: 0.251898 +015065/063150, loss: 0.432804, avg_loss: 0.251862 +015070/063150, loss: 0.082084, avg_loss: 0.251805 +015075/063150, loss: 0.020640, avg_loss: 0.251747 
+015080/063150, loss: 0.072865, avg_loss: 0.251694 +015085/063150, loss: 0.018421, avg_loss: 0.251641 +015090/063150, loss: 0.220518, avg_loss: 0.251584 +015095/063150, loss: 0.052701, avg_loss: 0.251526 +015100/063150, loss: 0.059496, avg_loss: 0.251475 +015105/063150, loss: 0.108729, avg_loss: 0.251419 +015110/063150, loss: 0.098916, avg_loss: 0.251363 +015115/063150, loss: 0.046422, avg_loss: 0.251309 +015120/063150, loss: 0.173842, avg_loss: 0.251254 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15120/63150: {'accuracy': 0.8612385321100917} +015125/063150, loss: 0.036144, avg_loss: 0.251209 +015130/063150, loss: 0.028506, avg_loss: 0.251142 +015135/063150, loss: 0.092467, avg_loss: 0.251086 +015140/063150, loss: 0.049376, avg_loss: 0.251026 +015145/063150, loss: 0.157167, avg_loss: 0.250971 +015150/063150, loss: 0.107291, avg_loss: 0.250921 +015155/063150, loss: 0.097855, avg_loss: 0.250865 +015160/063150, loss: 0.015526, avg_loss: 0.250796 +015165/063150, loss: 0.049513, avg_loss: 0.250747 +015170/063150, loss: 0.102515, avg_loss: 0.250687 +015175/063150, loss: 0.187899, avg_loss: 0.250628 +015180/063150, loss: 0.026176, avg_loss: 0.250586 +015185/063150, loss: 0.060727, avg_loss: 0.250519 +015190/063150, loss: 0.031158, avg_loss: 0.250482 +015195/063150, loss: 0.142221, avg_loss: 0.250423 +015200/063150, loss: 0.045691, avg_loss: 0.250364 +015205/063150, loss: 0.093195, avg_loss: 0.250320 +015210/063150, loss: 0.106645, avg_loss: 0.250256 +015215/063150, loss: 0.260352, avg_loss: 0.250221 +015220/063150, loss: 0.086733, avg_loss: 0.250162 +015225/063150, loss: 0.155065, avg_loss: 0.250118 +015230/063150, loss: 0.143915, avg_loss: 0.250074 +015235/063150, loss: 0.037681, avg_loss: 0.250027 +015240/063150, loss: 0.139268, avg_loss: 0.249977 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15240/63150: {'accuracy': 0.8520642201834863} +015245/063150, loss: 0.171303, avg_loss: 0.249936 +015250/063150, loss: 0.131963, avg_loss: 0.249886 +015255/063150, loss: 0.017564, avg_loss: 0.249836 +015260/063150, loss: 0.053436, avg_loss: 0.249788 +015265/063150, loss: 0.015324, avg_loss: 0.249744 +015270/063150, loss: 0.031634, avg_loss: 0.249688 +015275/063150, loss: 0.163604, avg_loss: 0.249647 +015280/063150, loss: 0.028024, avg_loss: 0.249587 +015285/063150, loss: 0.024133, avg_loss: 0.249528 +015290/063150, loss: 0.286434, avg_loss: 0.249497 +015295/063150, loss: 0.024551, avg_loss: 0.249435 +015300/063150, loss: 0.064727, avg_loss: 0.249377 +015305/063150, loss: 0.022773, avg_loss: 0.249317 +015310/063150, loss: 0.192043, avg_loss: 0.249287 +015315/063150, loss: 0.125754, avg_loss: 0.249238 +015320/063150, loss: 0.172836, avg_loss: 0.249187 +015325/063150, loss: 0.039195, avg_loss: 0.249127 +015330/063150, loss: 0.116977, avg_loss: 0.249090 +015335/063150, loss: 0.026914, avg_loss: 0.249050 +015340/063150, loss: 0.076980, avg_loss: 0.249006 +015345/063150, loss: 0.167223, avg_loss: 0.248953 +015350/063150, loss: 0.043318, avg_loss: 0.248910 +015355/063150, loss: 0.211880, avg_loss: 0.248859 +015360/063150, loss: 0.237495, avg_loss: 0.248805 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15360/63150: {'accuracy': 0.856651376146789} +015365/063150, loss: 0.068923, avg_loss: 0.248754 +015370/063150, loss: 0.122501, avg_loss: 0.248703 +015375/063150, loss: 0.244610, avg_loss: 0.248658 
+015380/063150, loss: 0.038137, avg_loss: 0.248599 +015385/063150, loss: 0.054783, avg_loss: 0.248533 +015390/063150, loss: 0.014205, avg_loss: 0.248504 +015395/063150, loss: 0.163701, avg_loss: 0.248451 +015400/063150, loss: 0.141392, avg_loss: 0.248409 +015405/063150, loss: 0.047014, avg_loss: 0.248352 +015410/063150, loss: 0.161129, avg_loss: 0.248311 +015415/063150, loss: 0.063588, avg_loss: 0.248256 +015420/063150, loss: 0.106174, avg_loss: 0.248216 +015425/063150, loss: 0.135584, avg_loss: 0.248159 +015430/063150, loss: 0.040877, avg_loss: 0.248122 +015435/063150, loss: 0.058661, avg_loss: 0.248074 +015440/063150, loss: 0.060923, avg_loss: 0.248022 +015445/063150, loss: 0.043286, avg_loss: 0.247969 +015450/063150, loss: 0.097451, avg_loss: 0.247926 +015455/063150, loss: 0.059941, avg_loss: 0.247883 +015460/063150, loss: 0.057785, avg_loss: 0.247840 +015465/063150, loss: 0.122885, avg_loss: 0.247789 +015470/063150, loss: 0.049087, avg_loss: 0.247754 +015475/063150, loss: 0.094039, avg_loss: 0.247696 +015480/063150, loss: 0.168765, avg_loss: 0.247647 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15480/63150: {'accuracy': 0.8704128440366973} +015485/063150, loss: 0.015912, avg_loss: 0.247586 +015490/063150, loss: 0.133411, avg_loss: 0.247534 +015495/063150, loss: 0.075367, avg_loss: 0.247479 +015500/063150, loss: 0.054369, avg_loss: 0.247413 +015505/063150, loss: 0.015628, avg_loss: 0.247351 +015510/063150, loss: 0.014699, avg_loss: 0.247302 +015515/063150, loss: 0.149320, avg_loss: 0.247253 +015520/063150, loss: 0.003490, avg_loss: 0.247195 +015525/063150, loss: 0.036033, avg_loss: 0.247159 +015530/063150, loss: 0.227210, avg_loss: 0.247136 +015535/063150, loss: 0.112899, avg_loss: 0.247093 +015540/063150, loss: 0.061346, avg_loss: 0.247038 +015545/063150, loss: 0.141176, avg_loss: 0.247006 +015550/063150, loss: 0.082260, avg_loss: 0.246953 +015555/063150, loss: 0.077427, avg_loss: 0.246921 +015560/063150, loss: 0.032544, avg_loss: 0.246872 +015565/063150, loss: 0.028731, avg_loss: 0.246816 +015570/063150, loss: 0.151045, avg_loss: 0.246767 +015575/063150, loss: 0.156013, avg_loss: 0.246724 +015580/063150, loss: 0.176002, avg_loss: 0.246689 +015585/063150, loss: 0.195044, avg_loss: 0.246633 +015590/063150, loss: 0.145153, avg_loss: 0.246580 +015595/063150, loss: 0.060876, avg_loss: 0.246530 +015600/063150, loss: 0.019595, avg_loss: 0.246485 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15600/63150: {'accuracy': 0.8520642201834863} +015605/063150, loss: 0.174659, avg_loss: 0.246442 +015610/063150, loss: 0.076890, avg_loss: 0.246397 +015615/063150, loss: 0.054905, avg_loss: 0.246371 +015620/063150, loss: 0.034342, avg_loss: 0.246326 +015625/063150, loss: 0.025884, avg_loss: 0.246270 +015630/063150, loss: 0.073457, avg_loss: 0.246205 +015635/063150, loss: 0.170781, avg_loss: 0.246159 +015640/063150, loss: 0.063921, avg_loss: 0.246114 +015645/063150, loss: 0.073859, avg_loss: 0.246069 +015650/063150, loss: 0.201619, avg_loss: 0.246014 +015655/063150, loss: 0.218607, avg_loss: 0.245992 +015660/063150, loss: 0.014419, avg_loss: 0.245934 +015665/063150, loss: 0.199271, avg_loss: 0.245897 +015670/063150, loss: 0.061761, avg_loss: 0.245848 +015675/063150, loss: 0.060662, avg_loss: 0.245784 +015680/063150, loss: 0.189873, avg_loss: 0.245758 +015685/063150, loss: 0.081219, avg_loss: 0.245718 +015690/063150, loss: 0.103686, avg_loss: 0.245674 +015695/063150, 
loss: 0.049083, avg_loss: 0.245640 +015700/063150, loss: 0.213367, avg_loss: 0.245612 +015705/063150, loss: 0.207482, avg_loss: 0.245575 +015710/063150, loss: 0.024889, avg_loss: 0.245526 +015715/063150, loss: 0.079323, avg_loss: 0.245485 +015720/063150, loss: 0.134524, avg_loss: 0.245455 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15720/63150: {'accuracy': 0.8371559633027523} +015725/063150, loss: 0.090505, avg_loss: 0.245400 +015730/063150, loss: 0.016397, avg_loss: 0.245347 +015735/063150, loss: 0.129737, avg_loss: 0.245297 +015740/063150, loss: 0.019403, avg_loss: 0.245242 +015745/063150, loss: 0.063688, avg_loss: 0.245209 +015750/063150, loss: 0.033200, avg_loss: 0.245145 +015755/063150, loss: 0.220870, avg_loss: 0.245094 +015760/063150, loss: 0.101973, avg_loss: 0.245033 +015765/063150, loss: 0.193970, avg_loss: 0.244987 +015770/063150, loss: 0.096565, avg_loss: 0.244955 +015775/063150, loss: 0.066594, avg_loss: 0.244907 +015780/063150, loss: 0.119268, avg_loss: 0.244866 +015785/063150, loss: 0.182717, avg_loss: 0.244846 +015790/063150, loss: 0.123303, avg_loss: 0.244793 +015795/063150, loss: 0.094873, avg_loss: 0.244766 +015800/063150, loss: 0.175980, avg_loss: 0.244722 +015805/063150, loss: 0.033686, avg_loss: 0.244671 +015810/063150, loss: 0.091493, avg_loss: 0.244618 +015815/063150, loss: 0.157901, avg_loss: 0.244599 +015820/063150, loss: 0.061673, avg_loss: 0.244553 +015825/063150, loss: 0.042599, avg_loss: 0.244499 +015830/063150, loss: 0.022082, avg_loss: 0.244455 +015835/063150, loss: 0.222017, avg_loss: 0.244407 +015840/063150, loss: 0.232396, avg_loss: 0.244364 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15840/63150: {'accuracy': 0.8451834862385321} +015845/063150, loss: 0.072962, avg_loss: 0.244309 +015850/063150, loss: 0.007790, avg_loss: 0.244267 +015855/063150, loss: 0.021392, avg_loss: 0.244220 +015860/063150, loss: 0.162051, avg_loss: 0.244171 +015865/063150, loss: 0.013669, avg_loss: 0.244129 +015870/063150, loss: 0.082533, avg_loss: 0.244099 +015875/063150, loss: 0.048399, avg_loss: 0.244044 +015880/063150, loss: 0.054102, avg_loss: 0.243990 +015885/063150, loss: 0.045106, avg_loss: 0.243946 +015890/063150, loss: 0.018356, avg_loss: 0.243893 +015895/063150, loss: 0.064778, avg_loss: 0.243834 +015900/063150, loss: 0.106186, avg_loss: 0.243805 +015905/063150, loss: 0.013312, avg_loss: 0.243752 +015910/063150, loss: 0.308522, avg_loss: 0.243715 +015915/063150, loss: 0.206676, avg_loss: 0.243669 +015920/063150, loss: 0.057731, avg_loss: 0.243610 +015925/063150, loss: 0.133215, avg_loss: 0.243565 +015930/063150, loss: 0.125428, avg_loss: 0.243518 +015935/063150, loss: 0.101146, avg_loss: 0.243471 +015940/063150, loss: 0.048975, avg_loss: 0.243412 +015945/063150, loss: 0.095781, avg_loss: 0.243362 +015950/063150, loss: 0.147959, avg_loss: 0.243320 +015955/063150, loss: 0.018992, avg_loss: 0.243263 +015960/063150, loss: 0.147412, avg_loss: 0.243239 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 15960/63150: {'accuracy': 0.8635321100917431} +015965/063150, loss: 0.046434, avg_loss: 0.243195 +015970/063150, loss: 0.103837, avg_loss: 0.243160 +015975/063150, loss: 0.036223, avg_loss: 0.243098 +015980/063150, loss: 0.155872, avg_loss: 0.243048 +015985/063150, loss: 0.048480, avg_loss: 0.243014 +015990/063150, loss: 0.230808, avg_loss: 0.242982 +015995/063150, loss: 
0.068854, avg_loss: 0.242936 +016000/063150, loss: 0.163879, avg_loss: 0.242897 +016005/063150, loss: 0.081024, avg_loss: 0.242860 +016010/063150, loss: 0.050348, avg_loss: 0.242808 +016015/063150, loss: 0.036733, avg_loss: 0.242745 +016020/063150, loss: 0.127995, avg_loss: 0.242693 +016025/063150, loss: 0.139483, avg_loss: 0.242646 +016030/063150, loss: 0.214190, avg_loss: 0.242614 +016035/063150, loss: 0.008660, avg_loss: 0.242587 +016040/063150, loss: 0.023276, avg_loss: 0.242540 +016045/063150, loss: 0.079154, avg_loss: 0.242500 +016050/063150, loss: 0.095760, avg_loss: 0.242466 +016055/063150, loss: 0.128351, avg_loss: 0.242422 +016060/063150, loss: 0.078901, avg_loss: 0.242374 +016065/063150, loss: 0.220945, avg_loss: 0.242330 +016070/063150, loss: 0.124696, avg_loss: 0.242288 +016075/063150, loss: 0.105566, avg_loss: 0.242240 +016080/063150, loss: 0.061437, avg_loss: 0.242192 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16080/63150: {'accuracy': 0.8623853211009175} +016085/063150, loss: 0.077287, avg_loss: 0.242158 +016090/063150, loss: 0.023930, avg_loss: 0.242113 +016095/063150, loss: 0.107452, avg_loss: 0.242069 +016100/063150, loss: 0.162722, avg_loss: 0.242030 +016105/063150, loss: 0.179557, avg_loss: 0.241990 +016110/063150, loss: 0.133646, avg_loss: 0.241937 +016115/063150, loss: 0.150010, avg_loss: 0.241893 +016120/063150, loss: 0.198442, avg_loss: 0.241860 +016125/063150, loss: 0.107166, avg_loss: 0.241802 +016130/063150, loss: 0.024254, avg_loss: 0.241751 +016135/063150, loss: 0.076878, avg_loss: 0.241699 +016140/063150, loss: 0.045169, avg_loss: 0.241636 +016145/063150, loss: 0.100221, avg_loss: 0.241597 +016150/063150, loss: 0.030091, avg_loss: 0.241540 +016155/063150, loss: 0.501772, avg_loss: 0.241504 +016160/063150, loss: 0.041420, avg_loss: 0.241441 +016165/063150, loss: 0.414921, avg_loss: 0.241442 +016170/063150, loss: 0.067747, avg_loss: 0.241397 +016175/063150, loss: 0.165367, avg_loss: 0.241349 +016180/063150, loss: 0.051663, avg_loss: 0.241309 +016185/063150, loss: 0.076393, avg_loss: 0.241257 +016190/063150, loss: 0.234359, avg_loss: 0.241223 +016195/063150, loss: 0.114011, avg_loss: 0.241194 +016200/063150, loss: 0.034018, avg_loss: 0.241161 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16200/63150: {'accuracy': 0.8520642201834863} +016205/063150, loss: 0.132344, avg_loss: 0.241119 +016210/063150, loss: 0.069125, avg_loss: 0.241057 +016215/063150, loss: 0.188767, avg_loss: 0.241012 +016220/063150, loss: 0.185615, avg_loss: 0.240975 +016225/063150, loss: 0.286258, avg_loss: 0.240949 +016230/063150, loss: 0.057768, avg_loss: 0.240905 +016235/063150, loss: 0.108053, avg_loss: 0.240855 +016240/063150, loss: 0.035894, avg_loss: 0.240809 +016245/063150, loss: 0.015202, avg_loss: 0.240753 +016250/063150, loss: 0.098608, avg_loss: 0.240714 +016255/063150, loss: 0.032770, avg_loss: 0.240678 +016260/063150, loss: 0.274860, avg_loss: 0.240650 +016265/063150, loss: 0.052015, avg_loss: 0.240599 +016270/063150, loss: 0.186151, avg_loss: 0.240554 +016275/063150, loss: 0.022749, avg_loss: 0.240521 +016280/063150, loss: 0.020349, avg_loss: 0.240468 +016285/063150, loss: 0.129580, avg_loss: 0.240433 +016290/063150, loss: 0.077496, avg_loss: 0.240390 +016295/063150, loss: 0.091408, avg_loss: 0.240342 +016300/063150, loss: 0.174978, avg_loss: 0.240306 +016305/063150, loss: 0.066926, avg_loss: 0.240263 +016310/063150, loss: 0.218361, avg_loss: 
0.240225 +016315/063150, loss: 0.308288, avg_loss: 0.240198 +016320/063150, loss: 0.056079, avg_loss: 0.240151 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16320/63150: {'accuracy': 0.8497706422018348} +016325/063150, loss: 0.172810, avg_loss: 0.240114 +016330/063150, loss: 0.192324, avg_loss: 0.240070 +016335/063150, loss: 0.054192, avg_loss: 0.240039 +016340/063150, loss: 0.052309, avg_loss: 0.240005 +016345/063150, loss: 0.079147, avg_loss: 0.239961 +016350/063150, loss: 0.065793, avg_loss: 0.239917 +016355/063150, loss: 0.328105, avg_loss: 0.239876 +016360/063150, loss: 0.071816, avg_loss: 0.239824 +016365/063150, loss: 0.144563, avg_loss: 0.239776 +016370/063150, loss: 0.208988, avg_loss: 0.239746 +016375/063150, loss: 0.084231, avg_loss: 0.239720 +016380/063150, loss: 0.077513, avg_loss: 0.239671 +016385/063150, loss: 0.015028, avg_loss: 0.239622 +016390/063150, loss: 0.025971, avg_loss: 0.239568 +016395/063150, loss: 0.144280, avg_loss: 0.239542 +016400/063150, loss: 0.113108, avg_loss: 0.239502 +016405/063150, loss: 0.066161, avg_loss: 0.239463 +016410/063150, loss: 0.057078, avg_loss: 0.239421 +016415/063150, loss: 0.085710, avg_loss: 0.239390 +016420/063150, loss: 0.065105, avg_loss: 0.239345 +016425/063150, loss: 0.124231, avg_loss: 0.239296 +016430/063150, loss: 0.119014, avg_loss: 0.239256 +016435/063150, loss: 0.172879, avg_loss: 0.239214 +016440/063150, loss: 0.175219, avg_loss: 0.239174 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16440/63150: {'accuracy': 0.8451834862385321} +016445/063150, loss: 0.162650, avg_loss: 0.239134 +016450/063150, loss: 0.055402, avg_loss: 0.239080 +016455/063150, loss: 0.058849, avg_loss: 0.239021 +016460/063150, loss: 0.103137, avg_loss: 0.238980 +016465/063150, loss: 0.167158, avg_loss: 0.238932 +016470/063150, loss: 0.088193, avg_loss: 0.238884 +016475/063150, loss: 0.038158, avg_loss: 0.238831 +016480/063150, loss: 0.075967, avg_loss: 0.238794 +016485/063150, loss: 0.150140, avg_loss: 0.238745 +016490/063150, loss: 0.029540, avg_loss: 0.238712 +016495/063150, loss: 0.100816, avg_loss: 0.238673 +016500/063150, loss: 0.060318, avg_loss: 0.238623 +016505/063150, loss: 0.008539, avg_loss: 0.238573 +016510/063150, loss: 0.041330, avg_loss: 0.238534 +016515/063150, loss: 0.139269, avg_loss: 0.238484 +016520/063150, loss: 0.056222, avg_loss: 0.238447 +016525/063150, loss: 0.083048, avg_loss: 0.238412 +016530/063150, loss: 0.031124, avg_loss: 0.238389 +016535/063150, loss: 0.071019, avg_loss: 0.238335 +016540/063150, loss: 0.063919, avg_loss: 0.238286 +016545/063150, loss: 0.111142, avg_loss: 0.238242 +016550/063150, loss: 0.157742, avg_loss: 0.238213 +016555/063150, loss: 0.040504, avg_loss: 0.238159 +016560/063150, loss: 0.042707, avg_loss: 0.238117 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16560/63150: {'accuracy': 0.8612385321100917} +016565/063150, loss: 0.032571, avg_loss: 0.238077 +016570/063150, loss: 0.053007, avg_loss: 0.238025 +016575/063150, loss: 0.009305, avg_loss: 0.237972 +016580/063150, loss: 0.076985, avg_loss: 0.237919 +016585/063150, loss: 0.137714, avg_loss: 0.237885 +016590/063150, loss: 0.047511, avg_loss: 0.237837 +016595/063150, loss: 0.104895, avg_loss: 0.237788 +016600/063150, loss: 0.065866, avg_loss: 0.237738 +016605/063150, loss: 0.041281, avg_loss: 0.237696 +016610/063150, loss: 0.055500, avg_loss: 0.237656 
+016615/063150, loss: 0.193311, avg_loss: 0.237619 +016620/063150, loss: 0.082116, avg_loss: 0.237596 +016625/063150, loss: 0.065884, avg_loss: 0.237553 +016630/063150, loss: 0.045553, avg_loss: 0.237502 +016635/063150, loss: 0.224087, avg_loss: 0.237466 +016640/063150, loss: 0.071157, avg_loss: 0.237412 +016645/063150, loss: 0.189170, avg_loss: 0.237381 +016650/063150, loss: 0.206425, avg_loss: 0.237340 +016655/063150, loss: 0.095484, avg_loss: 0.237285 +016660/063150, loss: 0.039728, avg_loss: 0.237232 +016665/063150, loss: 0.163450, avg_loss: 0.237215 +016670/063150, loss: 0.165841, avg_loss: 0.237167 +016675/063150, loss: 0.287012, avg_loss: 0.237147 +016680/063150, loss: 0.041589, avg_loss: 0.237121 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16680/63150: {'accuracy': 0.8623853211009175} +016685/063150, loss: 0.120070, avg_loss: 0.237086 +016690/063150, loss: 0.066053, avg_loss: 0.237047 +016695/063150, loss: 0.047316, avg_loss: 0.237009 +016700/063150, loss: 0.175039, avg_loss: 0.236974 +016705/063150, loss: 0.081830, avg_loss: 0.236925 +016710/063150, loss: 0.105867, avg_loss: 0.236884 +016715/063150, loss: 0.041150, avg_loss: 0.236829 +016720/063150, loss: 0.083114, avg_loss: 0.236793 +016725/063150, loss: 0.094519, avg_loss: 0.236736 +016730/063150, loss: 0.197794, avg_loss: 0.236687 +016735/063150, loss: 0.119675, avg_loss: 0.236663 +016740/063150, loss: 0.086907, avg_loss: 0.236620 +016745/063150, loss: 0.101801, avg_loss: 0.236599 +016750/063150, loss: 0.049607, avg_loss: 0.236558 +016755/063150, loss: 0.047553, avg_loss: 0.236517 +016760/063150, loss: 0.034978, avg_loss: 0.236456 +016765/063150, loss: 0.096803, avg_loss: 0.236415 +016770/063150, loss: 0.025004, avg_loss: 0.236369 +016775/063150, loss: 0.034605, avg_loss: 0.236308 +016780/063150, loss: 0.052020, avg_loss: 0.236259 +016785/063150, loss: 0.138093, avg_loss: 0.236237 +016790/063150, loss: 0.035095, avg_loss: 0.236192 +016795/063150, loss: 0.067674, avg_loss: 0.236138 +016800/063150, loss: 0.018855, avg_loss: 0.236093 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 7, step 16800/63150: {'accuracy': 0.8635321100917431} +016805/063150, loss: 0.101116, avg_loss: 0.236049 +016810/063150, loss: 0.011860, avg_loss: 0.235997 +016815/063150, loss: 0.187719, avg_loss: 0.235959 +016820/063150, loss: 0.018296, avg_loss: 0.235914 +016825/063150, loss: 0.123737, avg_loss: 0.235868 +016830/063150, loss: 0.113800, avg_loss: 0.235823 +016835/063150, loss: 0.152372, avg_loss: 0.235793 +016840/063150, loss: 0.081752, avg_loss: 0.235744 +016845/063150, loss: 0.213118, avg_loss: 0.235697 +016850/063150, loss: 0.055062, avg_loss: 0.235662 +016855/063150, loss: 0.143505, avg_loss: 0.235613 +016860/063150, loss: 0.095485, avg_loss: 0.235561 +016865/063150, loss: 0.011055, avg_loss: 0.235497 +016870/063150, loss: 0.109862, avg_loss: 0.235449 +016875/063150, loss: 0.040288, avg_loss: 0.235409 +016880/063150, loss: 0.031741, avg_loss: 0.235349 +016885/063150, loss: 0.196666, avg_loss: 0.235325 +016890/063150, loss: 0.017752, avg_loss: 0.235273 +016895/063150, loss: 0.024362, avg_loss: 0.235235 +016900/063150, loss: 0.166875, avg_loss: 0.235179 +016905/063150, loss: 0.067633, avg_loss: 0.235126 +016910/063150, loss: 0.055818, avg_loss: 0.235073 +016915/063150, loss: 0.092095, avg_loss: 0.235043 +016920/063150, loss: 0.011565, avg_loss: 0.235000 +***** Running dev evaluation ***** + Num examples = 872 + 
Instantaneous batch size per device = 32 +epoch 8, step 16920/63150: {'accuracy': 0.856651376146789} +016925/063150, loss: 0.188910, avg_loss: 0.234976 +016930/063150, loss: 0.061645, avg_loss: 0.234923 +016935/063150, loss: 0.108689, avg_loss: 0.234871 +016940/063150, loss: 0.068740, avg_loss: 0.234831 +016945/063150, loss: 0.047377, avg_loss: 0.234789 +016950/063150, loss: 0.024611, avg_loss: 0.234745 +016955/063150, loss: 0.062378, avg_loss: 0.234688 +016960/063150, loss: 0.008458, avg_loss: 0.234633 +016965/063150, loss: 0.218376, avg_loss: 0.234589 +016970/063150, loss: 0.104255, avg_loss: 0.234544 +016975/063150, loss: 0.094962, avg_loss: 0.234487 +016980/063150, loss: 0.168265, avg_loss: 0.234442 +016985/063150, loss: 0.087028, avg_loss: 0.234403 +016990/063150, loss: 0.037372, avg_loss: 0.234347 +016995/063150, loss: 0.363057, avg_loss: 0.234322 +017000/063150, loss: 0.016488, avg_loss: 0.234272 +017005/063150, loss: 0.141457, avg_loss: 0.234226 +017010/063150, loss: 0.020359, avg_loss: 0.234170 +017015/063150, loss: 0.129698, avg_loss: 0.234128 +017020/063150, loss: 0.146840, avg_loss: 0.234079 +017025/063150, loss: 0.054228, avg_loss: 0.234049 +017030/063150, loss: 0.127275, avg_loss: 0.234005 +017035/063150, loss: 0.219676, avg_loss: 0.233957 +017040/063150, loss: 0.032730, avg_loss: 0.233926 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17040/63150: {'accuracy': 0.856651376146789} +017045/063150, loss: 0.046308, avg_loss: 0.233872 +017050/063150, loss: 0.005736, avg_loss: 0.233816 +017055/063150, loss: 0.103439, avg_loss: 0.233773 +017060/063150, loss: 0.208360, avg_loss: 0.233723 +017065/063150, loss: 0.079755, avg_loss: 0.233669 +017070/063150, loss: 0.093970, avg_loss: 0.233631 +017075/063150, loss: 0.038068, avg_loss: 0.233581 +017080/063150, loss: 0.205549, avg_loss: 0.233533 +017085/063150, loss: 0.194604, avg_loss: 0.233495 +017090/063150, loss: 0.052513, avg_loss: 0.233450 +017095/063150, loss: 0.014904, avg_loss: 0.233404 +017100/063150, loss: 0.058633, avg_loss: 0.233358 +017105/063150, loss: 0.053776, avg_loss: 0.233308 +017110/063150, loss: 0.113098, avg_loss: 0.233268 +017115/063150, loss: 0.309650, avg_loss: 0.233234 +017120/063150, loss: 0.038255, avg_loss: 0.233180 +017125/063150, loss: 0.153401, avg_loss: 0.233134 +017130/063150, loss: 0.007677, avg_loss: 0.233083 +017135/063150, loss: 0.297544, avg_loss: 0.233046 +017140/063150, loss: 0.044526, avg_loss: 0.232996 +017145/063150, loss: 0.032363, avg_loss: 0.232953 +017150/063150, loss: 0.048358, avg_loss: 0.232899 +017155/063150, loss: 0.198430, avg_loss: 0.232874 +017160/063150, loss: 0.055898, avg_loss: 0.232837 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17160/63150: {'accuracy': 0.8302752293577982} +017165/063150, loss: 0.098937, avg_loss: 0.232797 +017170/063150, loss: 0.105463, avg_loss: 0.232752 +017175/063150, loss: 0.059305, avg_loss: 0.232705 +017180/063150, loss: 0.036069, avg_loss: 0.232657 +017185/063150, loss: 0.059806, avg_loss: 0.232617 +017190/063150, loss: 0.027286, avg_loss: 0.232576 +017195/063150, loss: 0.046667, avg_loss: 0.232530 +017200/063150, loss: 0.167140, avg_loss: 0.232496 +017205/063150, loss: 0.047795, avg_loss: 0.232446 +017210/063150, loss: 0.020975, avg_loss: 0.232407 +017215/063150, loss: 0.067810, avg_loss: 0.232371 +017220/063150, loss: 0.131941, avg_loss: 0.232320 +017225/063150, loss: 0.067531, avg_loss: 0.232289 +017230/063150, loss: 
0.054477, avg_loss: 0.232238 +017235/063150, loss: 0.126009, avg_loss: 0.232204 +017240/063150, loss: 0.237204, avg_loss: 0.232167 +017245/063150, loss: 0.053867, avg_loss: 0.232128 +017250/063150, loss: 0.108877, avg_loss: 0.232083 +017255/063150, loss: 0.009721, avg_loss: 0.232044 +017260/063150, loss: 0.186060, avg_loss: 0.232010 +017265/063150, loss: 0.069663, avg_loss: 0.231968 +017270/063150, loss: 0.029338, avg_loss: 0.231922 +017275/063150, loss: 0.109713, avg_loss: 0.231878 +017280/063150, loss: 0.102273, avg_loss: 0.231832 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17280/63150: {'accuracy': 0.8612385321100917} +017285/063150, loss: 0.070566, avg_loss: 0.231784 +017290/063150, loss: 0.059461, avg_loss: 0.231732 +017295/063150, loss: 0.310723, avg_loss: 0.231708 +017300/063150, loss: 0.063797, avg_loss: 0.231661 +017305/063150, loss: 0.049737, avg_loss: 0.231621 +017310/063150, loss: 0.029027, avg_loss: 0.231580 +017315/063150, loss: 0.136663, avg_loss: 0.231537 +017320/063150, loss: 0.049554, avg_loss: 0.231492 +017325/063150, loss: 0.059589, avg_loss: 0.231442 +017330/063150, loss: 0.189943, avg_loss: 0.231399 +017335/063150, loss: 0.032731, avg_loss: 0.231348 +017340/063150, loss: 0.034896, avg_loss: 0.231301 +017345/063150, loss: 0.054217, avg_loss: 0.231246 +017350/063150, loss: 0.140563, avg_loss: 0.231192 +017355/063150, loss: 0.075075, avg_loss: 0.231144 +017360/063150, loss: 0.012152, avg_loss: 0.231104 +017365/063150, loss: 0.302546, avg_loss: 0.231067 +017370/063150, loss: 0.133162, avg_loss: 0.231025 +017375/063150, loss: 0.064575, avg_loss: 0.230971 +017380/063150, loss: 0.019819, avg_loss: 0.230927 +017385/063150, loss: 0.188444, avg_loss: 0.230892 +017390/063150, loss: 0.005671, avg_loss: 0.230849 +017395/063150, loss: 0.090578, avg_loss: 0.230796 +017400/063150, loss: 0.043809, avg_loss: 0.230738 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17400/63150: {'accuracy': 0.8463302752293578} +017405/063150, loss: 0.036402, avg_loss: 0.230681 +017410/063150, loss: 0.039472, avg_loss: 0.230639 +017415/063150, loss: 0.016200, avg_loss: 0.230588 +017420/063150, loss: 0.011126, avg_loss: 0.230552 +017425/063150, loss: 0.126715, avg_loss: 0.230504 +017430/063150, loss: 0.021441, avg_loss: 0.230471 +017435/063150, loss: 0.008443, avg_loss: 0.230416 +017440/063150, loss: 0.052094, avg_loss: 0.230361 +017445/063150, loss: 0.071290, avg_loss: 0.230327 +017450/063150, loss: 0.091956, avg_loss: 0.230276 +017455/063150, loss: 0.042454, avg_loss: 0.230239 +017460/063150, loss: 0.088620, avg_loss: 0.230203 +017465/063150, loss: 0.097100, avg_loss: 0.230161 +017470/063150, loss: 0.011998, avg_loss: 0.230105 +017475/063150, loss: 0.080687, avg_loss: 0.230060 +017480/063150, loss: 0.060479, avg_loss: 0.230016 +017485/063150, loss: 0.047041, avg_loss: 0.229981 +017490/063150, loss: 0.030602, avg_loss: 0.229931 +017495/063150, loss: 0.129102, avg_loss: 0.229879 +017500/063150, loss: 0.153531, avg_loss: 0.229835 +017505/063150, loss: 0.017624, avg_loss: 0.229789 +017510/063150, loss: 0.008433, avg_loss: 0.229733 +017515/063150, loss: 0.045323, avg_loss: 0.229688 +017520/063150, loss: 0.094997, avg_loss: 0.229635 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17520/63150: {'accuracy': 0.8589449541284404} +017525/063150, loss: 0.021621, avg_loss: 0.229577 +017530/063150, loss: 0.046823, 
avg_loss: 0.229536 +017535/063150, loss: 0.007758, avg_loss: 0.229485 +017540/063150, loss: 0.014523, avg_loss: 0.229438 +017545/063150, loss: 0.081847, avg_loss: 0.229401 +017550/063150, loss: 0.010575, avg_loss: 0.229349 +017555/063150, loss: 0.486461, avg_loss: 0.229325 +017560/063150, loss: 0.125393, avg_loss: 0.229293 +017565/063150, loss: 0.007944, avg_loss: 0.229237 +017570/063150, loss: 0.088168, avg_loss: 0.229187 +017575/063150, loss: 0.144809, avg_loss: 0.229134 +017580/063150, loss: 0.031677, avg_loss: 0.229085 +017585/063150, loss: 0.008890, avg_loss: 0.229036 +017590/063150, loss: 0.204599, avg_loss: 0.228995 +017595/063150, loss: 0.034883, avg_loss: 0.228966 +017600/063150, loss: 0.011582, avg_loss: 0.228910 +017605/063150, loss: 0.019860, avg_loss: 0.228869 +017610/063150, loss: 0.005591, avg_loss: 0.228827 +017615/063150, loss: 0.031591, avg_loss: 0.228786 +017620/063150, loss: 0.154887, avg_loss: 0.228752 +017625/063150, loss: 0.018517, avg_loss: 0.228698 +017630/063150, loss: 0.035602, avg_loss: 0.228645 +017635/063150, loss: 0.018666, avg_loss: 0.228612 +017640/063150, loss: 0.066284, avg_loss: 0.228564 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17640/63150: {'accuracy': 0.8486238532110092} +017645/063150, loss: 0.072173, avg_loss: 0.228513 +017650/063150, loss: 0.007612, avg_loss: 0.228471 +017655/063150, loss: 0.010855, avg_loss: 0.228434 +017660/063150, loss: 0.033809, avg_loss: 0.228393 +017665/063150, loss: 0.230516, avg_loss: 0.228355 +017670/063150, loss: 0.035392, avg_loss: 0.228323 +017675/063150, loss: 0.038668, avg_loss: 0.228275 +017680/063150, loss: 0.006914, avg_loss: 0.228229 +017685/063150, loss: 0.028876, avg_loss: 0.228181 +017690/063150, loss: 0.035444, avg_loss: 0.228137 +017695/063150, loss: 0.027767, avg_loss: 0.228103 +017700/063150, loss: 0.075690, avg_loss: 0.228059 +017705/063150, loss: 0.045387, avg_loss: 0.228012 +017710/063150, loss: 0.007984, avg_loss: 0.227965 +017715/063150, loss: 0.035655, avg_loss: 0.227934 +017720/063150, loss: 0.017365, avg_loss: 0.227892 +017725/063150, loss: 0.162145, avg_loss: 0.227848 +017730/063150, loss: 0.043882, avg_loss: 0.227814 +017735/063150, loss: 0.038666, avg_loss: 0.227781 +017740/063150, loss: 0.208579, avg_loss: 0.227744 +017745/063150, loss: 0.092012, avg_loss: 0.227696 +017750/063150, loss: 0.047530, avg_loss: 0.227662 +017755/063150, loss: 0.009785, avg_loss: 0.227615 +017760/063150, loss: 0.028872, avg_loss: 0.227573 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17760/63150: {'accuracy': 0.8658256880733946} +017765/063150, loss: 0.120344, avg_loss: 0.227555 +017770/063150, loss: 0.095952, avg_loss: 0.227526 +017775/063150, loss: 0.220576, avg_loss: 0.227491 +017780/063150, loss: 0.082850, avg_loss: 0.227438 +017785/063150, loss: 0.014846, avg_loss: 0.227390 +017790/063150, loss: 0.043109, avg_loss: 0.227359 +017795/063150, loss: 0.065601, avg_loss: 0.227312 +017800/063150, loss: 0.048581, avg_loss: 0.227278 +017805/063150, loss: 0.035231, avg_loss: 0.227233 +017810/063150, loss: 0.079415, avg_loss: 0.227190 +017815/063150, loss: 0.193466, avg_loss: 0.227159 +017820/063150, loss: 0.083692, avg_loss: 0.227113 +017825/063150, loss: 0.039092, avg_loss: 0.227070 +017830/063150, loss: 0.054011, avg_loss: 0.227021 +017835/063150, loss: 0.180928, avg_loss: 0.226995 +017840/063150, loss: 0.045921, avg_loss: 0.226962 +017845/063150, loss: 0.025807, avg_loss: 0.226913 
+017850/063150, loss: 0.132919, avg_loss: 0.226873 +017855/063150, loss: 0.142249, avg_loss: 0.226846 +017860/063150, loss: 0.061275, avg_loss: 0.226799 +017865/063150, loss: 0.058661, avg_loss: 0.226755 +017870/063150, loss: 0.094644, avg_loss: 0.226713 +017875/063150, loss: 0.026756, avg_loss: 0.226658 +017880/063150, loss: 0.095061, avg_loss: 0.226617 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 17880/63150: {'accuracy': 0.8635321100917431} +017885/063150, loss: 0.018860, avg_loss: 0.226578 +017890/063150, loss: 0.031925, avg_loss: 0.226530 +017895/063150, loss: 0.091192, avg_loss: 0.226489 +017900/063150, loss: 0.036902, avg_loss: 0.226442 +017905/063150, loss: 0.011141, avg_loss: 0.226394 +017910/063150, loss: 0.062903, avg_loss: 0.226352 +017915/063150, loss: 0.013839, avg_loss: 0.226317 +017920/063150, loss: 0.043660, avg_loss: 0.226276 +017925/063150, loss: 0.059538, avg_loss: 0.226231 +017930/063150, loss: 0.065650, avg_loss: 0.226200 +017935/063150, loss: 0.043581, avg_loss: 0.226153 +017940/063150, loss: 0.156602, avg_loss: 0.226136 +017945/063150, loss: 0.199880, avg_loss: 0.226112 +017950/063150, loss: 0.005969, avg_loss: 0.226072 +017955/063150, loss: 0.080872, avg_loss: 0.226043 +017960/063150, loss: 0.023863, avg_loss: 0.226000 +017965/063150, loss: 0.160618, avg_loss: 0.225959 +017970/063150, loss: 0.054974, avg_loss: 0.225922 +017975/063150, loss: 0.054022, avg_loss: 0.225883 +017980/063150, loss: 0.053272, avg_loss: 0.225830 +017985/063150, loss: 0.054394, avg_loss: 0.225781 +017990/063150, loss: 0.059562, avg_loss: 0.225734 +017995/063150, loss: 0.219956, avg_loss: 0.225699 +018000/063150, loss: 0.016590, avg_loss: 0.225659 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18000/63150: {'accuracy': 0.8532110091743119} +018005/063150, loss: 0.094877, avg_loss: 0.225623 +018010/063150, loss: 0.303773, avg_loss: 0.225609 +018015/063150, loss: 0.025608, avg_loss: 0.225554 +018020/063150, loss: 0.096185, avg_loss: 0.225516 +018025/063150, loss: 0.112359, avg_loss: 0.225472 +018030/063150, loss: 0.065774, avg_loss: 0.225431 +018035/063150, loss: 0.012045, avg_loss: 0.225398 +018040/063150, loss: 0.014176, avg_loss: 0.225351 +018045/063150, loss: 0.127408, avg_loss: 0.225311 +018050/063150, loss: 0.176268, avg_loss: 0.225264 +018055/063150, loss: 0.031399, avg_loss: 0.225236 +018060/063150, loss: 0.058895, avg_loss: 0.225218 +018065/063150, loss: 0.016002, avg_loss: 0.225165 +018070/063150, loss: 0.134147, avg_loss: 0.225126 +018075/063150, loss: 0.077181, avg_loss: 0.225091 +018080/063150, loss: 0.031730, avg_loss: 0.225055 +018085/063150, loss: 0.040803, avg_loss: 0.225009 +018090/063150, loss: 0.054315, avg_loss: 0.224966 +018095/063150, loss: 0.255076, avg_loss: 0.224930 +018100/063150, loss: 0.006734, avg_loss: 0.224881 +018105/063150, loss: 0.067851, avg_loss: 0.224855 +018110/063150, loss: 0.039820, avg_loss: 0.224828 +018115/063150, loss: 0.064140, avg_loss: 0.224776 +018120/063150, loss: 0.090054, avg_loss: 0.224735 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18120/63150: {'accuracy': 0.8612385321100917} +018125/063150, loss: 0.068486, avg_loss: 0.224705 +018130/063150, loss: 0.012807, avg_loss: 0.224665 +018135/063150, loss: 0.061182, avg_loss: 0.224617 +018140/063150, loss: 0.036541, avg_loss: 0.224580 +018145/063150, loss: 0.085190, avg_loss: 0.224542 
+018150/063150, loss: 0.145590, avg_loss: 0.224501 +018155/063150, loss: 0.006795, avg_loss: 0.224463 +018160/063150, loss: 0.066192, avg_loss: 0.224426 +018165/063150, loss: 0.042268, avg_loss: 0.224377 +018170/063150, loss: 0.025564, avg_loss: 0.224339 +018175/063150, loss: 0.013970, avg_loss: 0.224312 +018180/063150, loss: 0.011923, avg_loss: 0.224264 +018185/063150, loss: 0.307300, avg_loss: 0.224248 +018190/063150, loss: 0.141313, avg_loss: 0.224202 +018195/063150, loss: 0.255624, avg_loss: 0.224181 +018200/063150, loss: 0.021737, avg_loss: 0.224145 +018205/063150, loss: 0.090034, avg_loss: 0.224106 +018210/063150, loss: 0.066458, avg_loss: 0.224071 +018215/063150, loss: 0.210535, avg_loss: 0.224062 +018220/063150, loss: 0.100298, avg_loss: 0.224019 +018225/063150, loss: 0.099111, avg_loss: 0.223969 +018230/063150, loss: 0.095595, avg_loss: 0.223924 +018235/063150, loss: 0.115430, avg_loss: 0.223886 +018240/063150, loss: 0.014914, avg_loss: 0.223839 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18240/63150: {'accuracy': 0.8555045871559633} +018245/063150, loss: 0.176087, avg_loss: 0.223801 +018250/063150, loss: 0.042016, avg_loss: 0.223759 +018255/063150, loss: 0.169340, avg_loss: 0.223730 +018260/063150, loss: 0.024305, avg_loss: 0.223688 +018265/063150, loss: 0.114911, avg_loss: 0.223668 +018270/063150, loss: 0.007482, avg_loss: 0.223633 +018275/063150, loss: 0.082665, avg_loss: 0.223592 +018280/063150, loss: 0.092684, avg_loss: 0.223549 +018285/063150, loss: 0.008454, avg_loss: 0.223503 +018290/063150, loss: 0.025961, avg_loss: 0.223453 +018295/063150, loss: 0.022461, avg_loss: 0.223412 +018300/063150, loss: 0.011742, avg_loss: 0.223362 +018305/063150, loss: 0.131183, avg_loss: 0.223323 +018310/063150, loss: 0.035741, avg_loss: 0.223271 +018315/063150, loss: 0.210260, avg_loss: 0.223246 +018320/063150, loss: 0.006041, avg_loss: 0.223198 +018325/063150, loss: 0.073307, avg_loss: 0.223184 +018330/063150, loss: 0.036502, avg_loss: 0.223140 +018335/063150, loss: 0.113309, avg_loss: 0.223096 +018340/063150, loss: 0.011259, avg_loss: 0.223050 +018345/063150, loss: 0.299175, avg_loss: 0.223023 +018350/063150, loss: 0.049813, avg_loss: 0.222990 +018355/063150, loss: 0.061634, avg_loss: 0.222946 +018360/063150, loss: 0.018357, avg_loss: 0.222907 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18360/63150: {'accuracy': 0.8428899082568807} +018365/063150, loss: 0.154111, avg_loss: 0.222865 +018370/063150, loss: 0.055873, avg_loss: 0.222826 +018375/063150, loss: 0.009445, avg_loss: 0.222801 +018380/063150, loss: 0.032229, avg_loss: 0.222757 +018385/063150, loss: 0.010724, avg_loss: 0.222730 +018390/063150, loss: 0.153864, avg_loss: 0.222697 +018395/063150, loss: 0.191097, avg_loss: 0.222659 +018400/063150, loss: 0.117784, avg_loss: 0.222617 +018405/063150, loss: 0.066621, avg_loss: 0.222572 +018410/063150, loss: 0.077809, avg_loss: 0.222530 +018415/063150, loss: 0.013450, avg_loss: 0.222499 +018420/063150, loss: 0.042740, avg_loss: 0.222466 +018425/063150, loss: 0.120426, avg_loss: 0.222430 +018430/063150, loss: 0.010818, avg_loss: 0.222385 +018435/063150, loss: 0.008703, avg_loss: 0.222345 +018440/063150, loss: 0.009350, avg_loss: 0.222306 +018445/063150, loss: 0.028555, avg_loss: 0.222266 +018450/063150, loss: 0.027964, avg_loss: 0.222226 +018455/063150, loss: 0.143402, avg_loss: 0.222184 +018460/063150, loss: 0.127254, avg_loss: 0.222143 +018465/063150, 
loss: 0.015297, avg_loss: 0.222117 +018470/063150, loss: 0.047723, avg_loss: 0.222071 +018475/063150, loss: 0.261815, avg_loss: 0.222031 +018480/063150, loss: 0.120404, avg_loss: 0.221988 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18480/63150: {'accuracy': 0.8646788990825688} +018485/063150, loss: 0.064984, avg_loss: 0.221949 +018490/063150, loss: 0.037511, avg_loss: 0.221905 +018495/063150, loss: 0.127570, avg_loss: 0.221880 +018500/063150, loss: 0.195143, avg_loss: 0.221853 +018505/063150, loss: 0.037831, avg_loss: 0.221837 +018510/063150, loss: 0.088682, avg_loss: 0.221794 +018515/063150, loss: 0.053058, avg_loss: 0.221757 +018520/063150, loss: 0.080938, avg_loss: 0.221723 +018525/063150, loss: 0.095276, avg_loss: 0.221691 +018530/063150, loss: 0.039497, avg_loss: 0.221661 +018535/063150, loss: 0.044286, avg_loss: 0.221622 +018540/063150, loss: 0.319078, avg_loss: 0.221604 +018545/063150, loss: 0.054725, avg_loss: 0.221559 +018550/063150, loss: 0.049340, avg_loss: 0.221528 +018555/063150, loss: 0.080291, avg_loss: 0.221502 +018560/063150, loss: 0.033925, avg_loss: 0.221470 +018565/063150, loss: 0.058828, avg_loss: 0.221427 +018570/063150, loss: 0.101471, avg_loss: 0.221403 +018575/063150, loss: 0.008715, avg_loss: 0.221360 +018580/063150, loss: 0.139146, avg_loss: 0.221325 +018585/063150, loss: 0.037854, avg_loss: 0.221296 +018590/063150, loss: 0.170593, avg_loss: 0.221258 +018595/063150, loss: 0.182651, avg_loss: 0.221229 +018600/063150, loss: 0.024977, avg_loss: 0.221188 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18600/63150: {'accuracy': 0.8623853211009175} +018605/063150, loss: 0.171517, avg_loss: 0.221166 +018610/063150, loss: 0.054443, avg_loss: 0.221118 +018615/063150, loss: 0.126820, avg_loss: 0.221078 +018620/063150, loss: 0.187955, avg_loss: 0.221049 +018625/063150, loss: 0.014203, avg_loss: 0.221002 +018630/063150, loss: 0.015251, avg_loss: 0.220961 +018635/063150, loss: 0.058205, avg_loss: 0.220927 +018640/063150, loss: 0.052869, avg_loss: 0.220874 +018645/063150, loss: 0.008448, avg_loss: 0.220838 +018650/063150, loss: 0.008589, avg_loss: 0.220803 +018655/063150, loss: 0.007442, avg_loss: 0.220764 +018660/063150, loss: 0.068382, avg_loss: 0.220717 +018665/063150, loss: 0.037799, avg_loss: 0.220668 +018670/063150, loss: 0.089896, avg_loss: 0.220645 +018675/063150, loss: 0.008053, avg_loss: 0.220610 +018680/063150, loss: 0.032760, avg_loss: 0.220566 +018685/063150, loss: 0.055638, avg_loss: 0.220526 +018690/063150, loss: 0.072445, avg_loss: 0.220484 +018695/063150, loss: 0.100890, avg_loss: 0.220441 +018700/063150, loss: 0.041633, avg_loss: 0.220397 +018705/063150, loss: 0.208999, avg_loss: 0.220359 +018710/063150, loss: 0.119963, avg_loss: 0.220335 +018715/063150, loss: 0.035308, avg_loss: 0.220287 +018720/063150, loss: 0.405673, avg_loss: 0.220259 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18720/63150: {'accuracy': 0.8623853211009175} +018725/063150, loss: 0.145305, avg_loss: 0.220219 +018730/063150, loss: 0.175643, avg_loss: 0.220203 +018735/063150, loss: 0.090242, avg_loss: 0.220168 +018740/063150, loss: 0.113953, avg_loss: 0.220123 +018745/063150, loss: 0.057388, avg_loss: 0.220088 +018750/063150, loss: 0.026575, avg_loss: 0.220064 +018755/063150, loss: 0.060014, avg_loss: 0.220026 +018760/063150, loss: 0.075421, avg_loss: 0.220006 +018765/063150, loss: 
0.196025, avg_loss: 0.219966 +018770/063150, loss: 0.102817, avg_loss: 0.219939 +018775/063150, loss: 0.107872, avg_loss: 0.219905 +018780/063150, loss: 0.066504, avg_loss: 0.219869 +018785/063150, loss: 0.092743, avg_loss: 0.219837 +018790/063150, loss: 0.025489, avg_loss: 0.219807 +018795/063150, loss: 0.090109, avg_loss: 0.219769 +018800/063150, loss: 0.176351, avg_loss: 0.219730 +018805/063150, loss: 0.089408, avg_loss: 0.219688 +018810/063150, loss: 0.126943, avg_loss: 0.219658 +018815/063150, loss: 0.047076, avg_loss: 0.219629 +018820/063150, loss: 0.024257, avg_loss: 0.219586 +018825/063150, loss: 0.068136, avg_loss: 0.219548 +018830/063150, loss: 0.065755, avg_loss: 0.219520 +018835/063150, loss: 0.373434, avg_loss: 0.219501 +018840/063150, loss: 0.147209, avg_loss: 0.219467 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 8, step 18840/63150: {'accuracy': 0.856651376146789} +018845/063150, loss: 0.097032, avg_loss: 0.219435 +018850/063150, loss: 0.028307, avg_loss: 0.219398 +018855/063150, loss: 0.438921, avg_loss: 0.219367 +018860/063150, loss: 0.058575, avg_loss: 0.219325 +018865/063150, loss: 0.092320, avg_loss: 0.219296 +018870/063150, loss: 0.156517, avg_loss: 0.219253 +018875/063150, loss: 0.028116, avg_loss: 0.219208 +018880/063150, loss: 0.043536, avg_loss: 0.219169 +018885/063150, loss: 0.007797, avg_loss: 0.219129 +018890/063150, loss: 0.042524, avg_loss: 0.219101 +018895/063150, loss: 0.005262, avg_loss: 0.219057 +018900/063150, loss: 0.030635, avg_loss: 0.219031 +018905/063150, loss: 0.032573, avg_loss: 0.218990 +018910/063150, loss: 0.089701, avg_loss: 0.218956 +018915/063150, loss: 0.117259, avg_loss: 0.218924 +018920/063150, loss: 0.098910, avg_loss: 0.218897 +018925/063150, loss: 0.090920, avg_loss: 0.218860 +018930/063150, loss: 0.072013, avg_loss: 0.218851 +018935/063150, loss: 0.103394, avg_loss: 0.218804 +018940/063150, loss: 0.068606, avg_loss: 0.218769 +018945/063150, loss: 0.140656, avg_loss: 0.218736 +018950/063150, loss: 0.095962, avg_loss: 0.218701 +018955/063150, loss: 0.042470, avg_loss: 0.218681 +018960/063150, loss: 0.016058, avg_loss: 0.218641 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 18960/63150: {'accuracy': 0.8612385321100917} +018965/063150, loss: 0.180997, avg_loss: 0.218616 +018970/063150, loss: 0.110749, avg_loss: 0.218572 +018975/063150, loss: 0.016178, avg_loss: 0.218532 +018980/063150, loss: 0.162525, avg_loss: 0.218493 +018985/063150, loss: 0.011016, avg_loss: 0.218444 +018990/063150, loss: 0.007060, avg_loss: 0.218395 +018995/063150, loss: 0.011415, avg_loss: 0.218342 +019000/063150, loss: 0.105103, avg_loss: 0.218314 +019005/063150, loss: 0.052087, avg_loss: 0.218278 +019010/063150, loss: 0.035837, avg_loss: 0.218250 +019015/063150, loss: 0.047668, avg_loss: 0.218204 +019020/063150, loss: 0.033483, avg_loss: 0.218155 +019025/063150, loss: 0.052653, avg_loss: 0.218125 +019030/063150, loss: 0.202084, avg_loss: 0.218093 +019035/063150, loss: 0.086567, avg_loss: 0.218059 +019040/063150, loss: 0.008701, avg_loss: 0.218011 +019045/063150, loss: 0.014826, avg_loss: 0.217963 +019050/063150, loss: 0.066158, avg_loss: 0.217918 +019055/063150, loss: 0.016057, avg_loss: 0.217870 +019060/063150, loss: 0.011187, avg_loss: 0.217833 +019065/063150, loss: 0.020678, avg_loss: 0.217785 +019070/063150, loss: 0.061636, avg_loss: 0.217740 +019075/063150, loss: 0.053741, avg_loss: 0.217698 +019080/063150, loss: 0.246716, avg_loss: 
0.217664 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19080/63150: {'accuracy': 0.8635321100917431} +019085/063150, loss: 0.010740, avg_loss: 0.217613 +019090/063150, loss: 0.005055, avg_loss: 0.217567 +019095/063150, loss: 0.221914, avg_loss: 0.217527 +019100/063150, loss: 0.003849, avg_loss: 0.217489 +019105/063150, loss: 0.033573, avg_loss: 0.217461 +019110/063150, loss: 0.185240, avg_loss: 0.217420 +019115/063150, loss: 0.170535, avg_loss: 0.217393 +019120/063150, loss: 0.009032, avg_loss: 0.217352 +019125/063150, loss: 0.050983, avg_loss: 0.217316 +019130/063150, loss: 0.097995, avg_loss: 0.217286 +019135/063150, loss: 0.090675, avg_loss: 0.217243 +019140/063150, loss: 0.074113, avg_loss: 0.217200 +019145/063150, loss: 0.068241, avg_loss: 0.217154 +019150/063150, loss: 0.024249, avg_loss: 0.217109 +019155/063150, loss: 0.007722, avg_loss: 0.217062 +019160/063150, loss: 0.104711, avg_loss: 0.217027 +019165/063150, loss: 0.088703, avg_loss: 0.216993 +019170/063150, loss: 0.208635, avg_loss: 0.216966 +019175/063150, loss: 0.015174, avg_loss: 0.216932 +019180/063150, loss: 0.135024, avg_loss: 0.216895 +019185/063150, loss: 0.174911, avg_loss: 0.216869 +019190/063150, loss: 0.021792, avg_loss: 0.216825 +019195/063150, loss: 0.218855, avg_loss: 0.216787 +019200/063150, loss: 0.007629, avg_loss: 0.216750 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19200/63150: {'accuracy': 0.8543577981651376} +019205/063150, loss: 0.024247, avg_loss: 0.216708 +019210/063150, loss: 0.057514, avg_loss: 0.216661 +019215/063150, loss: 0.030202, avg_loss: 0.216615 +019220/063150, loss: 0.062161, avg_loss: 0.216570 +019225/063150, loss: 0.087198, avg_loss: 0.216525 +019230/063150, loss: 0.087506, avg_loss: 0.216484 +019235/063150, loss: 0.013654, avg_loss: 0.216455 +019240/063150, loss: 0.066586, avg_loss: 0.216415 +019245/063150, loss: 0.022497, avg_loss: 0.216372 +019250/063150, loss: 0.025786, avg_loss: 0.216336 +019255/063150, loss: 0.047644, avg_loss: 0.216296 +019260/063150, loss: 0.124519, avg_loss: 0.216266 +019265/063150, loss: 0.048460, avg_loss: 0.216222 +019270/063150, loss: 0.328344, avg_loss: 0.216192 +019275/063150, loss: 0.051528, avg_loss: 0.216155 +019280/063150, loss: 0.003120, avg_loss: 0.216120 +019285/063150, loss: 0.351587, avg_loss: 0.216102 +019290/063150, loss: 0.138053, avg_loss: 0.216065 +019295/063150, loss: 0.107496, avg_loss: 0.216027 +019300/063150, loss: 0.206535, avg_loss: 0.215999 +019305/063150, loss: 0.099606, avg_loss: 0.215956 +019310/063150, loss: 0.086262, avg_loss: 0.215918 +019315/063150, loss: 0.119667, avg_loss: 0.215889 +019320/063150, loss: 0.108768, avg_loss: 0.215846 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19320/63150: {'accuracy': 0.8577981651376146} +019325/063150, loss: 0.051342, avg_loss: 0.215809 +019330/063150, loss: 0.357806, avg_loss: 0.215789 +019335/063150, loss: 0.043653, avg_loss: 0.215752 +019340/063150, loss: 0.011829, avg_loss: 0.215710 +019345/063150, loss: 0.102938, avg_loss: 0.215668 +019350/063150, loss: 0.019636, avg_loss: 0.215623 +019355/063150, loss: 0.020850, avg_loss: 0.215588 +019360/063150, loss: 0.119910, avg_loss: 0.215551 +019365/063150, loss: 0.030863, avg_loss: 0.215542 +019370/063150, loss: 0.068839, avg_loss: 0.215506 +019375/063150, loss: 0.047784, avg_loss: 0.215471 +019380/063150, loss: 0.121680, avg_loss: 0.215440 
+019385/063150, loss: 0.049925, avg_loss: 0.215397 +019390/063150, loss: 0.027935, avg_loss: 0.215355 +019395/063150, loss: 0.055413, avg_loss: 0.215313 +019400/063150, loss: 0.189959, avg_loss: 0.215276 +019405/063150, loss: 0.011148, avg_loss: 0.215234 +019410/063150, loss: 0.024897, avg_loss: 0.215202 +019415/063150, loss: 0.161681, avg_loss: 0.215162 +019420/063150, loss: 0.025722, avg_loss: 0.215123 +019425/063150, loss: 0.030716, avg_loss: 0.215076 +019430/063150, loss: 0.018566, avg_loss: 0.215054 +019435/063150, loss: 0.056378, avg_loss: 0.215017 +019440/063150, loss: 0.025367, avg_loss: 0.214975 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19440/63150: {'accuracy': 0.8658256880733946} +019445/063150, loss: 0.015941, avg_loss: 0.214932 +019450/063150, loss: 0.008002, avg_loss: 0.214890 +019455/063150, loss: 0.012858, avg_loss: 0.214841 +019460/063150, loss: 0.074739, avg_loss: 0.214802 +019465/063150, loss: 0.066991, avg_loss: 0.214766 +019470/063150, loss: 0.028028, avg_loss: 0.214731 +019475/063150, loss: 0.017374, avg_loss: 0.214690 +019480/063150, loss: 0.029072, avg_loss: 0.214646 +019485/063150, loss: 0.054066, avg_loss: 0.214615 +019490/063150, loss: 0.050698, avg_loss: 0.214578 +019495/063150, loss: 0.096692, avg_loss: 0.214542 +019500/063150, loss: 0.013739, avg_loss: 0.214505 +019505/063150, loss: 0.010987, avg_loss: 0.214467 +019510/063150, loss: 0.151400, avg_loss: 0.214425 +019515/063150, loss: 0.057381, avg_loss: 0.214386 +019520/063150, loss: 0.086120, avg_loss: 0.214345 +019525/063150, loss: 0.157143, avg_loss: 0.214314 +019530/063150, loss: 0.029578, avg_loss: 0.214284 +019535/063150, loss: 0.033633, avg_loss: 0.214238 +019540/063150, loss: 0.051959, avg_loss: 0.214208 +019545/063150, loss: 0.051819, avg_loss: 0.214168 +019550/063150, loss: 0.040001, avg_loss: 0.214126 +019555/063150, loss: 0.063705, avg_loss: 0.214082 +019560/063150, loss: 0.037794, avg_loss: 0.214037 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19560/63150: {'accuracy': 0.856651376146789} +019565/063150, loss: 0.298034, avg_loss: 0.214018 +019570/063150, loss: 0.064875, avg_loss: 0.213976 +019575/063150, loss: 0.156289, avg_loss: 0.213951 +019580/063150, loss: 0.030443, avg_loss: 0.213930 +019585/063150, loss: 0.072835, avg_loss: 0.213892 +019590/063150, loss: 0.129566, avg_loss: 0.213870 +019595/063150, loss: 0.042120, avg_loss: 0.213826 +019600/063150, loss: 0.047562, avg_loss: 0.213785 +019605/063150, loss: 0.056296, avg_loss: 0.213743 +019610/063150, loss: 0.250150, avg_loss: 0.213708 +019615/063150, loss: 0.024024, avg_loss: 0.213672 +019620/063150, loss: 0.056505, avg_loss: 0.213632 +019625/063150, loss: 0.060204, avg_loss: 0.213593 +019630/063150, loss: 0.024938, avg_loss: 0.213567 +019635/063150, loss: 0.037644, avg_loss: 0.213521 +019640/063150, loss: 0.007446, avg_loss: 0.213477 +019645/063150, loss: 0.115919, avg_loss: 0.213452 +019650/063150, loss: 0.072759, avg_loss: 0.213418 +019655/063150, loss: 0.066302, avg_loss: 0.213387 +019660/063150, loss: 0.182726, avg_loss: 0.213360 +019665/063150, loss: 0.091197, avg_loss: 0.213334 +019670/063150, loss: 0.115154, avg_loss: 0.213298 +019675/063150, loss: 0.046497, avg_loss: 0.213262 +019680/063150, loss: 0.030155, avg_loss: 0.213214 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19680/63150: {'accuracy': 0.8658256880733946} 
+019685/063150, loss: 0.183271, avg_loss: 0.213187 +019690/063150, loss: 0.102782, avg_loss: 0.213152 +019695/063150, loss: 0.026408, avg_loss: 0.213119 +019700/063150, loss: 0.010477, avg_loss: 0.213079 +019705/063150, loss: 0.253706, avg_loss: 0.213050 +019710/063150, loss: 0.129237, avg_loss: 0.213017 +019715/063150, loss: 0.235604, avg_loss: 0.212992 +019720/063150, loss: 0.040719, avg_loss: 0.212958 +019725/063150, loss: 0.035795, avg_loss: 0.212912 +019730/063150, loss: 0.265109, avg_loss: 0.212880 +019735/063150, loss: 0.231487, avg_loss: 0.212850 +019740/063150, loss: 0.042533, avg_loss: 0.212821 +019745/063150, loss: 0.121317, avg_loss: 0.212788 +019750/063150, loss: 0.087611, avg_loss: 0.212752 +019755/063150, loss: 0.041269, avg_loss: 0.212721 +019760/063150, loss: 0.091146, avg_loss: 0.212687 +019765/063150, loss: 0.013834, avg_loss: 0.212648 +019770/063150, loss: 0.072278, avg_loss: 0.212613 +019775/063150, loss: 0.042760, avg_loss: 0.212573 +019780/063150, loss: 0.041594, avg_loss: 0.212535 +019785/063150, loss: 0.054135, avg_loss: 0.212491 +019790/063150, loss: 0.141927, avg_loss: 0.212452 +019795/063150, loss: 0.142629, avg_loss: 0.212429 +019800/063150, loss: 0.073915, avg_loss: 0.212394 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19800/63150: {'accuracy': 0.8635321100917431} +019805/063150, loss: 0.021128, avg_loss: 0.212349 +019810/063150, loss: 0.021840, avg_loss: 0.212310 +019815/063150, loss: 0.017284, avg_loss: 0.212263 +019820/063150, loss: 0.325577, avg_loss: 0.212247 +019825/063150, loss: 0.037908, avg_loss: 0.212229 +019830/063150, loss: 0.146878, avg_loss: 0.212198 +019835/063150, loss: 0.114555, avg_loss: 0.212163 +019840/063150, loss: 0.032154, avg_loss: 0.212123 +019845/063150, loss: 0.108952, avg_loss: 0.212094 +019850/063150, loss: 0.133731, avg_loss: 0.212076 +019855/063150, loss: 0.092918, avg_loss: 0.212046 +019860/063150, loss: 0.140383, avg_loss: 0.212013 +019865/063150, loss: 0.045069, avg_loss: 0.211972 +019870/063150, loss: 0.058704, avg_loss: 0.211937 +019875/063150, loss: 0.140951, avg_loss: 0.211909 +019880/063150, loss: 0.054925, avg_loss: 0.211866 +019885/063150, loss: 0.076593, avg_loss: 0.211827 +019890/063150, loss: 0.056155, avg_loss: 0.211784 +019895/063150, loss: 0.155074, avg_loss: 0.211750 +019900/063150, loss: 0.005892, avg_loss: 0.211707 +019905/063150, loss: 0.017230, avg_loss: 0.211679 +019910/063150, loss: 0.072572, avg_loss: 0.211651 +019915/063150, loss: 0.028444, avg_loss: 0.211618 +019920/063150, loss: 0.189735, avg_loss: 0.211608 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 19920/63150: {'accuracy': 0.8646788990825688} +019925/063150, loss: 0.160397, avg_loss: 0.211577 +019930/063150, loss: 0.043215, avg_loss: 0.211542 +019935/063150, loss: 0.234430, avg_loss: 0.211510 +019940/063150, loss: 0.007714, avg_loss: 0.211476 +019945/063150, loss: 0.015270, avg_loss: 0.211437 +019950/063150, loss: 0.031907, avg_loss: 0.211408 +019955/063150, loss: 0.031654, avg_loss: 0.211374 +019960/063150, loss: 0.019460, avg_loss: 0.211337 +019965/063150, loss: 0.044637, avg_loss: 0.211300 +019970/063150, loss: 0.047601, avg_loss: 0.211265 +019975/063150, loss: 0.050534, avg_loss: 0.211229 +019980/063150, loss: 0.084906, avg_loss: 0.211192 +019985/063150, loss: 0.112903, avg_loss: 0.211162 +019990/063150, loss: 0.109337, avg_loss: 0.211129 +019995/063150, loss: 0.056791, avg_loss: 0.211088 +020000/063150, 
loss: 0.097843, avg_loss: 0.211049 +020005/063150, loss: 0.014693, avg_loss: 0.211007 +020010/063150, loss: 0.125128, avg_loss: 0.210978 +020015/063150, loss: 0.054642, avg_loss: 0.210954 +020020/063150, loss: 0.065417, avg_loss: 0.210917 +020025/063150, loss: 0.152131, avg_loss: 0.210880 +020030/063150, loss: 0.009045, avg_loss: 0.210848 +020035/063150, loss: 0.036245, avg_loss: 0.210813 +020040/063150, loss: 0.159550, avg_loss: 0.210781 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20040/63150: {'accuracy': 0.8681192660550459} +020045/063150, loss: 0.007589, avg_loss: 0.210743 +020050/063150, loss: 0.033831, avg_loss: 0.210703 +020055/063150, loss: 0.031441, avg_loss: 0.210664 +020060/063150, loss: 0.124152, avg_loss: 0.210644 +020065/063150, loss: 0.135039, avg_loss: 0.210616 +020070/063150, loss: 0.014693, avg_loss: 0.210569 +020075/063150, loss: 0.042115, avg_loss: 0.210535 +020080/063150, loss: 0.033478, avg_loss: 0.210489 +020085/063150, loss: 0.070043, avg_loss: 0.210453 +020090/063150, loss: 0.016736, avg_loss: 0.210411 +020095/063150, loss: 0.030043, avg_loss: 0.210380 +020100/063150, loss: 0.199432, avg_loss: 0.210349 +020105/063150, loss: 0.046917, avg_loss: 0.210305 +020110/063150, loss: 0.063614, avg_loss: 0.210278 +020115/063150, loss: 0.005540, avg_loss: 0.210244 +020120/063150, loss: 0.022867, avg_loss: 0.210204 +020125/063150, loss: 0.036301, avg_loss: 0.210160 +020130/063150, loss: 0.066623, avg_loss: 0.210116 +020135/063150, loss: 0.023106, avg_loss: 0.210086 +020140/063150, loss: 0.005517, avg_loss: 0.210042 +020145/063150, loss: 0.065499, avg_loss: 0.210011 +020150/063150, loss: 0.058279, avg_loss: 0.209995 +020155/063150, loss: 0.040224, avg_loss: 0.209956 +020160/063150, loss: 0.047493, avg_loss: 0.209920 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20160/63150: {'accuracy': 0.8612385321100917} +020165/063150, loss: 0.031267, avg_loss: 0.209879 +020170/063150, loss: 0.040430, avg_loss: 0.209841 +020175/063150, loss: 0.076775, avg_loss: 0.209808 +020180/063150, loss: 0.024195, avg_loss: 0.209793 +020185/063150, loss: 0.066413, avg_loss: 0.209762 +020190/063150, loss: 0.022350, avg_loss: 0.209718 +020195/063150, loss: 0.021636, avg_loss: 0.209673 +020200/063150, loss: 0.115731, avg_loss: 0.209639 +020205/063150, loss: 0.048761, avg_loss: 0.209599 +020210/063150, loss: 0.010425, avg_loss: 0.209560 +020215/063150, loss: 0.040928, avg_loss: 0.209522 +020220/063150, loss: 0.052633, avg_loss: 0.209495 +020225/063150, loss: 0.081250, avg_loss: 0.209465 +020230/063150, loss: 0.338672, avg_loss: 0.209455 +020235/063150, loss: 0.078776, avg_loss: 0.209433 +020240/063150, loss: 0.045523, avg_loss: 0.209403 +020245/063150, loss: 0.027094, avg_loss: 0.209374 +020250/063150, loss: 0.126051, avg_loss: 0.209339 +020255/063150, loss: 0.062450, avg_loss: 0.209298 +020260/063150, loss: 0.080874, avg_loss: 0.209260 +020265/063150, loss: 0.103944, avg_loss: 0.209225 +020270/063150, loss: 0.066819, avg_loss: 0.209198 +020275/063150, loss: 0.009557, avg_loss: 0.209163 +020280/063150, loss: 0.097118, avg_loss: 0.209123 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20280/63150: {'accuracy': 0.8692660550458715} +020285/063150, loss: 0.027364, avg_loss: 0.209079 +020290/063150, loss: 0.127520, avg_loss: 0.209040 +020295/063150, loss: 0.040172, avg_loss: 0.209002 +020300/063150, loss: 
0.122501, avg_loss: 0.208981 +020305/063150, loss: 0.031599, avg_loss: 0.208946 +020310/063150, loss: 0.029067, avg_loss: 0.208909 +020315/063150, loss: 0.012607, avg_loss: 0.208877 +020320/063150, loss: 0.011562, avg_loss: 0.208835 +020325/063150, loss: 0.004901, avg_loss: 0.208791 +020330/063150, loss: 0.064952, avg_loss: 0.208749 +020335/063150, loss: 0.081793, avg_loss: 0.208721 +020340/063150, loss: 0.016810, avg_loss: 0.208708 +020345/063150, loss: 0.162338, avg_loss: 0.208679 +020350/063150, loss: 0.003809, avg_loss: 0.208639 +020355/063150, loss: 0.049692, avg_loss: 0.208627 +020360/063150, loss: 0.086006, avg_loss: 0.208597 +020365/063150, loss: 0.141004, avg_loss: 0.208573 +020370/063150, loss: 0.091642, avg_loss: 0.208553 +020375/063150, loss: 0.030129, avg_loss: 0.208521 +020380/063150, loss: 0.035328, avg_loss: 0.208496 +020385/063150, loss: 0.040126, avg_loss: 0.208461 +020390/063150, loss: 0.024271, avg_loss: 0.208452 +020395/063150, loss: 0.073741, avg_loss: 0.208416 +020400/063150, loss: 0.074133, avg_loss: 0.208379 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20400/63150: {'accuracy': 0.8589449541284404} +020405/063150, loss: 0.110410, avg_loss: 0.208342 +020410/063150, loss: 0.030092, avg_loss: 0.208310 +020415/063150, loss: 0.106829, avg_loss: 0.208281 +020420/063150, loss: 0.217506, avg_loss: 0.208253 +020425/063150, loss: 0.018779, avg_loss: 0.208220 +020430/063150, loss: 0.042773, avg_loss: 0.208185 +020435/063150, loss: 0.120134, avg_loss: 0.208146 +020440/063150, loss: 0.054610, avg_loss: 0.208113 +020445/063150, loss: 0.072023, avg_loss: 0.208093 +020450/063150, loss: 0.157310, avg_loss: 0.208068 +020455/063150, loss: 0.012583, avg_loss: 0.208043 +020460/063150, loss: 0.153856, avg_loss: 0.208014 +020465/063150, loss: 0.015902, avg_loss: 0.207971 +020470/063150, loss: 0.044655, avg_loss: 0.207950 +020475/063150, loss: 0.040242, avg_loss: 0.207916 +020480/063150, loss: 0.025547, avg_loss: 0.207883 +020485/063150, loss: 0.118910, avg_loss: 0.207844 +020490/063150, loss: 0.024883, avg_loss: 0.207809 +020495/063150, loss: 0.083840, avg_loss: 0.207779 +020500/063150, loss: 0.041461, avg_loss: 0.207740 +020505/063150, loss: 0.070068, avg_loss: 0.207715 +020510/063150, loss: 0.018793, avg_loss: 0.207681 +020515/063150, loss: 0.037700, avg_loss: 0.207642 +020520/063150, loss: 0.029463, avg_loss: 0.207601 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20520/63150: {'accuracy': 0.8635321100917431} +020525/063150, loss: 0.030204, avg_loss: 0.207559 +020530/063150, loss: 0.166353, avg_loss: 0.207527 +020535/063150, loss: 0.110615, avg_loss: 0.207491 +020540/063150, loss: 0.126063, avg_loss: 0.207449 +020545/063150, loss: 0.040499, avg_loss: 0.207408 +020550/063150, loss: 0.028593, avg_loss: 0.207372 +020555/063150, loss: 0.117682, avg_loss: 0.207335 +020560/063150, loss: 0.006620, avg_loss: 0.207307 +020565/063150, loss: 0.013388, avg_loss: 0.207292 +020570/063150, loss: 0.018150, avg_loss: 0.207269 +020575/063150, loss: 0.231136, avg_loss: 0.207248 +020580/063150, loss: 0.114148, avg_loss: 0.207226 +020585/063150, loss: 0.061861, avg_loss: 0.207199 +020590/063150, loss: 0.418328, avg_loss: 0.207176 +020595/063150, loss: 0.213805, avg_loss: 0.207148 +020600/063150, loss: 0.047238, avg_loss: 0.207116 +020605/063150, loss: 0.006294, avg_loss: 0.207074 +020610/063150, loss: 0.105144, avg_loss: 0.207043 +020615/063150, loss: 0.024012, avg_loss: 
0.207007 +020620/063150, loss: 0.160236, avg_loss: 0.206989 +020625/063150, loss: 0.066625, avg_loss: 0.206961 +020630/063150, loss: 0.039587, avg_loss: 0.206926 +020635/063150, loss: 0.022547, avg_loss: 0.206884 +020640/063150, loss: 0.135983, avg_loss: 0.206853 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20640/63150: {'accuracy': 0.8555045871559633} +020645/063150, loss: 0.028459, avg_loss: 0.206816 +020650/063150, loss: 0.067286, avg_loss: 0.206778 +020655/063150, loss: 0.027723, avg_loss: 0.206747 +020660/063150, loss: 0.213605, avg_loss: 0.206717 +020665/063150, loss: 0.110135, avg_loss: 0.206678 +020670/063150, loss: 0.006411, avg_loss: 0.206642 +020675/063150, loss: 0.008009, avg_loss: 0.206606 +020680/063150, loss: 0.084562, avg_loss: 0.206583 +020685/063150, loss: 0.032616, avg_loss: 0.206544 +020690/063150, loss: 0.039479, avg_loss: 0.206503 +020695/063150, loss: 0.017251, avg_loss: 0.206466 +020700/063150, loss: 0.048638, avg_loss: 0.206434 +020705/063150, loss: 0.127265, avg_loss: 0.206393 +020710/063150, loss: 0.104122, avg_loss: 0.206359 +020715/063150, loss: 0.079874, avg_loss: 0.206345 +020720/063150, loss: 0.075914, avg_loss: 0.206330 +020725/063150, loss: 0.159761, avg_loss: 0.206297 +020730/063150, loss: 0.048971, avg_loss: 0.206260 +020735/063150, loss: 0.135927, avg_loss: 0.206221 +020740/063150, loss: 0.086144, avg_loss: 0.206191 +020745/063150, loss: 0.017295, avg_loss: 0.206155 +020750/063150, loss: 0.162105, avg_loss: 0.206127 +020755/063150, loss: 0.065647, avg_loss: 0.206094 +020760/063150, loss: 0.146410, avg_loss: 0.206078 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20760/63150: {'accuracy': 0.8497706422018348} +020765/063150, loss: 0.083786, avg_loss: 0.206048 +020770/063150, loss: 0.109062, avg_loss: 0.206020 +020775/063150, loss: 0.027642, avg_loss: 0.205990 +020780/063150, loss: 0.144391, avg_loss: 0.205973 +020785/063150, loss: 0.086016, avg_loss: 0.205945 +020790/063150, loss: 0.050243, avg_loss: 0.205909 +020795/063150, loss: 0.010270, avg_loss: 0.205887 +020800/063150, loss: 0.113305, avg_loss: 0.205861 +020805/063150, loss: 0.174551, avg_loss: 0.205835 +020810/063150, loss: 0.031637, avg_loss: 0.205802 +020815/063150, loss: 0.196363, avg_loss: 0.205774 +020820/063150, loss: 0.073117, avg_loss: 0.205734 +020825/063150, loss: 0.021083, avg_loss: 0.205694 +020830/063150, loss: 0.005502, avg_loss: 0.205653 +020835/063150, loss: 0.092456, avg_loss: 0.205619 +020840/063150, loss: 0.011423, avg_loss: 0.205586 +020845/063150, loss: 0.271513, avg_loss: 0.205559 +020850/063150, loss: 0.011996, avg_loss: 0.205518 +020855/063150, loss: 0.004455, avg_loss: 0.205474 +020860/063150, loss: 0.148890, avg_loss: 0.205445 +020865/063150, loss: 0.009384, avg_loss: 0.205417 +020870/063150, loss: 0.088145, avg_loss: 0.205381 +020875/063150, loss: 0.022598, avg_loss: 0.205343 +020880/063150, loss: 0.110031, avg_loss: 0.205319 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 20880/63150: {'accuracy': 0.8394495412844036} +020885/063150, loss: 0.051499, avg_loss: 0.205279 +020890/063150, loss: 0.077464, avg_loss: 0.205264 +020895/063150, loss: 0.034167, avg_loss: 0.205230 +020900/063150, loss: 0.039848, avg_loss: 0.205191 +020905/063150, loss: 0.110465, avg_loss: 0.205173 +020910/063150, loss: 0.175760, avg_loss: 0.205153 +020915/063150, loss: 0.022009, avg_loss: 0.205116 
+020920/063150, loss: 0.011672, avg_loss: 0.205080 +020925/063150, loss: 0.247682, avg_loss: 0.205055 +020930/063150, loss: 0.263460, avg_loss: 0.205029 +020935/063150, loss: 0.041382, avg_loss: 0.205001 +020940/063150, loss: 0.053091, avg_loss: 0.204971 +020945/063150, loss: 0.052148, avg_loss: 0.204940 +020950/063150, loss: 0.081791, avg_loss: 0.204908 +020955/063150, loss: 0.046655, avg_loss: 0.204876 +020960/063150, loss: 0.056876, avg_loss: 0.204841 +020965/063150, loss: 0.069362, avg_loss: 0.204815 +020970/063150, loss: 0.188785, avg_loss: 0.204796 +020975/063150, loss: 0.045364, avg_loss: 0.204774 +020980/063150, loss: 0.027498, avg_loss: 0.204746 +020985/063150, loss: 0.034381, avg_loss: 0.204714 +020990/063150, loss: 0.017459, avg_loss: 0.204683 +020995/063150, loss: 0.065009, avg_loss: 0.204648 +021000/063150, loss: 0.075192, avg_loss: 0.204622 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 9, step 21000/63150: {'accuracy': 0.8577981651376146} +021005/063150, loss: 0.027160, avg_loss: 0.204582 +021010/063150, loss: 0.088042, avg_loss: 0.204558 +021015/063150, loss: 0.100500, avg_loss: 0.204535 +021020/063150, loss: 0.032092, avg_loss: 0.204503 +021025/063150, loss: 0.048235, avg_loss: 0.204488 +021030/063150, loss: 0.013025, avg_loss: 0.204458 +021035/063150, loss: 0.213528, avg_loss: 0.204433 +021040/063150, loss: 0.222350, avg_loss: 0.204412 +021045/063150, loss: 0.088415, avg_loss: 0.204384 +021050/063150, loss: 0.031291, avg_loss: 0.204351 +021055/063150, loss: 0.043558, avg_loss: 0.204316 +021060/063150, loss: 0.012827, avg_loss: 0.204283 +021065/063150, loss: 0.033970, avg_loss: 0.204245 +021070/063150, loss: 0.066525, avg_loss: 0.204211 +021075/063150, loss: 0.031503, avg_loss: 0.204177 +021080/063150, loss: 0.165437, avg_loss: 0.204150 +021085/063150, loss: 0.033988, avg_loss: 0.204110 +021090/063150, loss: 0.303216, avg_loss: 0.204085 +021095/063150, loss: 0.079413, avg_loss: 0.204045 +021100/063150, loss: 0.012632, avg_loss: 0.204009 +021105/063150, loss: 0.024073, avg_loss: 0.203974 +021110/063150, loss: 0.017585, avg_loss: 0.203935 +021115/063150, loss: 0.076978, avg_loss: 0.203906 +021120/063150, loss: 0.099934, avg_loss: 0.203868 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21120/63150: {'accuracy': 0.8669724770642202} +021125/063150, loss: 0.061035, avg_loss: 0.203834 +021130/063150, loss: 0.037070, avg_loss: 0.203794 +021135/063150, loss: 0.050149, avg_loss: 0.203773 +021140/063150, loss: 0.032280, avg_loss: 0.203737 +021145/063150, loss: 0.014559, avg_loss: 0.203695 +021150/063150, loss: 0.097263, avg_loss: 0.203662 +021155/063150, loss: 0.011140, avg_loss: 0.203622 +021160/063150, loss: 0.019545, avg_loss: 0.203591 +021165/063150, loss: 0.051003, avg_loss: 0.203561 +021170/063150, loss: 0.056874, avg_loss: 0.203532 +021175/063150, loss: 0.041219, avg_loss: 0.203489 +021180/063150, loss: 0.027904, avg_loss: 0.203461 +021185/063150, loss: 0.067805, avg_loss: 0.203434 +021190/063150, loss: 0.018498, avg_loss: 0.203395 +021195/063150, loss: 0.079323, avg_loss: 0.203357 +021200/063150, loss: 0.151178, avg_loss: 0.203321 +021205/063150, loss: 0.007469, avg_loss: 0.203283 +021210/063150, loss: 0.026797, avg_loss: 0.203243 +021215/063150, loss: 0.005706, avg_loss: 0.203207 +021220/063150, loss: 0.009869, avg_loss: 0.203168 +021225/063150, loss: 0.003080, avg_loss: 0.203137 +021230/063150, loss: 0.022177, avg_loss: 0.203103 +021235/063150, 
loss: 0.082857, avg_loss: 0.203067 +021240/063150, loss: 0.012739, avg_loss: 0.203030 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21240/63150: {'accuracy': 0.8635321100917431} +021245/063150, loss: 0.058605, avg_loss: 0.202996 +021250/063150, loss: 0.162820, avg_loss: 0.202972 +021255/063150, loss: 0.058399, avg_loss: 0.202932 +021260/063150, loss: 0.006702, avg_loss: 0.202891 +021265/063150, loss: 0.039642, avg_loss: 0.202850 +021270/063150, loss: 0.006989, avg_loss: 0.202817 +021275/063150, loss: 0.116692, avg_loss: 0.202783 +021280/063150, loss: 0.017230, avg_loss: 0.202748 +021285/063150, loss: 0.039603, avg_loss: 0.202711 +021290/063150, loss: 0.074391, avg_loss: 0.202681 +021295/063150, loss: 0.027657, avg_loss: 0.202656 +021300/063150, loss: 0.037208, avg_loss: 0.202621 +021305/063150, loss: 0.041017, avg_loss: 0.202589 +021310/063150, loss: 0.013274, avg_loss: 0.202555 +021315/063150, loss: 0.013868, avg_loss: 0.202525 +021320/063150, loss: 0.038810, avg_loss: 0.202491 +021325/063150, loss: 0.111779, avg_loss: 0.202460 +021330/063150, loss: 0.036742, avg_loss: 0.202423 +021335/063150, loss: 0.061389, avg_loss: 0.202385 +021340/063150, loss: 0.018781, avg_loss: 0.202356 +021345/063150, loss: 0.028492, avg_loss: 0.202324 +021350/063150, loss: 0.126661, avg_loss: 0.202286 +021355/063150, loss: 0.017322, avg_loss: 0.202252 +021360/063150, loss: 0.042239, avg_loss: 0.202213 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21360/63150: {'accuracy': 0.8727064220183486} +021365/063150, loss: 0.089343, avg_loss: 0.202183 +021370/063150, loss: 0.038140, avg_loss: 0.202141 +021375/063150, loss: 0.158543, avg_loss: 0.202108 +021380/063150, loss: 0.087458, avg_loss: 0.202069 +021385/063150, loss: 0.020430, avg_loss: 0.202036 +021390/063150, loss: 0.003075, avg_loss: 0.202003 +021395/063150, loss: 0.010664, avg_loss: 0.201972 +021400/063150, loss: 0.046870, avg_loss: 0.201934 +021405/063150, loss: 0.063215, avg_loss: 0.201900 +021410/063150, loss: 0.114710, avg_loss: 0.201866 +021415/063150, loss: 0.059310, avg_loss: 0.201829 +021420/063150, loss: 0.068002, avg_loss: 0.201800 +021425/063150, loss: 0.023043, avg_loss: 0.201766 +021430/063150, loss: 0.053517, avg_loss: 0.201726 +021435/063150, loss: 0.092121, avg_loss: 0.201694 +021440/063150, loss: 0.103308, avg_loss: 0.201659 +021445/063150, loss: 0.007302, avg_loss: 0.201614 +021450/063150, loss: 0.011896, avg_loss: 0.201578 +021455/063150, loss: 0.048774, avg_loss: 0.201550 +021460/063150, loss: 0.175042, avg_loss: 0.201519 +021465/063150, loss: 0.163047, avg_loss: 0.201495 +021470/063150, loss: 0.011442, avg_loss: 0.201454 +021475/063150, loss: 0.031252, avg_loss: 0.201416 +021480/063150, loss: 0.027138, avg_loss: 0.201377 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21480/63150: {'accuracy': 0.8520642201834863} +021485/063150, loss: 0.007853, avg_loss: 0.201339 +021490/063150, loss: 0.020112, avg_loss: 0.201310 +021495/063150, loss: 0.004416, avg_loss: 0.201280 +021500/063150, loss: 0.018525, avg_loss: 0.201253 +021505/063150, loss: 0.048418, avg_loss: 0.201214 +021510/063150, loss: 0.059886, avg_loss: 0.201189 +021515/063150, loss: 0.042302, avg_loss: 0.201158 +021520/063150, loss: 0.066871, avg_loss: 0.201120 +021525/063150, loss: 0.009210, avg_loss: 0.201092 +021530/063150, loss: 0.027710, avg_loss: 0.201073 +021535/063150, loss: 
0.097717, avg_loss: 0.201045 +021540/063150, loss: 0.014640, avg_loss: 0.201013 +021545/063150, loss: 0.053703, avg_loss: 0.200974 +021550/063150, loss: 0.025651, avg_loss: 0.200962 +021555/063150, loss: 0.017275, avg_loss: 0.200925 +021560/063150, loss: 0.046003, avg_loss: 0.200897 +021565/063150, loss: 0.063088, avg_loss: 0.200864 +021570/063150, loss: 0.159226, avg_loss: 0.200841 +021575/063150, loss: 0.075134, avg_loss: 0.200816 +021580/063150, loss: 0.056126, avg_loss: 0.200776 +021585/063150, loss: 0.125232, avg_loss: 0.200755 +021590/063150, loss: 0.111993, avg_loss: 0.200732 +021595/063150, loss: 0.049268, avg_loss: 0.200702 +021600/063150, loss: 0.087106, avg_loss: 0.200666 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21600/63150: {'accuracy': 0.8405963302752294} +021605/063150, loss: 0.025933, avg_loss: 0.200630 +021610/063150, loss: 0.017660, avg_loss: 0.200594 +021615/063150, loss: 0.086530, avg_loss: 0.200573 +021620/063150, loss: 0.012778, avg_loss: 0.200533 +021625/063150, loss: 0.144414, avg_loss: 0.200503 +021630/063150, loss: 0.095246, avg_loss: 0.200467 +021635/063150, loss: 0.024590, avg_loss: 0.200450 +021640/063150, loss: 0.089856, avg_loss: 0.200423 +021645/063150, loss: 0.179813, avg_loss: 0.200405 +021650/063150, loss: 0.021562, avg_loss: 0.200374 +021655/063150, loss: 0.025524, avg_loss: 0.200337 +021660/063150, loss: 0.047650, avg_loss: 0.200305 +021665/063150, loss: 0.237280, avg_loss: 0.200290 +021670/063150, loss: 0.064881, avg_loss: 0.200257 +021675/063150, loss: 0.027729, avg_loss: 0.200226 +021680/063150, loss: 0.110178, avg_loss: 0.200190 +021685/063150, loss: 0.025863, avg_loss: 0.200150 +021690/063150, loss: 0.048851, avg_loss: 0.200113 +021695/063150, loss: 0.004236, avg_loss: 0.200074 +021700/063150, loss: 0.123597, avg_loss: 0.200040 +021705/063150, loss: 0.042912, avg_loss: 0.200006 +021710/063150, loss: 0.009431, avg_loss: 0.199970 +021715/063150, loss: 0.144364, avg_loss: 0.199943 +021720/063150, loss: 0.013373, avg_loss: 0.199920 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21720/63150: {'accuracy': 0.8600917431192661} +021725/063150, loss: 0.084313, avg_loss: 0.199884 +021730/063150, loss: 0.028954, avg_loss: 0.199851 +021735/063150, loss: 0.176788, avg_loss: 0.199826 +021740/063150, loss: 0.005431, avg_loss: 0.199790 +021745/063150, loss: 0.009105, avg_loss: 0.199753 +021750/063150, loss: 0.049044, avg_loss: 0.199724 +021755/063150, loss: 0.061687, avg_loss: 0.199690 +021760/063150, loss: 0.019765, avg_loss: 0.199655 +021765/063150, loss: 0.016331, avg_loss: 0.199621 +021770/063150, loss: 0.083436, avg_loss: 0.199584 +021775/063150, loss: 0.106852, avg_loss: 0.199558 +021780/063150, loss: 0.090581, avg_loss: 0.199524 +021785/063150, loss: 0.111586, avg_loss: 0.199493 +021790/063150, loss: 0.031610, avg_loss: 0.199455 +021795/063150, loss: 0.009092, avg_loss: 0.199415 +021800/063150, loss: 0.042358, avg_loss: 0.199378 +021805/063150, loss: 0.010819, avg_loss: 0.199349 +021810/063150, loss: 0.107072, avg_loss: 0.199320 +021815/063150, loss: 0.055009, avg_loss: 0.199300 +021820/063150, loss: 0.190386, avg_loss: 0.199277 +021825/063150, loss: 0.012031, avg_loss: 0.199242 +021830/063150, loss: 0.032310, avg_loss: 0.199204 +021835/063150, loss: 0.058814, avg_loss: 0.199174 +021840/063150, loss: 0.070953, avg_loss: 0.199152 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per 
device = 32 +epoch 10, step 21840/63150: {'accuracy': 0.8543577981651376} +021845/063150, loss: 0.013568, avg_loss: 0.199115 +021850/063150, loss: 0.029372, avg_loss: 0.199088 +021855/063150, loss: 0.065179, avg_loss: 0.199050 +021860/063150, loss: 0.127836, avg_loss: 0.199017 +021865/063150, loss: 0.006953, avg_loss: 0.198980 +021870/063150, loss: 0.048309, avg_loss: 0.198965 +021875/063150, loss: 0.019982, avg_loss: 0.198938 +021880/063150, loss: 0.043102, avg_loss: 0.198912 +021885/063150, loss: 0.036355, avg_loss: 0.198894 +021890/063150, loss: 0.006664, avg_loss: 0.198864 +021895/063150, loss: 0.126826, avg_loss: 0.198830 +021900/063150, loss: 0.011602, avg_loss: 0.198795 +021905/063150, loss: 0.036565, avg_loss: 0.198771 +021910/063150, loss: 0.016536, avg_loss: 0.198740 +021915/063150, loss: 0.053892, avg_loss: 0.198713 +021920/063150, loss: 0.109127, avg_loss: 0.198687 +021925/063150, loss: 0.035026, avg_loss: 0.198652 +021930/063150, loss: 0.021003, avg_loss: 0.198616 +021935/063150, loss: 0.088228, avg_loss: 0.198590 +021940/063150, loss: 0.046900, avg_loss: 0.198553 +021945/063150, loss: 0.017754, avg_loss: 0.198518 +021950/063150, loss: 0.091679, avg_loss: 0.198488 +021955/063150, loss: 0.141407, avg_loss: 0.198455 +021960/063150, loss: 0.028794, avg_loss: 0.198427 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 21960/63150: {'accuracy': 0.8577981651376146} +021965/063150, loss: 0.040378, avg_loss: 0.198390 +021970/063150, loss: 0.098541, avg_loss: 0.198360 +021975/063150, loss: 0.039115, avg_loss: 0.198337 +021980/063150, loss: 0.014699, avg_loss: 0.198306 +021985/063150, loss: 0.024117, avg_loss: 0.198282 +021990/063150, loss: 0.011524, avg_loss: 0.198241 +021995/063150, loss: 0.008876, avg_loss: 0.198214 +022000/063150, loss: 0.023660, avg_loss: 0.198190 +022005/063150, loss: 0.133314, avg_loss: 0.198164 +022010/063150, loss: 0.072934, avg_loss: 0.198133 +022015/063150, loss: 0.168936, avg_loss: 0.198107 +022020/063150, loss: 0.102314, avg_loss: 0.198082 +022025/063150, loss: 0.012388, avg_loss: 0.198047 +022030/063150, loss: 0.034317, avg_loss: 0.198011 +022035/063150, loss: 0.034185, avg_loss: 0.197979 +022040/063150, loss: 0.033702, avg_loss: 0.197952 +022045/063150, loss: 0.062564, avg_loss: 0.197914 +022050/063150, loss: 0.093162, avg_loss: 0.197883 +022055/063150, loss: 0.081634, avg_loss: 0.197852 +022060/063150, loss: 0.105888, avg_loss: 0.197817 +022065/063150, loss: 0.088682, avg_loss: 0.197793 +022070/063150, loss: 0.042452, avg_loss: 0.197757 +022075/063150, loss: 0.103377, avg_loss: 0.197731 +022080/063150, loss: 0.005138, avg_loss: 0.197697 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22080/63150: {'accuracy': 0.856651376146789} +022085/063150, loss: 0.137126, avg_loss: 0.197676 +022090/063150, loss: 0.014711, avg_loss: 0.197647 +022095/063150, loss: 0.017935, avg_loss: 0.197608 +022100/063150, loss: 0.014679, avg_loss: 0.197567 +022105/063150, loss: 0.076943, avg_loss: 0.197543 +022110/063150, loss: 0.011852, avg_loss: 0.197514 +022115/063150, loss: 0.041861, avg_loss: 0.197489 +022120/063150, loss: 0.049748, avg_loss: 0.197469 +022125/063150, loss: 0.156984, avg_loss: 0.197440 +022130/063150, loss: 0.024157, avg_loss: 0.197426 +022135/063150, loss: 0.026004, avg_loss: 0.197396 +022140/063150, loss: 0.034191, avg_loss: 0.197365 +022145/063150, loss: 0.142887, avg_loss: 0.197335 +022150/063150, loss: 0.010900, avg_loss: 
0.197308 +022155/063150, loss: 0.176128, avg_loss: 0.197290 +022160/063150, loss: 0.142700, avg_loss: 0.197262 +022165/063150, loss: 0.025616, avg_loss: 0.197223 +022170/063150, loss: 0.055992, avg_loss: 0.197198 +022175/063150, loss: 0.076019, avg_loss: 0.197167 +022180/063150, loss: 0.015440, avg_loss: 0.197140 +022185/063150, loss: 0.149633, avg_loss: 0.197122 +022190/063150, loss: 0.008623, avg_loss: 0.197090 +022195/063150, loss: 0.020159, avg_loss: 0.197058 +022200/063150, loss: 0.066964, avg_loss: 0.197024 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22200/63150: {'accuracy': 0.8543577981651376} +022205/063150, loss: 0.293325, avg_loss: 0.197009 +022210/063150, loss: 0.033500, avg_loss: 0.196982 +022215/063150, loss: 0.069676, avg_loss: 0.196954 +022220/063150, loss: 0.044650, avg_loss: 0.196923 +022225/063150, loss: 0.074841, avg_loss: 0.196891 +022230/063150, loss: 0.070426, avg_loss: 0.196863 +022235/063150, loss: 0.030231, avg_loss: 0.196841 +022240/063150, loss: 0.054111, avg_loss: 0.196810 +022245/063150, loss: 0.051011, avg_loss: 0.196773 +022250/063150, loss: 0.025203, avg_loss: 0.196745 +022255/063150, loss: 0.017682, avg_loss: 0.196712 +022260/063150, loss: 0.047338, avg_loss: 0.196683 +022265/063150, loss: 0.295749, avg_loss: 0.196672 +022270/063150, loss: 0.046192, avg_loss: 0.196649 +022275/063150, loss: 0.118811, avg_loss: 0.196633 +022280/063150, loss: 0.008980, avg_loss: 0.196611 +022285/063150, loss: 0.109894, avg_loss: 0.196582 +022290/063150, loss: 0.052682, avg_loss: 0.196554 +022295/063150, loss: 0.123655, avg_loss: 0.196535 +022300/063150, loss: 0.104137, avg_loss: 0.196505 +022305/063150, loss: 0.041919, avg_loss: 0.196478 +022310/063150, loss: 0.018834, avg_loss: 0.196459 +022315/063150, loss: 0.059563, avg_loss: 0.196448 +022320/063150, loss: 0.032077, avg_loss: 0.196425 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22320/63150: {'accuracy': 0.8612385321100917} +022325/063150, loss: 0.095994, avg_loss: 0.196397 +022330/063150, loss: 0.136851, avg_loss: 0.196376 +022335/063150, loss: 0.053174, avg_loss: 0.196344 +022340/063150, loss: 0.034559, avg_loss: 0.196326 +022345/063150, loss: 0.056062, avg_loss: 0.196308 +022350/063150, loss: 0.044600, avg_loss: 0.196272 +022355/063150, loss: 0.029881, avg_loss: 0.196240 +022360/063150, loss: 0.010959, avg_loss: 0.196212 +022365/063150, loss: 0.092464, avg_loss: 0.196182 +022370/063150, loss: 0.019121, avg_loss: 0.196153 +022375/063150, loss: 0.061359, avg_loss: 0.196127 +022380/063150, loss: 0.127033, avg_loss: 0.196106 +022385/063150, loss: 0.041401, avg_loss: 0.196084 +022390/063150, loss: 0.005995, avg_loss: 0.196048 +022395/063150, loss: 0.004339, avg_loss: 0.196017 +022400/063150, loss: 0.088383, avg_loss: 0.195988 +022405/063150, loss: 0.163154, avg_loss: 0.195964 +022410/063150, loss: 0.065892, avg_loss: 0.195942 +022415/063150, loss: 0.011418, avg_loss: 0.195907 +022420/063150, loss: 0.036883, avg_loss: 0.195869 +022425/063150, loss: 0.022298, avg_loss: 0.195836 +022430/063150, loss: 0.089791, avg_loss: 0.195804 +022435/063150, loss: 0.034271, avg_loss: 0.195777 +022440/063150, loss: 0.128377, avg_loss: 0.195751 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22440/63150: {'accuracy': 0.8589449541284404} +022445/063150, loss: 0.167781, avg_loss: 0.195728 +022450/063150, loss: 0.061771, avg_loss: 0.195691 
+022455/063150, loss: 0.045238, avg_loss: 0.195653 +022460/063150, loss: 0.094943, avg_loss: 0.195621 +022465/063150, loss: 0.004108, avg_loss: 0.195581 +022470/063150, loss: 0.157663, avg_loss: 0.195554 +022475/063150, loss: 0.008671, avg_loss: 0.195518 +022480/063150, loss: 0.021298, avg_loss: 0.195504 +022485/063150, loss: 0.168470, avg_loss: 0.195476 +022490/063150, loss: 0.012726, avg_loss: 0.195451 +022495/063150, loss: 0.011042, avg_loss: 0.195414 +022500/063150, loss: 0.090841, avg_loss: 0.195381 +022505/063150, loss: 0.018633, avg_loss: 0.195347 +022510/063150, loss: 0.039132, avg_loss: 0.195314 +022515/063150, loss: 0.087783, avg_loss: 0.195280 +022520/063150, loss: 0.009073, avg_loss: 0.195252 +022525/063150, loss: 0.011479, avg_loss: 0.195221 +022530/063150, loss: 0.066836, avg_loss: 0.195186 +022535/063150, loss: 0.014882, avg_loss: 0.195150 +022540/063150, loss: 0.051247, avg_loss: 0.195117 +022545/063150, loss: 0.009370, avg_loss: 0.195088 +022550/063150, loss: 0.037039, avg_loss: 0.195060 +022555/063150, loss: 0.014592, avg_loss: 0.195031 +022560/063150, loss: 0.020069, avg_loss: 0.194998 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22560/63150: {'accuracy': 0.8440366972477065} +022565/063150, loss: 0.098400, avg_loss: 0.194974 +022570/063150, loss: 0.003008, avg_loss: 0.194939 +022575/063150, loss: 0.017197, avg_loss: 0.194902 +022580/063150, loss: 0.117138, avg_loss: 0.194889 +022585/063150, loss: 0.047130, avg_loss: 0.194862 +022590/063150, loss: 0.007353, avg_loss: 0.194828 +022595/063150, loss: 0.199140, avg_loss: 0.194813 +022600/063150, loss: 0.093617, avg_loss: 0.194790 +022605/063150, loss: 0.011175, avg_loss: 0.194755 +022610/063150, loss: 0.178753, avg_loss: 0.194733 +022615/063150, loss: 0.099559, avg_loss: 0.194702 +022620/063150, loss: 0.026797, avg_loss: 0.194664 +022625/063150, loss: 0.020657, avg_loss: 0.194639 +022630/063150, loss: 0.043500, avg_loss: 0.194610 +022635/063150, loss: 0.243713, avg_loss: 0.194588 +022640/063150, loss: 0.080739, avg_loss: 0.194551 +022645/063150, loss: 0.002077, avg_loss: 0.194533 +022650/063150, loss: 0.193794, avg_loss: 0.194505 +022655/063150, loss: 0.051798, avg_loss: 0.194476 +022660/063150, loss: 0.026690, avg_loss: 0.194446 +022665/063150, loss: 0.013211, avg_loss: 0.194422 +022670/063150, loss: 0.037913, avg_loss: 0.194391 +022675/063150, loss: 0.117704, avg_loss: 0.194365 +022680/063150, loss: 0.064739, avg_loss: 0.194337 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22680/63150: {'accuracy': 0.8532110091743119} +022685/063150, loss: 0.054149, avg_loss: 0.194307 +022690/063150, loss: 0.159059, avg_loss: 0.194279 +022695/063150, loss: 0.029992, avg_loss: 0.194252 +022700/063150, loss: 0.032528, avg_loss: 0.194222 +022705/063150, loss: 0.052093, avg_loss: 0.194188 +022710/063150, loss: 0.101192, avg_loss: 0.194158 +022715/063150, loss: 0.021592, avg_loss: 0.194126 +022720/063150, loss: 0.057674, avg_loss: 0.194100 +022725/063150, loss: 0.002278, avg_loss: 0.194086 +022730/063150, loss: 0.171402, avg_loss: 0.194077 +022735/063150, loss: 0.036579, avg_loss: 0.194046 +022740/063150, loss: 0.161170, avg_loss: 0.194017 +022745/063150, loss: 0.061871, avg_loss: 0.193992 +022750/063150, loss: 0.034428, avg_loss: 0.193957 +022755/063150, loss: 0.030176, avg_loss: 0.193923 +022760/063150, loss: 0.081419, avg_loss: 0.193903 +022765/063150, loss: 0.354843, avg_loss: 0.193896 +022770/063150, 
loss: 0.188152, avg_loss: 0.193889 +022775/063150, loss: 0.137218, avg_loss: 0.193866 +022780/063150, loss: 0.024686, avg_loss: 0.193834 +022785/063150, loss: 0.040832, avg_loss: 0.193806 +022790/063150, loss: 0.149951, avg_loss: 0.193779 +022795/063150, loss: 0.146829, avg_loss: 0.193757 +022800/063150, loss: 0.044110, avg_loss: 0.193730 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22800/63150: {'accuracy': 0.8371559633027523} +022805/063150, loss: 0.007515, avg_loss: 0.193695 +022810/063150, loss: 0.158882, avg_loss: 0.193667 +022815/063150, loss: 0.059854, avg_loss: 0.193634 +022820/063150, loss: 0.144683, avg_loss: 0.193603 +022825/063150, loss: 0.023843, avg_loss: 0.193566 +022830/063150, loss: 0.012936, avg_loss: 0.193538 +022835/063150, loss: 0.006677, avg_loss: 0.193506 +022840/063150, loss: 0.019081, avg_loss: 0.193480 +022845/063150, loss: 0.087645, avg_loss: 0.193449 +022850/063150, loss: 0.138032, avg_loss: 0.193429 +022855/063150, loss: 0.083127, avg_loss: 0.193403 +022860/063150, loss: 0.073598, avg_loss: 0.193371 +022865/063150, loss: 0.040333, avg_loss: 0.193342 +022870/063150, loss: 0.011868, avg_loss: 0.193311 +022875/063150, loss: 0.017620, avg_loss: 0.193274 +022880/063150, loss: 0.007211, avg_loss: 0.193241 +022885/063150, loss: 0.049768, avg_loss: 0.193222 +022890/063150, loss: 0.031942, avg_loss: 0.193194 +022895/063150, loss: 0.045853, avg_loss: 0.193169 +022900/063150, loss: 0.082127, avg_loss: 0.193138 +022905/063150, loss: 0.040827, avg_loss: 0.193104 +022910/063150, loss: 0.048356, avg_loss: 0.193071 +022915/063150, loss: 0.098776, avg_loss: 0.193043 +022920/063150, loss: 0.116651, avg_loss: 0.193018 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 22920/63150: {'accuracy': 0.8543577981651376} +022925/063150, loss: 0.012783, avg_loss: 0.192987 +022930/063150, loss: 0.002173, avg_loss: 0.192949 +022935/063150, loss: 0.026290, avg_loss: 0.192921 +022940/063150, loss: 0.030074, avg_loss: 0.192889 +022945/063150, loss: 0.014593, avg_loss: 0.192857 +022950/063150, loss: 0.098203, avg_loss: 0.192830 +022955/063150, loss: 0.038577, avg_loss: 0.192801 +022960/063150, loss: 0.166765, avg_loss: 0.192787 +022965/063150, loss: 0.041652, avg_loss: 0.192758 +022970/063150, loss: 0.232168, avg_loss: 0.192743 +022975/063150, loss: 0.056446, avg_loss: 0.192714 +022980/063150, loss: 0.121726, avg_loss: 0.192687 +022985/063150, loss: 0.004820, avg_loss: 0.192658 +022990/063150, loss: 0.021978, avg_loss: 0.192628 +022995/063150, loss: 0.236425, avg_loss: 0.192607 +023000/063150, loss: 0.053714, avg_loss: 0.192580 +023005/063150, loss: 0.211114, avg_loss: 0.192558 +023010/063150, loss: 0.003515, avg_loss: 0.192533 +023015/063150, loss: 0.052144, avg_loss: 0.192499 +023020/063150, loss: 0.026799, avg_loss: 0.192469 +023025/063150, loss: 0.091898, avg_loss: 0.192437 +023030/063150, loss: 0.003500, avg_loss: 0.192414 +023035/063150, loss: 0.005857, avg_loss: 0.192376 +023040/063150, loss: 0.044703, avg_loss: 0.192364 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 10, step 23040/63150: {'accuracy': 0.8589449541284404} +023045/063150, loss: 0.143516, avg_loss: 0.192334 +023050/063150, loss: 0.170426, avg_loss: 0.192318 +023055/063150, loss: 0.096597, avg_loss: 0.192286 +023060/063150, loss: 0.003429, avg_loss: 0.192253 +023065/063150, loss: 0.012101, avg_loss: 0.192233 +023070/063150, loss: 
0.010048, avg_loss: 0.192202 +023075/063150, loss: 0.043284, avg_loss: 0.192169 +023080/063150, loss: 0.004529, avg_loss: 0.192132 +023085/063150, loss: 0.011963, avg_loss: 0.192095 +023090/063150, loss: 0.024565, avg_loss: 0.192068 +023095/063150, loss: 0.006504, avg_loss: 0.192057 +023100/063150, loss: 0.026959, avg_loss: 0.192019 +023105/063150, loss: 0.012585, avg_loss: 0.191985 +023110/063150, loss: 0.132001, avg_loss: 0.191975 +023115/063150, loss: 0.026568, avg_loss: 0.191954 +023120/063150, loss: 0.026058, avg_loss: 0.191929 +023125/063150, loss: 0.076050, avg_loss: 0.191898 +023130/063150, loss: 0.015319, avg_loss: 0.191866 +023135/063150, loss: 0.131488, avg_loss: 0.191840 +023140/063150, loss: 0.013351, avg_loss: 0.191807 +023145/063150, loss: 0.023956, avg_loss: 0.191781 +023150/063150, loss: 0.005832, avg_loss: 0.191747 +023155/063150, loss: 0.024498, avg_loss: 0.191717 +023160/063150, loss: 0.032976, avg_loss: 0.191686 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23160/63150: {'accuracy': 0.8440366972477065} +023165/063150, loss: 0.015528, avg_loss: 0.191651 +023170/063150, loss: 0.025395, avg_loss: 0.191617 +023175/063150, loss: 0.031576, avg_loss: 0.191581 +023180/063150, loss: 0.309930, avg_loss: 0.191561 +023185/063150, loss: 0.006360, avg_loss: 0.191524 +023190/063150, loss: 0.001535, avg_loss: 0.191486 +023195/063150, loss: 0.237478, avg_loss: 0.191469 +023200/063150, loss: 0.001275, avg_loss: 0.191430 +023205/063150, loss: 0.136791, avg_loss: 0.191402 +023210/063150, loss: 0.104698, avg_loss: 0.191377 +023215/063150, loss: 0.035337, avg_loss: 0.191342 +023220/063150, loss: 0.019973, avg_loss: 0.191320 +023225/063150, loss: 0.069043, avg_loss: 0.191287 +023230/063150, loss: 0.048119, avg_loss: 0.191257 +023235/063150, loss: 0.034886, avg_loss: 0.191225 +023240/063150, loss: 0.008942, avg_loss: 0.191186 +023245/063150, loss: 0.026159, avg_loss: 0.191158 +023250/063150, loss: 0.207585, avg_loss: 0.191140 +023255/063150, loss: 0.019199, avg_loss: 0.191103 +023260/063150, loss: 0.007328, avg_loss: 0.191076 +023265/063150, loss: 0.018104, avg_loss: 0.191039 +023270/063150, loss: 0.002913, avg_loss: 0.191008 +023275/063150, loss: 0.144199, avg_loss: 0.190984 +023280/063150, loss: 0.011352, avg_loss: 0.190952 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23280/63150: {'accuracy': 0.8486238532110092} +023285/063150, loss: 0.063804, avg_loss: 0.190924 +023290/063150, loss: 0.195421, avg_loss: 0.190906 +023295/063150, loss: 0.041876, avg_loss: 0.190878 +023300/063150, loss: 0.008782, avg_loss: 0.190844 +023305/063150, loss: 0.045159, avg_loss: 0.190820 +023310/063150, loss: 0.023753, avg_loss: 0.190788 +023315/063150, loss: 0.170227, avg_loss: 0.190758 +023320/063150, loss: 0.113302, avg_loss: 0.190727 +023325/063150, loss: 0.043905, avg_loss: 0.190693 +023330/063150, loss: 0.002386, avg_loss: 0.190659 +023335/063150, loss: 0.009420, avg_loss: 0.190622 +023340/063150, loss: 0.089200, avg_loss: 0.190588 +023345/063150, loss: 0.010737, avg_loss: 0.190560 +023350/063150, loss: 0.089563, avg_loss: 0.190530 +023355/063150, loss: 0.003631, avg_loss: 0.190494 +023360/063150, loss: 0.013963, avg_loss: 0.190462 +023365/063150, loss: 0.011867, avg_loss: 0.190426 +023370/063150, loss: 0.001041, avg_loss: 0.190399 +023375/063150, loss: 0.016217, avg_loss: 0.190369 +023380/063150, loss: 0.012014, avg_loss: 0.190331 +023385/063150, loss: 0.040433, 
avg_loss: 0.190298 +023390/063150, loss: 0.023484, avg_loss: 0.190272 +023395/063150, loss: 0.004207, avg_loss: 0.190235 +023400/063150, loss: 0.081062, avg_loss: 0.190202 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23400/63150: {'accuracy': 0.8486238532110092} +023405/063150, loss: 0.032287, avg_loss: 0.190173 +023410/063150, loss: 0.003912, avg_loss: 0.190135 +023415/063150, loss: 0.003492, avg_loss: 0.190102 +023420/063150, loss: 0.027165, avg_loss: 0.190070 +023425/063150, loss: 0.004454, avg_loss: 0.190032 +023430/063150, loss: 0.047393, avg_loss: 0.189997 +023435/063150, loss: 0.003825, avg_loss: 0.189963 +023440/063150, loss: 0.007256, avg_loss: 0.189925 +023445/063150, loss: 0.004597, avg_loss: 0.189893 +023450/063150, loss: 0.007908, avg_loss: 0.189866 +023455/063150, loss: 0.016762, avg_loss: 0.189831 +023460/063150, loss: 0.006530, avg_loss: 0.189809 +023465/063150, loss: 0.174834, avg_loss: 0.189793 +023470/063150, loss: 0.114805, avg_loss: 0.189763 +023475/063150, loss: 0.047003, avg_loss: 0.189736 +023480/063150, loss: 0.018862, avg_loss: 0.189702 +023485/063150, loss: 0.122990, avg_loss: 0.189675 +023490/063150, loss: 0.033680, avg_loss: 0.189646 +023495/063150, loss: 0.019300, avg_loss: 0.189625 +023500/063150, loss: 0.025757, avg_loss: 0.189599 +023505/063150, loss: 0.169331, avg_loss: 0.189579 +023510/063150, loss: 0.201785, avg_loss: 0.189561 +023515/063150, loss: 0.015488, avg_loss: 0.189527 +023520/063150, loss: 0.021997, avg_loss: 0.189496 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23520/63150: {'accuracy': 0.8451834862385321} +023525/063150, loss: 0.036511, avg_loss: 0.189470 +023530/063150, loss: 0.047651, avg_loss: 0.189442 +023535/063150, loss: 0.187139, avg_loss: 0.189415 +023540/063150, loss: 0.038144, avg_loss: 0.189393 +023545/063150, loss: 0.071739, avg_loss: 0.189372 +023550/063150, loss: 0.034931, avg_loss: 0.189348 +023555/063150, loss: 0.009769, avg_loss: 0.189322 +023560/063150, loss: 0.060041, avg_loss: 0.189294 +023565/063150, loss: 0.039219, avg_loss: 0.189265 +023570/063150, loss: 0.227561, avg_loss: 0.189240 +023575/063150, loss: 0.163626, avg_loss: 0.189214 +023580/063150, loss: 0.041914, avg_loss: 0.189185 +023585/063150, loss: 0.012803, avg_loss: 0.189147 +023590/063150, loss: 0.003413, avg_loss: 0.189120 +023595/063150, loss: 0.003761, avg_loss: 0.189088 +023600/063150, loss: 0.059110, avg_loss: 0.189064 +023605/063150, loss: 0.038025, avg_loss: 0.189041 +023610/063150, loss: 0.031567, avg_loss: 0.189014 +023615/063150, loss: 0.030665, avg_loss: 0.188984 +023620/063150, loss: 0.127741, avg_loss: 0.188961 +023625/063150, loss: 0.044313, avg_loss: 0.188938 +023630/063150, loss: 0.022755, avg_loss: 0.188904 +023635/063150, loss: 0.019720, avg_loss: 0.188874 +023640/063150, loss: 0.144572, avg_loss: 0.188847 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23640/63150: {'accuracy': 0.8589449541284404} +023645/063150, loss: 0.121178, avg_loss: 0.188816 +023650/063150, loss: 0.051083, avg_loss: 0.188780 +023655/063150, loss: 0.079024, avg_loss: 0.188757 +023660/063150, loss: 0.145072, avg_loss: 0.188738 +023665/063150, loss: 0.144136, avg_loss: 0.188713 +023670/063150, loss: 0.090432, avg_loss: 0.188680 +023675/063150, loss: 0.134375, avg_loss: 0.188651 +023680/063150, loss: 0.002004, avg_loss: 0.188619 +023685/063150, loss: 0.110233, 
avg_loss: 0.188589 +023690/063150, loss: 0.030898, avg_loss: 0.188554 +023695/063150, loss: 0.037261, avg_loss: 0.188522 +023700/063150, loss: 0.067426, avg_loss: 0.188492 +023705/063150, loss: 0.012747, avg_loss: 0.188459 +023710/063150, loss: 0.069943, avg_loss: 0.188442 +023715/063150, loss: 0.008648, avg_loss: 0.188414 +023720/063150, loss: 0.049922, avg_loss: 0.188384 +023725/063150, loss: 0.142128, avg_loss: 0.188356 +023730/063150, loss: 0.079043, avg_loss: 0.188330 +023735/063150, loss: 0.025516, avg_loss: 0.188301 +023740/063150, loss: 0.233129, avg_loss: 0.188284 +023745/063150, loss: 0.147132, avg_loss: 0.188256 +023750/063150, loss: 0.081683, avg_loss: 0.188224 +023755/063150, loss: 0.037890, avg_loss: 0.188204 +023760/063150, loss: 0.012304, avg_loss: 0.188169 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23760/63150: {'accuracy': 0.856651376146789} +023765/063150, loss: 0.081415, avg_loss: 0.188148 +023770/063150, loss: 0.036861, avg_loss: 0.188116 +023775/063150, loss: 0.028176, avg_loss: 0.188100 +023780/063150, loss: 0.009934, avg_loss: 0.188076 +023785/063150, loss: 0.026138, avg_loss: 0.188053 +023790/063150, loss: 0.063748, avg_loss: 0.188022 +023795/063150, loss: 0.115647, avg_loss: 0.188002 +023800/063150, loss: 0.016312, avg_loss: 0.187977 +023805/063150, loss: 0.049148, avg_loss: 0.187942 +023810/063150, loss: 0.006433, avg_loss: 0.187915 +023815/063150, loss: 0.237302, avg_loss: 0.187894 +023820/063150, loss: 0.033043, avg_loss: 0.187862 +023825/063150, loss: 0.285820, avg_loss: 0.187838 +023830/063150, loss: 0.021223, avg_loss: 0.187808 +023835/063150, loss: 0.009376, avg_loss: 0.187779 +023840/063150, loss: 0.040155, avg_loss: 0.187747 +023845/063150, loss: 0.005624, avg_loss: 0.187710 +023850/063150, loss: 0.041572, avg_loss: 0.187684 +023855/063150, loss: 0.021178, avg_loss: 0.187652 +023860/063150, loss: 0.051807, avg_loss: 0.187621 +023865/063150, loss: 0.045404, avg_loss: 0.187599 +023870/063150, loss: 0.021514, avg_loss: 0.187581 +023875/063150, loss: 0.055826, avg_loss: 0.187549 +023880/063150, loss: 0.065029, avg_loss: 0.187524 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 23880/63150: {'accuracy': 0.8623853211009175} +023885/063150, loss: 0.008970, avg_loss: 0.187491 +023890/063150, loss: 0.040309, avg_loss: 0.187466 +023895/063150, loss: 0.006797, avg_loss: 0.187441 +023900/063150, loss: 0.259366, avg_loss: 0.187418 +023905/063150, loss: 0.042972, avg_loss: 0.187385 +023910/063150, loss: 0.036785, avg_loss: 0.187353 +023915/063150, loss: 0.022961, avg_loss: 0.187318 +023920/063150, loss: 0.050631, avg_loss: 0.187297 +023925/063150, loss: 0.202922, avg_loss: 0.187272 +023930/063150, loss: 0.068438, avg_loss: 0.187250 +023935/063150, loss: 0.119687, avg_loss: 0.187227 +023940/063150, loss: 0.013069, avg_loss: 0.187199 +023945/063150, loss: 0.082980, avg_loss: 0.187175 +023950/063150, loss: 0.046344, avg_loss: 0.187147 +023955/063150, loss: 0.019819, avg_loss: 0.187123 +023960/063150, loss: 0.009261, avg_loss: 0.187088 +023965/063150, loss: 0.044669, avg_loss: 0.187054 +023970/063150, loss: 0.029293, avg_loss: 0.187019 +023975/063150, loss: 0.095187, avg_loss: 0.186989 +023980/063150, loss: 0.008274, avg_loss: 0.186954 +023985/063150, loss: 0.265252, avg_loss: 0.186942 +023990/063150, loss: 0.065779, avg_loss: 0.186919 +023995/063150, loss: 0.007927, avg_loss: 0.186896 +024000/063150, loss: 0.033144, avg_loss: 0.186864 
+***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24000/63150: {'accuracy': 0.8658256880733946} +024005/063150, loss: 0.079531, avg_loss: 0.186848 +024010/063150, loss: 0.078247, avg_loss: 0.186826 +024015/063150, loss: 0.012809, avg_loss: 0.186798 +024020/063150, loss: 0.070974, avg_loss: 0.186774 +024025/063150, loss: 0.101348, avg_loss: 0.186751 +024030/063150, loss: 0.193687, avg_loss: 0.186727 +024035/063150, loss: 0.142077, avg_loss: 0.186698 +024040/063150, loss: 0.016566, avg_loss: 0.186668 +024045/063150, loss: 0.031934, avg_loss: 0.186634 +024050/063150, loss: 0.115324, avg_loss: 0.186605 +024055/063150, loss: 0.046231, avg_loss: 0.186575 +024060/063150, loss: 0.150850, avg_loss: 0.186557 +024065/063150, loss: 0.240307, avg_loss: 0.186540 +024070/063150, loss: 0.156217, avg_loss: 0.186512 +024075/063150, loss: 0.214173, avg_loss: 0.186485 +024080/063150, loss: 0.002789, avg_loss: 0.186454 +024085/063150, loss: 0.074546, avg_loss: 0.186438 +024090/063150, loss: 0.002071, avg_loss: 0.186408 +024095/063150, loss: 0.037241, avg_loss: 0.186377 +024100/063150, loss: 0.055435, avg_loss: 0.186347 +024105/063150, loss: 0.003818, avg_loss: 0.186314 +024110/063150, loss: 0.038079, avg_loss: 0.186281 +024115/063150, loss: 0.011029, avg_loss: 0.186255 +024120/063150, loss: 0.097449, avg_loss: 0.186234 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24120/63150: {'accuracy': 0.8509174311926605} +024125/063150, loss: 0.079656, avg_loss: 0.186208 +024130/063150, loss: 0.190383, avg_loss: 0.186189 +024135/063150, loss: 0.021348, avg_loss: 0.186158 +024140/063150, loss: 0.019057, avg_loss: 0.186123 +024145/063150, loss: 0.007752, avg_loss: 0.186094 +024150/063150, loss: 0.014676, avg_loss: 0.186066 +024155/063150, loss: 0.010485, avg_loss: 0.186033 +024160/063150, loss: 0.089235, avg_loss: 0.186014 +024165/063150, loss: 0.100111, avg_loss: 0.185991 +024170/063150, loss: 0.045686, avg_loss: 0.185957 +024175/063150, loss: 0.021316, avg_loss: 0.185927 +024180/063150, loss: 0.066890, avg_loss: 0.185894 +024185/063150, loss: 0.017975, avg_loss: 0.185864 +024190/063150, loss: 0.024485, avg_loss: 0.185829 +024195/063150, loss: 0.082547, avg_loss: 0.185810 +024200/063150, loss: 0.007939, avg_loss: 0.185778 +024205/063150, loss: 0.016385, avg_loss: 0.185758 +024210/063150, loss: 0.024695, avg_loss: 0.185736 +024215/063150, loss: 0.037296, avg_loss: 0.185711 +024220/063150, loss: 0.045817, avg_loss: 0.185678 +024225/063150, loss: 0.045894, avg_loss: 0.185647 +024230/063150, loss: 0.088235, avg_loss: 0.185618 +024235/063150, loss: 0.097757, avg_loss: 0.185587 +024240/063150, loss: 0.067483, avg_loss: 0.185569 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24240/63150: {'accuracy': 0.8532110091743119} +024245/063150, loss: 0.017868, avg_loss: 0.185557 +024250/063150, loss: 0.013520, avg_loss: 0.185523 +024255/063150, loss: 0.023752, avg_loss: 0.185510 +024260/063150, loss: 0.012535, avg_loss: 0.185487 +024265/063150, loss: 0.047992, avg_loss: 0.185460 +024270/063150, loss: 0.032897, avg_loss: 0.185440 +024275/063150, loss: 0.014173, avg_loss: 0.185429 +024280/063150, loss: 0.047329, avg_loss: 0.185407 +024285/063150, loss: 0.104657, avg_loss: 0.185382 +024290/063150, loss: 0.010988, avg_loss: 0.185349 +024295/063150, loss: 0.019007, avg_loss: 0.185326 +024300/063150, loss: 0.036677, avg_loss: 0.185304 
+024305/063150, loss: 0.025723, avg_loss: 0.185275 +024310/063150, loss: 0.007102, avg_loss: 0.185245 +024315/063150, loss: 0.041256, avg_loss: 0.185226 +024320/063150, loss: 0.194447, avg_loss: 0.185207 +024325/063150, loss: 0.023186, avg_loss: 0.185183 +024330/063150, loss: 0.010497, avg_loss: 0.185152 +024335/063150, loss: 0.015822, avg_loss: 0.185123 +024340/063150, loss: 0.083555, avg_loss: 0.185099 +024345/063150, loss: 0.004449, avg_loss: 0.185071 +024350/063150, loss: 0.004824, avg_loss: 0.185040 +024355/063150, loss: 0.182587, avg_loss: 0.185017 +024360/063150, loss: 0.048005, avg_loss: 0.184988 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24360/63150: {'accuracy': 0.8623853211009175} +024365/063150, loss: 0.068125, avg_loss: 0.184959 +024370/063150, loss: 0.096433, avg_loss: 0.184942 +024375/063150, loss: 0.013241, avg_loss: 0.184910 +024380/063150, loss: 0.030887, avg_loss: 0.184882 +024385/063150, loss: 0.003620, avg_loss: 0.184853 +024390/063150, loss: 0.002875, avg_loss: 0.184821 +024395/063150, loss: 0.001897, avg_loss: 0.184791 +024400/063150, loss: 0.077137, avg_loss: 0.184771 +024405/063150, loss: 0.012710, avg_loss: 0.184737 +024410/063150, loss: 0.048309, avg_loss: 0.184705 +024415/063150, loss: 0.034666, avg_loss: 0.184683 +024420/063150, loss: 0.115656, avg_loss: 0.184663 +024425/063150, loss: 0.004650, avg_loss: 0.184629 +024430/063150, loss: 0.011696, avg_loss: 0.184603 +024435/063150, loss: 0.173228, avg_loss: 0.184578 +024440/063150, loss: 0.189898, avg_loss: 0.184556 +024445/063150, loss: 0.012757, avg_loss: 0.184521 +024450/063150, loss: 0.009241, avg_loss: 0.184493 +024455/063150, loss: 0.168062, avg_loss: 0.184473 +024460/063150, loss: 0.033910, avg_loss: 0.184447 +024465/063150, loss: 0.141621, avg_loss: 0.184420 +024470/063150, loss: 0.042807, avg_loss: 0.184398 +024475/063150, loss: 0.031887, avg_loss: 0.184375 +024480/063150, loss: 0.008083, avg_loss: 0.184353 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24480/63150: {'accuracy': 0.8600917431192661} +024485/063150, loss: 0.153941, avg_loss: 0.184336 +024490/063150, loss: 0.087934, avg_loss: 0.184307 +024495/063150, loss: 0.281662, avg_loss: 0.184289 +024500/063150, loss: 0.020725, avg_loss: 0.184259 +024505/063150, loss: 0.006375, avg_loss: 0.184237 +024510/063150, loss: 0.199935, avg_loss: 0.184220 +024515/063150, loss: 0.067076, avg_loss: 0.184189 +024520/063150, loss: 0.034830, avg_loss: 0.184158 +024525/063150, loss: 0.013565, avg_loss: 0.184129 +024530/063150, loss: 0.005967, avg_loss: 0.184099 +024535/063150, loss: 0.062543, avg_loss: 0.184070 +024540/063150, loss: 0.149947, avg_loss: 0.184051 +024545/063150, loss: 0.115131, avg_loss: 0.184024 +024550/063150, loss: 0.026918, avg_loss: 0.184010 +024555/063150, loss: 0.016813, avg_loss: 0.183981 +024560/063150, loss: 0.036097, avg_loss: 0.183952 +024565/063150, loss: 0.028338, avg_loss: 0.183928 +024570/063150, loss: 0.149037, avg_loss: 0.183902 +024575/063150, loss: 0.038650, avg_loss: 0.183874 +024580/063150, loss: 0.041926, avg_loss: 0.183860 +024585/063150, loss: 0.084249, avg_loss: 0.183832 +024590/063150, loss: 0.128375, avg_loss: 0.183810 +024595/063150, loss: 0.007631, avg_loss: 0.183784 +024600/063150, loss: 0.047822, avg_loss: 0.183763 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24600/63150: {'accuracy': 0.8577981651376146} 
+024605/063150, loss: 0.120331, avg_loss: 0.183745 +024610/063150, loss: 0.212945, avg_loss: 0.183730 +024615/063150, loss: 0.042800, avg_loss: 0.183706 +024620/063150, loss: 0.069049, avg_loss: 0.183680 +024625/063150, loss: 0.187248, avg_loss: 0.183659 +024630/063150, loss: 0.036414, avg_loss: 0.183632 +024635/063150, loss: 0.042599, avg_loss: 0.183603 +024640/063150, loss: 0.069723, avg_loss: 0.183580 +024645/063150, loss: 0.022289, avg_loss: 0.183558 +024650/063150, loss: 0.018387, avg_loss: 0.183529 +024655/063150, loss: 0.002605, avg_loss: 0.183507 +024660/063150, loss: 0.014013, avg_loss: 0.183480 +024665/063150, loss: 0.018442, avg_loss: 0.183456 +024670/063150, loss: 0.093861, avg_loss: 0.183427 +024675/063150, loss: 0.096538, avg_loss: 0.183404 +024680/063150, loss: 0.010969, avg_loss: 0.183377 +024685/063150, loss: 0.031297, avg_loss: 0.183344 +024690/063150, loss: 0.013102, avg_loss: 0.183311 +024695/063150, loss: 0.003529, avg_loss: 0.183283 +024700/063150, loss: 0.032750, avg_loss: 0.183249 +024705/063150, loss: 0.045763, avg_loss: 0.183220 +024710/063150, loss: 0.025768, avg_loss: 0.183206 +024715/063150, loss: 0.011057, avg_loss: 0.183180 +024720/063150, loss: 0.036514, avg_loss: 0.183156 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24720/63150: {'accuracy': 0.8532110091743119} +024725/063150, loss: 0.064662, avg_loss: 0.183127 +024730/063150, loss: 0.018193, avg_loss: 0.183105 +024735/063150, loss: 0.030102, avg_loss: 0.183087 +024740/063150, loss: 0.162927, avg_loss: 0.183064 +024745/063150, loss: 0.017302, avg_loss: 0.183041 +024750/063150, loss: 0.062712, avg_loss: 0.183030 +024755/063150, loss: 0.028731, avg_loss: 0.183003 +024760/063150, loss: 0.192076, avg_loss: 0.182982 +024765/063150, loss: 0.009942, avg_loss: 0.182959 +024770/063150, loss: 0.014857, avg_loss: 0.182932 +024775/063150, loss: 0.136700, avg_loss: 0.182910 +024780/063150, loss: 0.097069, avg_loss: 0.182888 +024785/063150, loss: 0.072616, avg_loss: 0.182863 +024790/063150, loss: 0.063138, avg_loss: 0.182847 +024795/063150, loss: 0.026529, avg_loss: 0.182819 +024800/063150, loss: 0.006449, avg_loss: 0.182790 +024805/063150, loss: 0.012190, avg_loss: 0.182773 +024810/063150, loss: 0.041017, avg_loss: 0.182745 +024815/063150, loss: 0.054915, avg_loss: 0.182721 +024820/063150, loss: 0.192289, avg_loss: 0.182702 +024825/063150, loss: 0.075893, avg_loss: 0.182685 +024830/063150, loss: 0.226798, avg_loss: 0.182666 +024835/063150, loss: 0.032918, avg_loss: 0.182640 +024840/063150, loss: 0.046388, avg_loss: 0.182618 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24840/63150: {'accuracy': 0.8325688073394495} +024845/063150, loss: 0.024028, avg_loss: 0.182600 +024850/063150, loss: 0.041789, avg_loss: 0.182576 +024855/063150, loss: 0.013222, avg_loss: 0.182544 +024860/063150, loss: 0.026604, avg_loss: 0.182522 +024865/063150, loss: 0.088350, avg_loss: 0.182494 +024870/063150, loss: 0.022411, avg_loss: 0.182473 +024875/063150, loss: 0.016755, avg_loss: 0.182442 +024880/063150, loss: 0.004009, avg_loss: 0.182413 +024885/063150, loss: 0.148761, avg_loss: 0.182398 +024890/063150, loss: 0.003763, avg_loss: 0.182366 +024895/063150, loss: 0.092980, avg_loss: 0.182339 +024900/063150, loss: 0.069951, avg_loss: 0.182311 +024905/063150, loss: 0.009654, avg_loss: 0.182290 +024910/063150, loss: 0.011232, avg_loss: 0.182267 +024915/063150, loss: 0.054456, avg_loss: 0.182243 +024920/063150, 
loss: 0.020618, avg_loss: 0.182212 +024925/063150, loss: 0.073096, avg_loss: 0.182186 +024930/063150, loss: 0.067772, avg_loss: 0.182154 +024935/063150, loss: 0.028049, avg_loss: 0.182127 +024940/063150, loss: 0.002562, avg_loss: 0.182100 +024945/063150, loss: 0.156420, avg_loss: 0.182081 +024950/063150, loss: 0.014377, avg_loss: 0.182054 +024955/063150, loss: 0.015429, avg_loss: 0.182037 +024960/063150, loss: 0.071069, avg_loss: 0.182014 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 24960/63150: {'accuracy': 0.8555045871559633} +024965/063150, loss: 0.020242, avg_loss: 0.181989 +024970/063150, loss: 0.035802, avg_loss: 0.181963 +024975/063150, loss: 0.043467, avg_loss: 0.181941 +024980/063150, loss: 0.059451, avg_loss: 0.181913 +024985/063150, loss: 0.085899, avg_loss: 0.181892 +024990/063150, loss: 0.177826, avg_loss: 0.181868 +024995/063150, loss: 0.012488, avg_loss: 0.181843 +025000/063150, loss: 0.020636, avg_loss: 0.181820 +025005/063150, loss: 0.050793, avg_loss: 0.181790 +025010/063150, loss: 0.014892, avg_loss: 0.181764 +025015/063150, loss: 0.208433, avg_loss: 0.181746 +025020/063150, loss: 0.035705, avg_loss: 0.181724 +025025/063150, loss: 0.021759, avg_loss: 0.181696 +025030/063150, loss: 0.026641, avg_loss: 0.181667 +025035/063150, loss: 0.033330, avg_loss: 0.181641 +025040/063150, loss: 0.034776, avg_loss: 0.181616 +025045/063150, loss: 0.021790, avg_loss: 0.181588 +025050/063150, loss: 0.016698, avg_loss: 0.181558 +025055/063150, loss: 0.031271, avg_loss: 0.181526 +025060/063150, loss: 0.086882, avg_loss: 0.181509 +025065/063150, loss: 0.014553, avg_loss: 0.181480 +025070/063150, loss: 0.182002, avg_loss: 0.181468 +025075/063150, loss: 0.046344, avg_loss: 0.181437 +025080/063150, loss: 0.164926, avg_loss: 0.181417 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 25080/63150: {'accuracy': 0.8635321100917431} +025085/063150, loss: 0.027479, avg_loss: 0.181393 +025090/063150, loss: 0.003435, avg_loss: 0.181362 +025095/063150, loss: 0.011622, avg_loss: 0.181333 +025100/063150, loss: 0.062323, avg_loss: 0.181308 +025105/063150, loss: 0.056266, avg_loss: 0.181284 +025110/063150, loss: 0.118883, avg_loss: 0.181259 +025115/063150, loss: 0.055621, avg_loss: 0.181237 +025120/063150, loss: 0.033765, avg_loss: 0.181218 +025125/063150, loss: 0.014399, avg_loss: 0.181199 +025130/063150, loss: 0.032655, avg_loss: 0.181168 +025135/063150, loss: 0.121390, avg_loss: 0.181151 +025140/063150, loss: 0.149139, avg_loss: 0.181129 +025145/063150, loss: 0.033929, avg_loss: 0.181106 +025150/063150, loss: 0.155339, avg_loss: 0.181089 +025155/063150, loss: 0.006720, avg_loss: 0.181065 +025160/063150, loss: 0.009288, avg_loss: 0.181044 +025165/063150, loss: 0.029317, avg_loss: 0.181022 +025170/063150, loss: 0.098267, avg_loss: 0.180998 +025175/063150, loss: 0.025863, avg_loss: 0.180974 +025180/063150, loss: 0.048030, avg_loss: 0.180953 +025185/063150, loss: 0.070609, avg_loss: 0.180945 +025190/063150, loss: 0.043682, avg_loss: 0.180933 +025195/063150, loss: 0.272356, avg_loss: 0.180920 +025200/063150, loss: 0.151014, avg_loss: 0.180896 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 11, step 25200/63150: {'accuracy': 0.8440366972477065} +025205/063150, loss: 0.035805, avg_loss: 0.180873 +025210/063150, loss: 0.019690, avg_loss: 0.180852 +025215/063150, loss: 0.073691, avg_loss: 0.180828 +025220/063150, loss: 
0.030669, avg_loss: 0.180801 +025225/063150, loss: 0.188566, avg_loss: 0.180785 +025230/063150, loss: 0.078658, avg_loss: 0.180763 +025235/063150, loss: 0.030544, avg_loss: 0.180741 +025240/063150, loss: 0.039595, avg_loss: 0.180711 +025245/063150, loss: 0.078517, avg_loss: 0.180696 +025250/063150, loss: 0.116549, avg_loss: 0.180675 +025255/063150, loss: 0.110948, avg_loss: 0.180659 +025260/063150, loss: 0.055206, avg_loss: 0.180637 +025265/063150, loss: 0.070407, avg_loss: 0.180607 +025270/063150, loss: 0.012311, avg_loss: 0.180581 +025275/063150, loss: 0.039873, avg_loss: 0.180556 +025280/063150, loss: 0.102752, avg_loss: 0.180532 +025285/063150, loss: 0.016404, avg_loss: 0.180504 +025290/063150, loss: 0.003273, avg_loss: 0.180473 +025295/063150, loss: 0.027373, avg_loss: 0.180450 +025300/063150, loss: 0.018549, avg_loss: 0.180423 +025305/063150, loss: 0.008971, avg_loss: 0.180395 +025310/063150, loss: 0.010499, avg_loss: 0.180370 +025315/063150, loss: 0.254581, avg_loss: 0.180357 +025320/063150, loss: 0.026328, avg_loss: 0.180330 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 25320/63150: {'accuracy': 0.8486238532110092} +025325/063150, loss: 0.036055, avg_loss: 0.180314 +025330/063150, loss: 0.004353, avg_loss: 0.180284 +025335/063150, loss: 0.126383, avg_loss: 0.180256 +025340/063150, loss: 0.116422, avg_loss: 0.180232 +025345/063150, loss: 0.004972, avg_loss: 0.180207 +025350/063150, loss: 0.012679, avg_loss: 0.180176 +025355/063150, loss: 0.151178, avg_loss: 0.180152 +025360/063150, loss: 0.010965, avg_loss: 0.180131 +025365/063150, loss: 0.015291, avg_loss: 0.180102 +025370/063150, loss: 0.030493, avg_loss: 0.180078 +025375/063150, loss: 0.038352, avg_loss: 0.180062 +025380/063150, loss: 0.005015, avg_loss: 0.180040 +025385/063150, loss: 0.010526, avg_loss: 0.180016 +025390/063150, loss: 0.078440, avg_loss: 0.180003 +025395/063150, loss: 0.078548, avg_loss: 0.179988 +025400/063150, loss: 0.007958, avg_loss: 0.179958 +025405/063150, loss: 0.025154, avg_loss: 0.179933 +025410/063150, loss: 0.044686, avg_loss: 0.179909 +025415/063150, loss: 0.026168, avg_loss: 0.179891 +025420/063150, loss: 0.029578, avg_loss: 0.179861 +025425/063150, loss: 0.098433, avg_loss: 0.179834 +025430/063150, loss: 0.021918, avg_loss: 0.179806 +025435/063150, loss: 0.064521, avg_loss: 0.179775 +025440/063150, loss: 0.130319, avg_loss: 0.179748 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 25440/63150: {'accuracy': 0.8543577981651376} +025445/063150, loss: 0.043099, avg_loss: 0.179723 +025450/063150, loss: 0.024163, avg_loss: 0.179695 +025455/063150, loss: 0.105224, avg_loss: 0.179667 +025460/063150, loss: 0.076689, avg_loss: 0.179642 +025465/063150, loss: 0.107199, avg_loss: 0.179623 +025470/063150, loss: 0.142201, avg_loss: 0.179604 +025475/063150, loss: 0.003523, avg_loss: 0.179573 +025480/063150, loss: 0.103005, avg_loss: 0.179549 +025485/063150, loss: 0.044651, avg_loss: 0.179529 +025490/063150, loss: 0.182101, avg_loss: 0.179506 +025495/063150, loss: 0.055720, avg_loss: 0.179478 +025500/063150, loss: 0.059569, avg_loss: 0.179448 +025505/063150, loss: 0.024212, avg_loss: 0.179419 +025510/063150, loss: 0.085435, avg_loss: 0.179397 +025515/063150, loss: 0.002877, avg_loss: 0.179367 +025520/063150, loss: 0.006330, avg_loss: 0.179341 +025525/063150, loss: 0.127947, avg_loss: 0.179315 +025530/063150, loss: 0.043959, avg_loss: 0.179293 +025535/063150, loss: 0.103943, 
avg_loss: 0.179270 +025540/063150, loss: 0.038381, avg_loss: 0.179246 +025545/063150, loss: 0.030754, avg_loss: 0.179223 +025550/063150, loss: 0.019015, avg_loss: 0.179196 +025555/063150, loss: 0.054003, avg_loss: 0.179176 +025560/063150, loss: 0.086202, avg_loss: 0.179151 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 25560/63150: {'accuracy': 0.8509174311926605} +025565/063150, loss: 0.123316, avg_loss: 0.179124 +025570/063150, loss: 0.009997, avg_loss: 0.179091 +025575/063150, loss: 0.034674, avg_loss: 0.179065 +025580/063150, loss: 0.009962, avg_loss: 0.179040 +025585/063150, loss: 0.007702, avg_loss: 0.179010 +025590/063150, loss: 0.005024, avg_loss: 0.178986 +025595/063150, loss: 0.006685, avg_loss: 0.178957 +025600/063150, loss: 0.025698, avg_loss: 0.178924 +025605/063150, loss: 0.007942, avg_loss: 0.178898 +025610/063150, loss: 0.005722, avg_loss: 0.178867 +025615/063150, loss: 0.067931, avg_loss: 0.178839 +025620/063150, loss: 0.092502, avg_loss: 0.178821 +025625/063150, loss: 0.130648, avg_loss: 0.178804 +025630/063150, loss: 0.035525, avg_loss: 0.178778 +025635/063150, loss: 0.149600, avg_loss: 0.178758 +025640/063150, loss: 0.024876, avg_loss: 0.178734 +025645/063150, loss: 0.061973, avg_loss: 0.178711 +025650/063150, loss: 0.037176, avg_loss: 0.178682 +025655/063150, loss: 0.010677, avg_loss: 0.178652 +025660/063150, loss: 0.002081, avg_loss: 0.178623 +025665/063150, loss: 0.016659, avg_loss: 0.178591 +025670/063150, loss: 0.019940, avg_loss: 0.178560 +025675/063150, loss: 0.012090, avg_loss: 0.178533 +025680/063150, loss: 0.016379, avg_loss: 0.178518 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 25680/63150: {'accuracy': 0.8612385321100917} +025685/063150, loss: 0.009089, avg_loss: 0.178490 +025690/063150, loss: 0.002533, avg_loss: 0.178470 +025695/063150, loss: 0.049635, avg_loss: 0.178446 +025700/063150, loss: 0.040922, avg_loss: 0.178421 +025705/063150, loss: 0.004225, avg_loss: 0.178393 +025710/063150, loss: 0.022018, avg_loss: 0.178362 +025715/063150, loss: 0.004992, avg_loss: 0.178340 +025720/063150, loss: 0.002986, avg_loss: 0.178309 +025725/063150, loss: 0.009355, avg_loss: 0.178278 +025730/063150, loss: 0.040988, avg_loss: 0.178252 +025735/063150, loss: 0.002253, avg_loss: 0.178220 +025740/063150, loss: 0.106646, avg_loss: 0.178192 +025745/063150, loss: 0.021209, avg_loss: 0.178172 +025750/063150, loss: 0.004755, avg_loss: 0.178142 +025755/063150, loss: 0.003120, avg_loss: 0.178109 +025760/063150, loss: 0.041219, avg_loss: 0.178080 +025765/063150, loss: 0.029819, avg_loss: 0.178055 +025770/063150, loss: 0.027382, avg_loss: 0.178024 +025775/063150, loss: 0.000718, avg_loss: 0.178000 +025780/063150, loss: 0.001890, avg_loss: 0.177970 +025785/063150, loss: 0.228049, avg_loss: 0.177954 +025790/063150, loss: 0.008679, avg_loss: 0.177932 +025795/063150, loss: 0.114314, avg_loss: 0.177907 +025800/063150, loss: 0.236510, avg_loss: 0.177884 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 25800/63150: {'accuracy': 0.8612385321100917} +025805/063150, loss: 0.174846, avg_loss: 0.177865 +025810/063150, loss: 0.006171, avg_loss: 0.177850 +025815/063150, loss: 0.172056, avg_loss: 0.177826 +025820/063150, loss: 0.071335, avg_loss: 0.177804 +025825/063150, loss: 0.007823, avg_loss: 0.177780 +025830/063150, loss: 0.071160, avg_loss: 0.177766 +025835/063150, loss: 0.005636, 
avg_loss: 0.177753 +025840/063150, loss: 0.028840, avg_loss: 0.177725 +025845/063150, loss: 0.025217, avg_loss: 0.177706 +025850/063150, loss: 0.110818, avg_loss: 0.177686 +025855/063150, loss: 0.024595, avg_loss: 0.177659 +025860/063150, loss: 0.040332, avg_loss: 0.177631 +025865/063150, loss: 0.023318, avg_loss: 0.177601 +025870/063150, loss: 0.031631, avg_loss: 0.177573 +025875/063150, loss: 0.040223, avg_loss: 0.177556 +025880/063150, loss: 0.056486, avg_loss: 0.177525 +025885/063150, loss: 0.009252, avg_loss: 0.177499 +025890/063150, loss: 0.029961, avg_loss: 0.177472 +025895/063150, loss: 0.004423, avg_loss: 0.177442 +025900/063150, loss: 0.057323, avg_loss: 0.177419 +025905/063150, loss: 0.038005, avg_loss: 0.177394 +025910/063150, loss: 0.014551, avg_loss: 0.177364 +025915/063150, loss: 0.002359, avg_loss: 0.177336 +025920/063150, loss: 0.031817, avg_loss: 0.177307 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 25920/63150: {'accuracy': 0.8612385321100917} +025925/063150, loss: 0.051613, avg_loss: 0.177281 +025930/063150, loss: 0.011287, avg_loss: 0.177255 +025935/063150, loss: 0.020584, avg_loss: 0.177228 +025940/063150, loss: 0.006318, avg_loss: 0.177202 +025945/063150, loss: 0.019082, avg_loss: 0.177176 +025950/063150, loss: 0.041726, avg_loss: 0.177153 +025955/063150, loss: 0.010348, avg_loss: 0.177124 +025960/063150, loss: 0.003156, avg_loss: 0.177097 +025965/063150, loss: 0.008176, avg_loss: 0.177074 +025970/063150, loss: 0.039437, avg_loss: 0.177045 +025975/063150, loss: 0.315310, avg_loss: 0.177033 +025980/063150, loss: 0.002024, avg_loss: 0.177004 +025985/063150, loss: 0.009122, avg_loss: 0.176978 +025990/063150, loss: 0.022559, avg_loss: 0.176953 +025995/063150, loss: 0.014303, avg_loss: 0.176927 +026000/063150, loss: 0.040606, avg_loss: 0.176900 +026005/063150, loss: 0.164569, avg_loss: 0.176880 +026010/063150, loss: 0.003887, avg_loss: 0.176848 +026015/063150, loss: 0.008937, avg_loss: 0.176822 +026020/063150, loss: 0.150807, avg_loss: 0.176802 +026025/063150, loss: 0.107966, avg_loss: 0.176776 +026030/063150, loss: 0.096582, avg_loss: 0.176755 +026035/063150, loss: 0.203925, avg_loss: 0.176733 +026040/063150, loss: 0.177352, avg_loss: 0.176720 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26040/63150: {'accuracy': 0.8635321100917431} +026045/063150, loss: 0.008895, avg_loss: 0.176698 +026050/063150, loss: 0.019497, avg_loss: 0.176678 +026055/063150, loss: 0.084837, avg_loss: 0.176656 +026060/063150, loss: 0.008363, avg_loss: 0.176636 +026065/063150, loss: 0.025034, avg_loss: 0.176612 +026070/063150, loss: 0.168590, avg_loss: 0.176592 +026075/063150, loss: 0.054124, avg_loss: 0.176569 +026080/063150, loss: 0.113294, avg_loss: 0.176542 +026085/063150, loss: 0.080666, avg_loss: 0.176518 +026090/063150, loss: 0.038165, avg_loss: 0.176495 +026095/063150, loss: 0.077737, avg_loss: 0.176469 +026100/063150, loss: 0.039365, avg_loss: 0.176449 +026105/063150, loss: 0.047292, avg_loss: 0.176424 +026110/063150, loss: 0.007632, avg_loss: 0.176403 +026115/063150, loss: 0.015617, avg_loss: 0.176373 +026120/063150, loss: 0.024119, avg_loss: 0.176355 +026125/063150, loss: 0.015245, avg_loss: 0.176325 +026130/063150, loss: 0.022136, avg_loss: 0.176296 +026135/063150, loss: 0.184966, avg_loss: 0.176278 +026140/063150, loss: 0.052855, avg_loss: 0.176252 +026145/063150, loss: 0.201355, avg_loss: 0.176238 +026150/063150, loss: 0.029181, avg_loss: 
0.176213 +026155/063150, loss: 0.034329, avg_loss: 0.176187 +026160/063150, loss: 0.010293, avg_loss: 0.176166 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26160/63150: {'accuracy': 0.8509174311926605} +026165/063150, loss: 0.019409, avg_loss: 0.176137 +026170/063150, loss: 0.197464, avg_loss: 0.176113 +026175/063150, loss: 0.109693, avg_loss: 0.176089 +026180/063150, loss: 0.079245, avg_loss: 0.176066 +026185/063150, loss: 0.031555, avg_loss: 0.176049 +026190/063150, loss: 0.045944, avg_loss: 0.176023 +026195/063150, loss: 0.049008, avg_loss: 0.175998 +026200/063150, loss: 0.011719, avg_loss: 0.175971 +026205/063150, loss: 0.071017, avg_loss: 0.175945 +026210/063150, loss: 0.031797, avg_loss: 0.175921 +026215/063150, loss: 0.048544, avg_loss: 0.175893 +026220/063150, loss: 0.020856, avg_loss: 0.175870 +026225/063150, loss: 0.005539, avg_loss: 0.175843 +026230/063150, loss: 0.244267, avg_loss: 0.175824 +026235/063150, loss: 0.003661, avg_loss: 0.175799 +026240/063150, loss: 0.050767, avg_loss: 0.175776 +026245/063150, loss: 0.039586, avg_loss: 0.175753 +026250/063150, loss: 0.015857, avg_loss: 0.175737 +026255/063150, loss: 0.030485, avg_loss: 0.175717 +026260/063150, loss: 0.030583, avg_loss: 0.175700 +026265/063150, loss: 0.116681, avg_loss: 0.175674 +026270/063150, loss: 0.072904, avg_loss: 0.175660 +026275/063150, loss: 0.133219, avg_loss: 0.175638 +026280/063150, loss: 0.034455, avg_loss: 0.175623 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26280/63150: {'accuracy': 0.8612385321100917} +026285/063150, loss: 0.018311, avg_loss: 0.175605 +026290/063150, loss: 0.038477, avg_loss: 0.175580 +026295/063150, loss: 0.015998, avg_loss: 0.175551 +026300/063150, loss: 0.109865, avg_loss: 0.175525 +026305/063150, loss: 0.035125, avg_loss: 0.175497 +026310/063150, loss: 0.007492, avg_loss: 0.175474 +026315/063150, loss: 0.133003, avg_loss: 0.175452 +026320/063150, loss: 0.038199, avg_loss: 0.175429 +026325/063150, loss: 0.061738, avg_loss: 0.175407 +026330/063150, loss: 0.022608, avg_loss: 0.175381 +026335/063150, loss: 0.058620, avg_loss: 0.175353 +026340/063150, loss: 0.079079, avg_loss: 0.175332 +026345/063150, loss: 0.118646, avg_loss: 0.175315 +026350/063150, loss: 0.004813, avg_loss: 0.175296 +026355/063150, loss: 0.061195, avg_loss: 0.175268 +026360/063150, loss: 0.016608, avg_loss: 0.175243 +026365/063150, loss: 0.101748, avg_loss: 0.175218 +026370/063150, loss: 0.087435, avg_loss: 0.175193 +026375/063150, loss: 0.064701, avg_loss: 0.175166 +026380/063150, loss: 0.044360, avg_loss: 0.175139 +026385/063150, loss: 0.013221, avg_loss: 0.175112 +026390/063150, loss: 0.021608, avg_loss: 0.175096 +026395/063150, loss: 0.004493, avg_loss: 0.175071 +026400/063150, loss: 0.055042, avg_loss: 0.175055 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26400/63150: {'accuracy': 0.8635321100917431} +026405/063150, loss: 0.016765, avg_loss: 0.175032 +026410/063150, loss: 0.031683, avg_loss: 0.175003 +026415/063150, loss: 0.165354, avg_loss: 0.174981 +026420/063150, loss: 0.094164, avg_loss: 0.174959 +026425/063150, loss: 0.073218, avg_loss: 0.174932 +026430/063150, loss: 0.033795, avg_loss: 0.174914 +026435/063150, loss: 0.012268, avg_loss: 0.174893 +026440/063150, loss: 0.021147, avg_loss: 0.174872 +026445/063150, loss: 0.052250, avg_loss: 0.174843 +026450/063150, loss: 0.093198, avg_loss: 0.174832 
+026455/063150, loss: 0.016378, avg_loss: 0.174802 +026460/063150, loss: 0.033599, avg_loss: 0.174780 +026465/063150, loss: 0.044320, avg_loss: 0.174756 +026470/063150, loss: 0.030520, avg_loss: 0.174735 +026475/063150, loss: 0.031806, avg_loss: 0.174711 +026480/063150, loss: 0.013599, avg_loss: 0.174689 +026485/063150, loss: 0.019508, avg_loss: 0.174670 +026490/063150, loss: 0.029296, avg_loss: 0.174652 +026495/063150, loss: 0.040407, avg_loss: 0.174631 +026500/063150, loss: 0.033737, avg_loss: 0.174613 +026505/063150, loss: 0.140205, avg_loss: 0.174595 +026510/063150, loss: 0.092215, avg_loss: 0.174573 +026515/063150, loss: 0.079246, avg_loss: 0.174550 +026520/063150, loss: 0.006094, avg_loss: 0.174526 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26520/63150: {'accuracy': 0.8577981651376146} +026525/063150, loss: 0.020210, avg_loss: 0.174500 +026530/063150, loss: 0.107439, avg_loss: 0.174481 +026535/063150, loss: 0.005114, avg_loss: 0.174455 +026540/063150, loss: 0.027660, avg_loss: 0.174429 +026545/063150, loss: 0.013018, avg_loss: 0.174404 +026550/063150, loss: 0.037231, avg_loss: 0.174377 +026555/063150, loss: 0.004058, avg_loss: 0.174357 +026560/063150, loss: 0.078024, avg_loss: 0.174340 +026565/063150, loss: 0.022878, avg_loss: 0.174328 +026570/063150, loss: 0.143254, avg_loss: 0.174313 +026575/063150, loss: 0.148537, avg_loss: 0.174300 +026580/063150, loss: 0.072872, avg_loss: 0.174283 +026585/063150, loss: 0.007272, avg_loss: 0.174253 +026590/063150, loss: 0.177293, avg_loss: 0.174237 +026595/063150, loss: 0.093959, avg_loss: 0.174211 +026600/063150, loss: 0.062901, avg_loss: 0.174188 +026605/063150, loss: 0.054010, avg_loss: 0.174164 +026610/063150, loss: 0.042959, avg_loss: 0.174138 +026615/063150, loss: 0.025157, avg_loss: 0.174113 +026620/063150, loss: 0.014808, avg_loss: 0.174085 +026625/063150, loss: 0.060873, avg_loss: 0.174063 +026630/063150, loss: 0.008438, avg_loss: 0.174035 +026635/063150, loss: 0.012551, avg_loss: 0.174008 +026640/063150, loss: 0.165363, avg_loss: 0.173985 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26640/63150: {'accuracy': 0.8577981651376146} +026645/063150, loss: 0.009919, avg_loss: 0.173956 +026650/063150, loss: 0.051908, avg_loss: 0.173928 +026655/063150, loss: 0.059693, avg_loss: 0.173905 +026660/063150, loss: 0.224565, avg_loss: 0.173887 +026665/063150, loss: 0.006085, avg_loss: 0.173869 +026670/063150, loss: 0.062619, avg_loss: 0.173849 +026675/063150, loss: 0.005102, avg_loss: 0.173822 +026680/063150, loss: 0.016544, avg_loss: 0.173797 +026685/063150, loss: 0.005365, avg_loss: 0.173778 +026690/063150, loss: 0.051824, avg_loss: 0.173750 +026695/063150, loss: 0.009336, avg_loss: 0.173723 +026700/063150, loss: 0.091660, avg_loss: 0.173699 +026705/063150, loss: 0.002974, avg_loss: 0.173671 +026710/063150, loss: 0.071114, avg_loss: 0.173645 +026715/063150, loss: 0.005099, avg_loss: 0.173625 +026720/063150, loss: 0.006074, avg_loss: 0.173600 +026725/063150, loss: 0.235761, avg_loss: 0.173579 +026730/063150, loss: 0.067065, avg_loss: 0.173554 +026735/063150, loss: 0.048149, avg_loss: 0.173525 +026740/063150, loss: 0.008848, avg_loss: 0.173502 +026745/063150, loss: 0.008395, avg_loss: 0.173486 +026750/063150, loss: 0.001538, avg_loss: 0.173458 +026755/063150, loss: 0.257501, avg_loss: 0.173443 +026760/063150, loss: 0.005566, avg_loss: 0.173416 +***** Running dev evaluation ***** + Num examples = 872 + 
Instantaneous batch size per device = 32 +epoch 12, step 26760/63150: {'accuracy': 0.8635321100917431} +026765/063150, loss: 0.007365, avg_loss: 0.173394 +026770/063150, loss: 0.044137, avg_loss: 0.173372 +026775/063150, loss: 0.018343, avg_loss: 0.173347 +026780/063150, loss: 0.053034, avg_loss: 0.173332 +026785/063150, loss: 0.054620, avg_loss: 0.173305 +026790/063150, loss: 0.005234, avg_loss: 0.173291 +026795/063150, loss: 0.041505, avg_loss: 0.173264 +026800/063150, loss: 0.202493, avg_loss: 0.173246 +026805/063150, loss: 0.135826, avg_loss: 0.173230 +026810/063150, loss: 0.009055, avg_loss: 0.173206 +026815/063150, loss: 0.053127, avg_loss: 0.173185 +026820/063150, loss: 0.044213, avg_loss: 0.173169 +026825/063150, loss: 0.015025, avg_loss: 0.173152 +026830/063150, loss: 0.039702, avg_loss: 0.173128 +026835/063150, loss: 0.090928, avg_loss: 0.173106 +026840/063150, loss: 0.018924, avg_loss: 0.173086 +026845/063150, loss: 0.024336, avg_loss: 0.173060 +026850/063150, loss: 0.009836, avg_loss: 0.173032 +026855/063150, loss: 0.377723, avg_loss: 0.173020 +026860/063150, loss: 0.003201, avg_loss: 0.172993 +026865/063150, loss: 0.029066, avg_loss: 0.172965 +026870/063150, loss: 0.062505, avg_loss: 0.172944 +026875/063150, loss: 0.034272, avg_loss: 0.172916 +026880/063150, loss: 0.052026, avg_loss: 0.172897 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 26880/63150: {'accuracy': 0.8577981651376146} +026885/063150, loss: 0.011020, avg_loss: 0.172872 +026890/063150, loss: 0.031410, avg_loss: 0.172845 +026895/063150, loss: 0.020815, avg_loss: 0.172824 +026900/063150, loss: 0.043787, avg_loss: 0.172807 +026905/063150, loss: 0.020614, avg_loss: 0.172790 +026910/063150, loss: 0.019679, avg_loss: 0.172770 +026915/063150, loss: 0.084147, avg_loss: 0.172748 +026920/063150, loss: 0.176122, avg_loss: 0.172740 +026925/063150, loss: 0.066586, avg_loss: 0.172716 +026930/063150, loss: 0.112169, avg_loss: 0.172700 +026935/063150, loss: 0.010257, avg_loss: 0.172678 +026940/063150, loss: 0.006340, avg_loss: 0.172651 +026945/063150, loss: 0.025723, avg_loss: 0.172632 +026950/063150, loss: 0.026241, avg_loss: 0.172612 +026955/063150, loss: 0.018900, avg_loss: 0.172583 +026960/063150, loss: 0.028388, avg_loss: 0.172564 +026965/063150, loss: 0.056258, avg_loss: 0.172541 +026970/063150, loss: 0.023552, avg_loss: 0.172518 +026975/063150, loss: 0.047599, avg_loss: 0.172491 +026980/063150, loss: 0.094918, avg_loss: 0.172469 +026985/063150, loss: 0.007090, avg_loss: 0.172446 +026990/063150, loss: 0.002823, avg_loss: 0.172421 +026995/063150, loss: 0.149088, avg_loss: 0.172404 +027000/063150, loss: 0.065625, avg_loss: 0.172385 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 27000/63150: {'accuracy': 0.8612385321100917} +027005/063150, loss: 0.052874, avg_loss: 0.172360 +027010/063150, loss: 0.237368, avg_loss: 0.172346 +027015/063150, loss: 0.026472, avg_loss: 0.172325 +027020/063150, loss: 0.023582, avg_loss: 0.172299 +027025/063150, loss: 0.056197, avg_loss: 0.172281 +027030/063150, loss: 0.007635, avg_loss: 0.172254 +027035/063150, loss: 0.025254, avg_loss: 0.172230 +027040/063150, loss: 0.152112, avg_loss: 0.172212 +027045/063150, loss: 0.026842, avg_loss: 0.172186 +027050/063150, loss: 0.018775, avg_loss: 0.172157 +027055/063150, loss: 0.040150, avg_loss: 0.172138 +027060/063150, loss: 0.011728, avg_loss: 0.172114 +027065/063150, loss: 0.055900, avg_loss: 0.172096 +027070/063150, 
loss: 0.022907, avg_loss: 0.172080 +027075/063150, loss: 0.102153, avg_loss: 0.172061 +027080/063150, loss: 0.008250, avg_loss: 0.172038 +027085/063150, loss: 0.011941, avg_loss: 0.172008 +027090/063150, loss: 0.022448, avg_loss: 0.171987 +027095/063150, loss: 0.009386, avg_loss: 0.171959 +027100/063150, loss: 0.000937, avg_loss: 0.171934 +027105/063150, loss: 0.006629, avg_loss: 0.171908 +027110/063150, loss: 0.030608, avg_loss: 0.171887 +027115/063150, loss: 0.008867, avg_loss: 0.171866 +027120/063150, loss: 0.087148, avg_loss: 0.171844 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 27120/63150: {'accuracy': 0.8463302752293578} +027125/063150, loss: 0.027899, avg_loss: 0.171820 +027130/063150, loss: 0.006329, avg_loss: 0.171791 +027135/063150, loss: 0.166962, avg_loss: 0.171779 +027140/063150, loss: 0.026513, avg_loss: 0.171756 +027145/063150, loss: 0.005434, avg_loss: 0.171741 +027150/063150, loss: 0.041131, avg_loss: 0.171718 +027155/063150, loss: 0.009396, avg_loss: 0.171697 +027160/063150, loss: 0.019303, avg_loss: 0.171671 +027165/063150, loss: 0.069126, avg_loss: 0.171648 +027170/063150, loss: 0.166649, avg_loss: 0.171640 +027175/063150, loss: 0.022423, avg_loss: 0.171613 +027180/063150, loss: 0.035379, avg_loss: 0.171590 +027185/063150, loss: 0.061413, avg_loss: 0.171565 +027190/063150, loss: 0.032296, avg_loss: 0.171538 +027195/063150, loss: 0.002724, avg_loss: 0.171509 +027200/063150, loss: 0.035373, avg_loss: 0.171488 +027205/063150, loss: 0.005380, avg_loss: 0.171467 +027210/063150, loss: 0.030348, avg_loss: 0.171445 +027215/063150, loss: 0.092380, avg_loss: 0.171420 +027220/063150, loss: 0.003972, avg_loss: 0.171396 +027225/063150, loss: 0.003893, avg_loss: 0.171378 +027230/063150, loss: 0.055151, avg_loss: 0.171357 +027235/063150, loss: 0.018740, avg_loss: 0.171328 +027240/063150, loss: 0.069493, avg_loss: 0.171309 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 27240/63150: {'accuracy': 0.8612385321100917} +027245/063150, loss: 0.066344, avg_loss: 0.171284 +027250/063150, loss: 0.052729, avg_loss: 0.171269 +027255/063150, loss: 0.053928, avg_loss: 0.171245 +027260/063150, loss: 0.038789, avg_loss: 0.171220 +027265/063150, loss: 0.010700, avg_loss: 0.171196 +027270/063150, loss: 0.051406, avg_loss: 0.171171 +027275/063150, loss: 0.131780, avg_loss: 0.171155 +027280/063150, loss: 0.040621, avg_loss: 0.171136 +027285/063150, loss: 0.061473, avg_loss: 0.171118 +027290/063150, loss: 0.002834, avg_loss: 0.171097 +027295/063150, loss: 0.043497, avg_loss: 0.171077 +027300/063150, loss: 0.051575, avg_loss: 0.171057 +027305/063150, loss: 0.017404, avg_loss: 0.171034 +027310/063150, loss: 0.027702, avg_loss: 0.171013 +027315/063150, loss: 0.204149, avg_loss: 0.170995 +027320/063150, loss: 0.051431, avg_loss: 0.170973 +027325/063150, loss: 0.009916, avg_loss: 0.170952 +027330/063150, loss: 0.003211, avg_loss: 0.170930 +027335/063150, loss: 0.037592, avg_loss: 0.170912 +027340/063150, loss: 0.071714, avg_loss: 0.170886 +027345/063150, loss: 0.119424, avg_loss: 0.170870 +027350/063150, loss: 0.083736, avg_loss: 0.170852 +027355/063150, loss: 0.017677, avg_loss: 0.170826 +027360/063150, loss: 0.023233, avg_loss: 0.170800 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 12, step 27360/63150: {'accuracy': 0.8577981651376146} +027365/063150, loss: 0.022504, avg_loss: 0.170783 +027370/063150, loss: 
0.005160, avg_loss: 0.170758 +027375/063150, loss: 0.086048, avg_loss: 0.170735 +027380/063150, loss: 0.020944, avg_loss: 0.170716 +027385/063150, loss: 0.261572, avg_loss: 0.170703 +027390/063150, loss: 0.043341, avg_loss: 0.170681 +027395/063150, loss: 0.040651, avg_loss: 0.170658 +027400/063150, loss: 0.237097, avg_loss: 0.170644 +027405/063150, loss: 0.016556, avg_loss: 0.170620 +027410/063150, loss: 0.012082, avg_loss: 0.170597 +027415/063150, loss: 0.065450, avg_loss: 0.170575 +027420/063150, loss: 0.225460, avg_loss: 0.170560 +027425/063150, loss: 0.007740, avg_loss: 0.170533 +027430/063150, loss: 0.009014, avg_loss: 0.170510 +027435/063150, loss: 0.084257, avg_loss: 0.170485 +027440/063150, loss: 0.198444, avg_loss: 0.170463 +027445/063150, loss: 0.010752, avg_loss: 0.170435 +027450/063150, loss: 0.041156, avg_loss: 0.170411 +027455/063150, loss: 0.032650, avg_loss: 0.170397 +027460/063150, loss: 0.008656, avg_loss: 0.170372 +027465/063150, loss: 0.005737, avg_loss: 0.170349 +027470/063150, loss: 0.248669, avg_loss: 0.170332 +027475/063150, loss: 0.012696, avg_loss: 0.170310 +027480/063150, loss: 0.023002, avg_loss: 0.170283 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 27480/63150: {'accuracy': 0.8555045871559633} +027485/063150, loss: 0.045720, avg_loss: 0.170255 +027490/063150, loss: 0.044354, avg_loss: 0.170237 +027495/063150, loss: 0.003510, avg_loss: 0.170213 +027500/063150, loss: 0.018427, avg_loss: 0.170185 +027505/063150, loss: 0.046245, avg_loss: 0.170166 +027510/063150, loss: 0.084126, avg_loss: 0.170140 +027515/063150, loss: 0.023775, avg_loss: 0.170120 +027520/063150, loss: 0.199156, avg_loss: 0.170101 +027525/063150, loss: 0.052722, avg_loss: 0.170079 +027530/063150, loss: 0.007972, avg_loss: 0.170051 +027535/063150, loss: 0.022698, avg_loss: 0.170026 +027540/063150, loss: 0.103719, avg_loss: 0.170006 +027545/063150, loss: 0.116714, avg_loss: 0.169989 +027550/063150, loss: 0.037627, avg_loss: 0.169964 +027555/063150, loss: 0.004612, avg_loss: 0.169939 +027560/063150, loss: 0.008760, avg_loss: 0.169914 +027565/063150, loss: 0.006600, avg_loss: 0.169892 +027570/063150, loss: 0.151038, avg_loss: 0.169868 +027575/063150, loss: 0.034564, avg_loss: 0.169844 +027580/063150, loss: 0.011232, avg_loss: 0.169819 +027585/063150, loss: 0.224483, avg_loss: 0.169803 +027590/063150, loss: 0.017308, avg_loss: 0.169776 +027595/063150, loss: 0.060893, avg_loss: 0.169750 +027600/063150, loss: 0.076629, avg_loss: 0.169733 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 27600/63150: {'accuracy': 0.8646788990825688} +027605/063150, loss: 0.033323, avg_loss: 0.169706 +027610/063150, loss: 0.008787, avg_loss: 0.169681 +027615/063150, loss: 0.013909, avg_loss: 0.169653 +027620/063150, loss: 0.031686, avg_loss: 0.169628 +027625/063150, loss: 0.024723, avg_loss: 0.169617 +027630/063150, loss: 0.003257, avg_loss: 0.169591 +027635/063150, loss: 0.001370, avg_loss: 0.169571 +027640/063150, loss: 0.032039, avg_loss: 0.169543 +027645/063150, loss: 0.007934, avg_loss: 0.169523 +027650/063150, loss: 0.015324, avg_loss: 0.169496 +027655/063150, loss: 0.017464, avg_loss: 0.169470 +027660/063150, loss: 0.014664, avg_loss: 0.169441 +027665/063150, loss: 0.192293, avg_loss: 0.169420 +027670/063150, loss: 0.009042, avg_loss: 0.169392 +027675/063150, loss: 0.001605, avg_loss: 0.169366 +027680/063150, loss: 0.003103, avg_loss: 0.169341 +027685/063150, loss: 0.042236, 
avg_loss: 0.169321 +027690/063150, loss: 0.006726, avg_loss: 0.169298 +027695/063150, loss: 0.015539, avg_loss: 0.169280 +027700/063150, loss: 0.010040, avg_loss: 0.169253 +027705/063150, loss: 0.017646, avg_loss: 0.169233 +027710/063150, loss: 0.094200, avg_loss: 0.169210 +027715/063150, loss: 0.041670, avg_loss: 0.169186 +027720/063150, loss: 0.004672, avg_loss: 0.169161 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 27720/63150: {'accuracy': 0.8222477064220184} +027725/063150, loss: 0.023786, avg_loss: 0.169144 +027730/063150, loss: 0.002828, avg_loss: 0.169120 +027735/063150, loss: 0.127803, avg_loss: 0.169105 +027740/063150, loss: 0.217781, avg_loss: 0.169087 +027745/063150, loss: 0.201857, avg_loss: 0.169071 +027750/063150, loss: 0.022577, avg_loss: 0.169048 +027755/063150, loss: 0.016370, avg_loss: 0.169022 +027760/063150, loss: 0.029963, avg_loss: 0.169001 +027765/063150, loss: 0.107884, avg_loss: 0.168980 +027770/063150, loss: 0.018045, avg_loss: 0.168954 +027775/063150, loss: 0.048940, avg_loss: 0.168938 +027780/063150, loss: 0.024265, avg_loss: 0.168919 +027785/063150, loss: 0.003584, avg_loss: 0.168897 +027790/063150, loss: 0.041260, avg_loss: 0.168873 +027795/063150, loss: 0.008950, avg_loss: 0.168848 +027800/063150, loss: 0.002752, avg_loss: 0.168825 +027805/063150, loss: 0.018023, avg_loss: 0.168799 +027810/063150, loss: 0.048765, avg_loss: 0.168773 +027815/063150, loss: 0.017355, avg_loss: 0.168757 +027820/063150, loss: 0.106956, avg_loss: 0.168733 +027825/063150, loss: 0.060166, avg_loss: 0.168709 +027830/063150, loss: 0.001708, avg_loss: 0.168682 +027835/063150, loss: 0.119290, avg_loss: 0.168664 +027840/063150, loss: 0.031449, avg_loss: 0.168646 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 27840/63150: {'accuracy': 0.8532110091743119} +027845/063150, loss: 0.001824, avg_loss: 0.168620 +027850/063150, loss: 0.010888, avg_loss: 0.168594 +027855/063150, loss: 0.007230, avg_loss: 0.168566 +027860/063150, loss: 0.047503, avg_loss: 0.168540 +027865/063150, loss: 0.001900, avg_loss: 0.168512 +027870/063150, loss: 0.004294, avg_loss: 0.168488 +027875/063150, loss: 0.002686, avg_loss: 0.168466 +027880/063150, loss: 0.137212, avg_loss: 0.168444 +027885/063150, loss: 0.127088, avg_loss: 0.168423 +027890/063150, loss: 0.098445, avg_loss: 0.168400 +027895/063150, loss: 0.009915, avg_loss: 0.168374 +027900/063150, loss: 0.088546, avg_loss: 0.168352 +027905/063150, loss: 0.001617, avg_loss: 0.168326 +027910/063150, loss: 0.010013, avg_loss: 0.168303 +027915/063150, loss: 0.003222, avg_loss: 0.168275 +027920/063150, loss: 0.146089, avg_loss: 0.168251 +027925/063150, loss: 0.004163, avg_loss: 0.168233 +027930/063150, loss: 0.087446, avg_loss: 0.168216 +027935/063150, loss: 0.016721, avg_loss: 0.168191 +027940/063150, loss: 0.012104, avg_loss: 0.168167 +027945/063150, loss: 0.007010, avg_loss: 0.168144 +027950/063150, loss: 0.032185, avg_loss: 0.168118 +027955/063150, loss: 0.064780, avg_loss: 0.168094 +027960/063150, loss: 0.001886, avg_loss: 0.168080 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 27960/63150: {'accuracy': 0.8600917431192661} +027965/063150, loss: 0.052768, avg_loss: 0.168053 +027970/063150, loss: 0.001272, avg_loss: 0.168030 +027975/063150, loss: 0.001667, avg_loss: 0.168010 +027980/063150, loss: 0.173975, avg_loss: 0.167990 +027985/063150, loss: 0.191033, 
avg_loss: 0.167973 +027990/063150, loss: 0.076750, avg_loss: 0.167948 +027995/063150, loss: 0.063395, avg_loss: 0.167924 +028000/063150, loss: 0.247758, avg_loss: 0.167908 +028005/063150, loss: 0.043394, avg_loss: 0.167892 +028010/063150, loss: 0.068048, avg_loss: 0.167867 +028015/063150, loss: 0.000950, avg_loss: 0.167839 +028020/063150, loss: 0.023803, avg_loss: 0.167811 +028025/063150, loss: 0.069647, avg_loss: 0.167791 +028030/063150, loss: 0.006179, avg_loss: 0.167763 +028035/063150, loss: 0.149412, avg_loss: 0.167753 +028040/063150, loss: 0.260913, avg_loss: 0.167740 +028045/063150, loss: 0.007112, avg_loss: 0.167721 +028050/063150, loss: 0.062662, avg_loss: 0.167707 +028055/063150, loss: 0.033384, avg_loss: 0.167686 +028060/063150, loss: 0.086192, avg_loss: 0.167667 +028065/063150, loss: 0.055420, avg_loss: 0.167645 +028070/063150, loss: 0.175912, avg_loss: 0.167633 +028075/063150, loss: 0.007357, avg_loss: 0.167605 +028080/063150, loss: 0.016570, avg_loss: 0.167579 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28080/63150: {'accuracy': 0.8589449541284404} +028085/063150, loss: 0.050216, avg_loss: 0.167556 +028090/063150, loss: 0.010988, avg_loss: 0.167535 +028095/063150, loss: 0.004236, avg_loss: 0.167509 +028100/063150, loss: 0.006510, avg_loss: 0.167486 +028105/063150, loss: 0.069831, avg_loss: 0.167465 +028110/063150, loss: 0.004723, avg_loss: 0.167440 +028115/063150, loss: 0.019658, avg_loss: 0.167421 +028120/063150, loss: 0.027906, avg_loss: 0.167402 +028125/063150, loss: 0.011655, avg_loss: 0.167375 +028130/063150, loss: 0.024306, avg_loss: 0.167353 +028135/063150, loss: 0.039729, avg_loss: 0.167331 +028140/063150, loss: 0.011833, avg_loss: 0.167309 +028145/063150, loss: 0.037030, avg_loss: 0.167284 +028150/063150, loss: 0.002287, avg_loss: 0.167263 +028155/063150, loss: 0.089799, avg_loss: 0.167243 +028160/063150, loss: 0.016498, avg_loss: 0.167231 +028165/063150, loss: 0.033099, avg_loss: 0.167208 +028170/063150, loss: 0.122826, avg_loss: 0.167194 +028175/063150, loss: 0.067804, avg_loss: 0.167171 +028180/063150, loss: 0.017962, avg_loss: 0.167146 +028185/063150, loss: 0.066817, avg_loss: 0.167137 +028190/063150, loss: 0.023980, avg_loss: 0.167111 +028195/063150, loss: 0.098249, avg_loss: 0.167089 +028200/063150, loss: 0.001516, avg_loss: 0.167078 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28200/63150: {'accuracy': 0.8612385321100917} +028205/063150, loss: 0.015147, avg_loss: 0.167057 +028210/063150, loss: 0.006562, avg_loss: 0.167035 +028215/063150, loss: 0.138221, avg_loss: 0.167022 +028220/063150, loss: 0.012262, avg_loss: 0.167001 +028225/063150, loss: 0.030977, avg_loss: 0.166985 +028230/063150, loss: 0.002671, avg_loss: 0.166961 +028235/063150, loss: 0.032277, avg_loss: 0.166940 +028240/063150, loss: 0.029787, avg_loss: 0.166913 +028245/063150, loss: 0.017316, avg_loss: 0.166898 +028250/063150, loss: 0.053205, avg_loss: 0.166874 +028255/063150, loss: 0.101664, avg_loss: 0.166850 +028260/063150, loss: 0.041728, avg_loss: 0.166826 +028265/063150, loss: 0.066572, avg_loss: 0.166806 +028270/063150, loss: 0.029979, avg_loss: 0.166798 +028275/063150, loss: 0.007725, avg_loss: 0.166772 +028280/063150, loss: 0.004476, avg_loss: 0.166745 +028285/063150, loss: 0.067404, avg_loss: 0.166722 +028290/063150, loss: 0.024729, avg_loss: 0.166700 +028295/063150, loss: 0.058260, avg_loss: 0.166675 +028300/063150, loss: 0.010539, avg_loss: 
0.166662 +028305/063150, loss: 0.135680, avg_loss: 0.166647 +028310/063150, loss: 0.011801, avg_loss: 0.166627 +028315/063150, loss: 0.020173, avg_loss: 0.166604 +028320/063150, loss: 0.009742, avg_loss: 0.166579 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28320/63150: {'accuracy': 0.8520642201834863} +028325/063150, loss: 0.025849, avg_loss: 0.166559 +028330/063150, loss: 0.084111, avg_loss: 0.166537 +028335/063150, loss: 0.011020, avg_loss: 0.166515 +028340/063150, loss: 0.001050, avg_loss: 0.166488 +028345/063150, loss: 0.012184, avg_loss: 0.166466 +028350/063150, loss: 0.021985, avg_loss: 0.166443 +028355/063150, loss: 0.015199, avg_loss: 0.166434 +028360/063150, loss: 0.002518, avg_loss: 0.166408 +028365/063150, loss: 0.036101, avg_loss: 0.166390 +028370/063150, loss: 0.006141, avg_loss: 0.166369 +028375/063150, loss: 0.021799, avg_loss: 0.166348 +028380/063150, loss: 0.070193, avg_loss: 0.166327 +028385/063150, loss: 0.017686, avg_loss: 0.166304 +028390/063150, loss: 0.026984, avg_loss: 0.166283 +028395/063150, loss: 0.010160, avg_loss: 0.166255 +028400/063150, loss: 0.007895, avg_loss: 0.166241 +028405/063150, loss: 0.059838, avg_loss: 0.166217 +028410/063150, loss: 0.058588, avg_loss: 0.166193 +028415/063150, loss: 0.031011, avg_loss: 0.166167 +028420/063150, loss: 0.005845, avg_loss: 0.166151 +028425/063150, loss: 0.050391, avg_loss: 0.166127 +028430/063150, loss: 0.016239, avg_loss: 0.166103 +028435/063150, loss: 0.070611, avg_loss: 0.166085 +028440/063150, loss: 0.010411, avg_loss: 0.166067 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28440/63150: {'accuracy': 0.856651376146789} +028445/063150, loss: 0.016816, avg_loss: 0.166051 +028450/063150, loss: 0.007382, avg_loss: 0.166030 +028455/063150, loss: 0.008736, avg_loss: 0.166006 +028460/063150, loss: 0.009211, avg_loss: 0.165983 +028465/063150, loss: 0.011259, avg_loss: 0.165960 +028470/063150, loss: 0.018984, avg_loss: 0.165934 +028475/063150, loss: 0.009329, avg_loss: 0.165914 +028480/063150, loss: 0.020577, avg_loss: 0.165892 +028485/063150, loss: 0.223059, avg_loss: 0.165872 +028490/063150, loss: 0.002591, avg_loss: 0.165853 +028495/063150, loss: 0.004926, avg_loss: 0.165828 +028500/063150, loss: 0.004833, avg_loss: 0.165803 +028505/063150, loss: 0.058992, avg_loss: 0.165787 +028510/063150, loss: 0.040001, avg_loss: 0.165763 +028515/063150, loss: 0.126455, avg_loss: 0.165740 +028520/063150, loss: 0.021724, avg_loss: 0.165717 +028525/063150, loss: 0.026139, avg_loss: 0.165706 +028530/063150, loss: 0.129199, avg_loss: 0.165685 +028535/063150, loss: 0.006899, avg_loss: 0.165662 +028540/063150, loss: 0.021979, avg_loss: 0.165639 +028545/063150, loss: 0.008334, avg_loss: 0.165617 +028550/063150, loss: 0.195612, avg_loss: 0.165598 +028555/063150, loss: 0.004226, avg_loss: 0.165571 +028560/063150, loss: 0.025035, avg_loss: 0.165545 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28560/63150: {'accuracy': 0.8577981651376146} +028565/063150, loss: 0.027609, avg_loss: 0.165520 +028570/063150, loss: 0.004033, avg_loss: 0.165496 +028575/063150, loss: 0.050360, avg_loss: 0.165476 +028580/063150, loss: 0.014290, avg_loss: 0.165455 +028585/063150, loss: 0.235777, avg_loss: 0.165436 +028590/063150, loss: 0.025435, avg_loss: 0.165420 +028595/063150, loss: 0.005397, avg_loss: 0.165393 +028600/063150, loss: 0.117717, avg_loss: 0.165375 
+028605/063150, loss: 0.002037, avg_loss: 0.165353 +028610/063150, loss: 0.254127, avg_loss: 0.165336 +028615/063150, loss: 0.002838, avg_loss: 0.165313 +028620/063150, loss: 0.095126, avg_loss: 0.165302 +028625/063150, loss: 0.022436, avg_loss: 0.165277 +028630/063150, loss: 0.092213, avg_loss: 0.165263 +028635/063150, loss: 0.104061, avg_loss: 0.165247 +028640/063150, loss: 0.017404, avg_loss: 0.165228 +028645/063150, loss: 0.017034, avg_loss: 0.165212 +028650/063150, loss: 0.043462, avg_loss: 0.165200 +028655/063150, loss: 0.010030, avg_loss: 0.165176 +028660/063150, loss: 0.040578, avg_loss: 0.165153 +028665/063150, loss: 0.094064, avg_loss: 0.165130 +028670/063150, loss: 0.025566, avg_loss: 0.165115 +028675/063150, loss: 0.048855, avg_loss: 0.165093 +028680/063150, loss: 0.022516, avg_loss: 0.165068 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28680/63150: {'accuracy': 0.8532110091743119} +028685/063150, loss: 0.001987, avg_loss: 0.165042 +028690/063150, loss: 0.036959, avg_loss: 0.165025 +028695/063150, loss: 0.153130, avg_loss: 0.165011 +028700/063150, loss: 0.072899, avg_loss: 0.164991 +028705/063150, loss: 0.058796, avg_loss: 0.164971 +028710/063150, loss: 0.013077, avg_loss: 0.164954 +028715/063150, loss: 0.009310, avg_loss: 0.164932 +028720/063150, loss: 0.111735, avg_loss: 0.164909 +028725/063150, loss: 0.034554, avg_loss: 0.164886 +028730/063150, loss: 0.075338, avg_loss: 0.164869 +028735/063150, loss: 0.016939, avg_loss: 0.164849 +028740/063150, loss: 0.016666, avg_loss: 0.164827 +028745/063150, loss: 0.130433, avg_loss: 0.164807 +028750/063150, loss: 0.019661, avg_loss: 0.164782 +028755/063150, loss: 0.025044, avg_loss: 0.164763 +028760/063150, loss: 0.006909, avg_loss: 0.164737 +028765/063150, loss: 0.008856, avg_loss: 0.164714 +028770/063150, loss: 0.006263, avg_loss: 0.164697 +028775/063150, loss: 0.006302, avg_loss: 0.164674 +028780/063150, loss: 0.100488, avg_loss: 0.164652 +028785/063150, loss: 0.048350, avg_loss: 0.164637 +028790/063150, loss: 0.008412, avg_loss: 0.164616 +028795/063150, loss: 0.071850, avg_loss: 0.164593 +028800/063150, loss: 0.001472, avg_loss: 0.164566 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28800/63150: {'accuracy': 0.8509174311926605} +028805/063150, loss: 0.023727, avg_loss: 0.164541 +028810/063150, loss: 0.032263, avg_loss: 0.164523 +028815/063150, loss: 0.003074, avg_loss: 0.164503 +028820/063150, loss: 0.006534, avg_loss: 0.164480 +028825/063150, loss: 0.007323, avg_loss: 0.164460 +028830/063150, loss: 0.003284, avg_loss: 0.164445 +028835/063150, loss: 0.062052, avg_loss: 0.164433 +028840/063150, loss: 0.100321, avg_loss: 0.164413 +028845/063150, loss: 0.022897, avg_loss: 0.164394 +028850/063150, loss: 0.005485, avg_loss: 0.164375 +028855/063150, loss: 0.062385, avg_loss: 0.164369 +028860/063150, loss: 0.010690, avg_loss: 0.164346 +028865/063150, loss: 0.011008, avg_loss: 0.164321 +028870/063150, loss: 0.003571, avg_loss: 0.164306 +028875/063150, loss: 0.030304, avg_loss: 0.164284 +028880/063150, loss: 0.063094, avg_loss: 0.164268 +028885/063150, loss: 0.132826, avg_loss: 0.164255 +028890/063150, loss: 0.015198, avg_loss: 0.164232 +028895/063150, loss: 0.269952, avg_loss: 0.164218 +028900/063150, loss: 0.025004, avg_loss: 0.164201 +028905/063150, loss: 0.029775, avg_loss: 0.164180 +028910/063150, loss: 0.044002, avg_loss: 0.164157 +028915/063150, loss: 0.042488, avg_loss: 0.164139 +028920/063150, 
loss: 0.054601, avg_loss: 0.164117 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 28920/63150: {'accuracy': 0.8497706422018348} +028925/063150, loss: 0.025257, avg_loss: 0.164104 +028930/063150, loss: 0.036359, avg_loss: 0.164080 +028935/063150, loss: 0.026186, avg_loss: 0.164059 +028940/063150, loss: 0.058101, avg_loss: 0.164038 +028945/063150, loss: 0.041011, avg_loss: 0.164020 +028950/063150, loss: 0.075152, avg_loss: 0.164001 +028955/063150, loss: 0.080023, avg_loss: 0.163983 +028960/063150, loss: 0.060679, avg_loss: 0.163967 +028965/063150, loss: 0.011518, avg_loss: 0.163947 +028970/063150, loss: 0.114544, avg_loss: 0.163924 +028975/063150, loss: 0.004298, avg_loss: 0.163897 +028980/063150, loss: 0.030343, avg_loss: 0.163877 +028985/063150, loss: 0.020442, avg_loss: 0.163853 +028990/063150, loss: 0.016661, avg_loss: 0.163839 +028995/063150, loss: 0.016685, avg_loss: 0.163814 +029000/063150, loss: 0.006103, avg_loss: 0.163787 +029005/063150, loss: 0.034401, avg_loss: 0.163762 +029010/063150, loss: 0.139720, avg_loss: 0.163763 +029015/063150, loss: 0.024141, avg_loss: 0.163741 +029020/063150, loss: 0.013081, avg_loss: 0.163717 +029025/063150, loss: 0.002962, avg_loss: 0.163698 +029030/063150, loss: 0.061370, avg_loss: 0.163679 +029035/063150, loss: 0.029041, avg_loss: 0.163655 +029040/063150, loss: 0.026278, avg_loss: 0.163638 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 29040/63150: {'accuracy': 0.8646788990825688} +029045/063150, loss: 0.010010, avg_loss: 0.163617 +029050/063150, loss: 0.183986, avg_loss: 0.163604 +029055/063150, loss: 0.255172, avg_loss: 0.163589 +029060/063150, loss: 0.015355, avg_loss: 0.163568 +029065/063150, loss: 0.164820, avg_loss: 0.163550 +029070/063150, loss: 0.054091, avg_loss: 0.163526 +029075/063150, loss: 0.087639, avg_loss: 0.163505 +029080/063150, loss: 0.040989, avg_loss: 0.163486 +029085/063150, loss: 0.004775, avg_loss: 0.163464 +029090/063150, loss: 0.042325, avg_loss: 0.163445 +029095/063150, loss: 0.122965, avg_loss: 0.163424 +029100/063150, loss: 0.043591, avg_loss: 0.163401 +029105/063150, loss: 0.032209, avg_loss: 0.163377 +029110/063150, loss: 0.051691, avg_loss: 0.163355 +029115/063150, loss: 0.142066, avg_loss: 0.163339 +029120/063150, loss: 0.039663, avg_loss: 0.163320 +029125/063150, loss: 0.014815, avg_loss: 0.163298 +029130/063150, loss: 0.065197, avg_loss: 0.163277 +029135/063150, loss: 0.011303, avg_loss: 0.163263 +029140/063150, loss: 0.076263, avg_loss: 0.163243 +029145/063150, loss: 0.128071, avg_loss: 0.163221 +029150/063150, loss: 0.015133, avg_loss: 0.163200 +029155/063150, loss: 0.018022, avg_loss: 0.163188 +029160/063150, loss: 0.017963, avg_loss: 0.163169 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 29160/63150: {'accuracy': 0.8646788990825688} +029165/063150, loss: 0.042886, avg_loss: 0.163149 +029170/063150, loss: 0.005423, avg_loss: 0.163129 +029175/063150, loss: 0.022309, avg_loss: 0.163107 +029180/063150, loss: 0.008760, avg_loss: 0.163093 +029185/063150, loss: 0.009481, avg_loss: 0.163072 +029190/063150, loss: 0.007929, avg_loss: 0.163050 +029195/063150, loss: 0.107301, avg_loss: 0.163036 +029200/063150, loss: 0.022963, avg_loss: 0.163018 +029205/063150, loss: 0.084667, avg_loss: 0.163000 +029210/063150, loss: 0.051043, avg_loss: 0.162980 +029215/063150, loss: 0.038937, avg_loss: 0.162958 +029220/063150, loss: 
0.033412, avg_loss: 0.162945 +029225/063150, loss: 0.005927, avg_loss: 0.162920 +029230/063150, loss: 0.027636, avg_loss: 0.162898 +029235/063150, loss: 0.126428, avg_loss: 0.162883 +029240/063150, loss: 0.039391, avg_loss: 0.162862 +029245/063150, loss: 0.037687, avg_loss: 0.162839 +029250/063150, loss: 0.134654, avg_loss: 0.162820 +029255/063150, loss: 0.003597, avg_loss: 0.162794 +029260/063150, loss: 0.071330, avg_loss: 0.162777 +029265/063150, loss: 0.106110, avg_loss: 0.162755 +029270/063150, loss: 0.133331, avg_loss: 0.162739 +029275/063150, loss: 0.019947, avg_loss: 0.162718 +029280/063150, loss: 0.001610, avg_loss: 0.162702 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 29280/63150: {'accuracy': 0.8543577981651376} +029285/063150, loss: 0.012514, avg_loss: 0.162681 +029290/063150, loss: 0.146907, avg_loss: 0.162680 +029295/063150, loss: 0.003723, avg_loss: 0.162656 +029300/063150, loss: 0.018421, avg_loss: 0.162638 +029305/063150, loss: 0.071663, avg_loss: 0.162615 +029310/063150, loss: 0.091821, avg_loss: 0.162597 +029315/063150, loss: 0.054160, avg_loss: 0.162575 +029320/063150, loss: 0.051946, avg_loss: 0.162557 +029325/063150, loss: 0.055123, avg_loss: 0.162533 +029330/063150, loss: 0.071310, avg_loss: 0.162516 +029335/063150, loss: 0.076138, avg_loss: 0.162500 +029340/063150, loss: 0.037010, avg_loss: 0.162480 +029345/063150, loss: 0.033757, avg_loss: 0.162458 +029350/063150, loss: 0.012211, avg_loss: 0.162439 +029355/063150, loss: 0.054922, avg_loss: 0.162416 +029360/063150, loss: 0.004186, avg_loss: 0.162396 +029365/063150, loss: 0.041707, avg_loss: 0.162375 +029370/063150, loss: 0.030861, avg_loss: 0.162351 +029375/063150, loss: 0.006072, avg_loss: 0.162333 +029380/063150, loss: 0.052050, avg_loss: 0.162312 +029385/063150, loss: 0.165506, avg_loss: 0.162295 +029390/063150, loss: 0.037339, avg_loss: 0.162273 +029395/063150, loss: 0.006619, avg_loss: 0.162255 +029400/063150, loss: 0.143072, avg_loss: 0.162241 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 13, step 29400/63150: {'accuracy': 0.856651376146789} +029405/063150, loss: 0.269459, avg_loss: 0.162227 +029410/063150, loss: 0.032460, avg_loss: 0.162202 +029415/063150, loss: 0.002599, avg_loss: 0.162185 +029420/063150, loss: 0.070231, avg_loss: 0.162175 +029425/063150, loss: 0.024736, avg_loss: 0.162151 +029430/063150, loss: 0.121802, avg_loss: 0.162130 +029435/063150, loss: 0.015743, avg_loss: 0.162109 +029440/063150, loss: 0.048686, avg_loss: 0.162088 +029445/063150, loss: 0.055546, avg_loss: 0.162066 +029450/063150, loss: 0.196122, avg_loss: 0.162052 +029455/063150, loss: 0.023091, avg_loss: 0.162033 +029460/063150, loss: 0.151750, avg_loss: 0.162017 +029465/063150, loss: 0.059146, avg_loss: 0.161999 +029470/063150, loss: 0.078730, avg_loss: 0.161980 +029475/063150, loss: 0.002122, avg_loss: 0.161958 +029480/063150, loss: 0.090604, avg_loss: 0.161939 +029485/063150, loss: 0.075021, avg_loss: 0.161916 +029490/063150, loss: 0.121685, avg_loss: 0.161902 +029495/063150, loss: 0.010097, avg_loss: 0.161879 +029500/063150, loss: 0.125670, avg_loss: 0.161860 +029505/063150, loss: 0.015610, avg_loss: 0.161837 +029510/063150, loss: 0.012012, avg_loss: 0.161810 +029515/063150, loss: 0.023708, avg_loss: 0.161790 +029520/063150, loss: 0.208061, avg_loss: 0.161771 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 29520/63150: {'accuracy': 
0.8543577981651376} +029525/063150, loss: 0.029809, avg_loss: 0.161747 +029530/063150, loss: 0.158969, avg_loss: 0.161728 +029535/063150, loss: 0.006258, avg_loss: 0.161710 +029540/063150, loss: 0.026417, avg_loss: 0.161686 +029545/063150, loss: 0.055589, avg_loss: 0.161664 +029550/063150, loss: 0.040480, avg_loss: 0.161643 +029555/063150, loss: 0.004644, avg_loss: 0.161622 +029560/063150, loss: 0.002206, avg_loss: 0.161598 +029565/063150, loss: 0.003171, avg_loss: 0.161577 +029570/063150, loss: 0.230687, avg_loss: 0.161559 +029575/063150, loss: 0.017428, avg_loss: 0.161537 +029580/063150, loss: 0.006898, avg_loss: 0.161514 +029585/063150, loss: 0.002860, avg_loss: 0.161494 +029590/063150, loss: 0.042689, avg_loss: 0.161472 +029595/063150, loss: 0.030789, avg_loss: 0.161452 +029600/063150, loss: 0.001615, avg_loss: 0.161427 +029605/063150, loss: 0.019307, avg_loss: 0.161401 +029610/063150, loss: 0.156288, avg_loss: 0.161380 +029615/063150, loss: 0.001029, avg_loss: 0.161358 +029620/063150, loss: 0.036586, avg_loss: 0.161338 +029625/063150, loss: 0.031545, avg_loss: 0.161317 +029630/063150, loss: 0.001884, avg_loss: 0.161294 +029635/063150, loss: 0.001182, avg_loss: 0.161283 +029640/063150, loss: 0.018922, avg_loss: 0.161264 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 29640/63150: {'accuracy': 0.8520642201834863} +029645/063150, loss: 0.004678, avg_loss: 0.161242 +029650/063150, loss: 0.038651, avg_loss: 0.161218 +029655/063150, loss: 0.055575, avg_loss: 0.161198 +029660/063150, loss: 0.030203, avg_loss: 0.161174 +029665/063150, loss: 0.014210, avg_loss: 0.161152 +029670/063150, loss: 0.027082, avg_loss: 0.161129 +029675/063150, loss: 0.026579, avg_loss: 0.161108 +029680/063150, loss: 0.011516, avg_loss: 0.161084 +029685/063150, loss: 0.001934, avg_loss: 0.161062 +029690/063150, loss: 0.118375, avg_loss: 0.161042 +029695/063150, loss: 0.002767, avg_loss: 0.161024 +029700/063150, loss: 0.012189, avg_loss: 0.161009 +029705/063150, loss: 0.051321, avg_loss: 0.160991 +029710/063150, loss: 0.032166, avg_loss: 0.160975 +029715/063150, loss: 0.033893, avg_loss: 0.160953 +029720/063150, loss: 0.030671, avg_loss: 0.160931 +029725/063150, loss: 0.099010, avg_loss: 0.160913 +029730/063150, loss: 0.003936, avg_loss: 0.160891 +029735/063150, loss: 0.060138, avg_loss: 0.160868 +029740/063150, loss: 0.095448, avg_loss: 0.160853 +029745/063150, loss: 0.034447, avg_loss: 0.160829 +029750/063150, loss: 0.011397, avg_loss: 0.160807 +029755/063150, loss: 0.005566, avg_loss: 0.160782 +029760/063150, loss: 0.000922, avg_loss: 0.160757 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 29760/63150: {'accuracy': 0.8612385321100917} +029765/063150, loss: 0.091732, avg_loss: 0.160738 +029770/063150, loss: 0.002410, avg_loss: 0.160717 +029775/063150, loss: 0.003940, avg_loss: 0.160697 +029780/063150, loss: 0.006727, avg_loss: 0.160675 +029785/063150, loss: 0.065086, avg_loss: 0.160657 +029790/063150, loss: 0.032234, avg_loss: 0.160635 +029795/063150, loss: 0.025019, avg_loss: 0.160618 +029800/063150, loss: 0.004462, avg_loss: 0.160596 +029805/063150, loss: 0.038650, avg_loss: 0.160580 +029810/063150, loss: 0.044760, avg_loss: 0.160558 +029815/063150, loss: 0.002990, avg_loss: 0.160534 +029820/063150, loss: 0.006449, avg_loss: 0.160512 +029825/063150, loss: 0.007298, avg_loss: 0.160491 +029830/063150, loss: 0.026802, avg_loss: 0.160466 +029835/063150, loss: 0.014093, avg_loss: 
0.160441 +029840/063150, loss: 0.023324, avg_loss: 0.160415 +029845/063150, loss: 0.063629, avg_loss: 0.160402 +029850/063150, loss: 0.009189, avg_loss: 0.160377 +029855/063150, loss: 0.006470, avg_loss: 0.160355 +029860/063150, loss: 0.037101, avg_loss: 0.160332 +029865/063150, loss: 0.037977, avg_loss: 0.160310 +029870/063150, loss: 0.001203, avg_loss: 0.160292 +029875/063150, loss: 0.004675, avg_loss: 0.160271 +029880/063150, loss: 0.019148, avg_loss: 0.160248 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 29880/63150: {'accuracy': 0.8348623853211009} +029885/063150, loss: 0.229325, avg_loss: 0.160231 +029890/063150, loss: 0.014964, avg_loss: 0.160208 +029895/063150, loss: 0.010728, avg_loss: 0.160186 +029900/063150, loss: 0.006701, avg_loss: 0.160164 +029905/063150, loss: 0.136334, avg_loss: 0.160146 +029910/063150, loss: 0.005484, avg_loss: 0.160127 +029915/063150, loss: 0.028629, avg_loss: 0.160106 +029920/063150, loss: 0.055188, avg_loss: 0.160088 +029925/063150, loss: 0.005606, avg_loss: 0.160066 +029930/063150, loss: 0.023008, avg_loss: 0.160046 +029935/063150, loss: 0.003997, avg_loss: 0.160027 +029940/063150, loss: 0.046418, avg_loss: 0.160011 +029945/063150, loss: 0.003882, avg_loss: 0.159988 +029950/063150, loss: 0.020722, avg_loss: 0.159968 +029955/063150, loss: 0.267843, avg_loss: 0.159955 +029960/063150, loss: 0.033335, avg_loss: 0.159934 +029965/063150, loss: 0.016816, avg_loss: 0.159913 +029970/063150, loss: 0.105205, avg_loss: 0.159897 +029975/063150, loss: 0.032642, avg_loss: 0.159881 +029980/063150, loss: 0.017649, avg_loss: 0.159859 +029985/063150, loss: 0.006750, avg_loss: 0.159838 +029990/063150, loss: 0.051170, avg_loss: 0.159816 +029995/063150, loss: 0.065801, avg_loss: 0.159797 +030000/063150, loss: 0.047385, avg_loss: 0.159777 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30000/63150: {'accuracy': 0.856651376146789} +030005/063150, loss: 0.011315, avg_loss: 0.159752 +030010/063150, loss: 0.001336, avg_loss: 0.159730 +030015/063150, loss: 0.081847, avg_loss: 0.159716 +030020/063150, loss: 0.030773, avg_loss: 0.159693 +030025/063150, loss: 0.070896, avg_loss: 0.159671 +030030/063150, loss: 0.002881, avg_loss: 0.159648 +030035/063150, loss: 0.019054, avg_loss: 0.159624 +030040/063150, loss: 0.074761, avg_loss: 0.159605 +030045/063150, loss: 0.033359, avg_loss: 0.159583 +030050/063150, loss: 0.024879, avg_loss: 0.159564 +030055/063150, loss: 0.014777, avg_loss: 0.159541 +030060/063150, loss: 0.001530, avg_loss: 0.159534 +030065/063150, loss: 0.008303, avg_loss: 0.159512 +030070/063150, loss: 0.004331, avg_loss: 0.159488 +030075/063150, loss: 0.002786, avg_loss: 0.159465 +030080/063150, loss: 0.010111, avg_loss: 0.159443 +030085/063150, loss: 0.169485, avg_loss: 0.159428 +030090/063150, loss: 0.022356, avg_loss: 0.159411 +030095/063150, loss: 0.008158, avg_loss: 0.159392 +030100/063150, loss: 0.128152, avg_loss: 0.159378 +030105/063150, loss: 0.219819, avg_loss: 0.159362 +030110/063150, loss: 0.015893, avg_loss: 0.159344 +030115/063150, loss: 0.011225, avg_loss: 0.159324 +030120/063150, loss: 0.006620, avg_loss: 0.159299 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30120/63150: {'accuracy': 0.8509174311926605} +030125/063150, loss: 0.000971, avg_loss: 0.159276 +030130/063150, loss: 0.035606, avg_loss: 0.159253 +030135/063150, loss: 0.002767, avg_loss: 0.159232 
+030140/063150, loss: 0.009203, avg_loss: 0.159208 +030145/063150, loss: 0.018383, avg_loss: 0.159183 +030150/063150, loss: 0.107643, avg_loss: 0.159164 +030155/063150, loss: 0.077340, avg_loss: 0.159146 +030160/063150, loss: 0.008419, avg_loss: 0.159139 +030165/063150, loss: 0.065853, avg_loss: 0.159117 +030170/063150, loss: 0.015150, avg_loss: 0.159097 +030175/063150, loss: 0.004214, avg_loss: 0.159074 +030180/063150, loss: 0.083018, avg_loss: 0.159052 +030185/063150, loss: 0.049122, avg_loss: 0.159032 +030190/063150, loss: 0.043142, avg_loss: 0.159028 +030195/063150, loss: 0.003641, avg_loss: 0.159006 +030200/063150, loss: 0.018801, avg_loss: 0.158997 +030205/063150, loss: 0.001541, avg_loss: 0.158979 +030210/063150, loss: 0.002915, avg_loss: 0.158959 +030215/063150, loss: 0.048952, avg_loss: 0.158944 +030220/063150, loss: 0.008590, avg_loss: 0.158925 +030225/063150, loss: 0.118306, avg_loss: 0.158908 +030230/063150, loss: 0.113757, avg_loss: 0.158888 +030235/063150, loss: 0.114253, avg_loss: 0.158869 +030240/063150, loss: 0.015734, avg_loss: 0.158852 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30240/63150: {'accuracy': 0.8555045871559633} +030245/063150, loss: 0.015323, avg_loss: 0.158841 +030250/063150, loss: 0.052867, avg_loss: 0.158823 +030255/063150, loss: 0.002096, avg_loss: 0.158806 +030260/063150, loss: 0.003224, avg_loss: 0.158787 +030265/063150, loss: 0.028060, avg_loss: 0.158766 +030270/063150, loss: 0.010054, avg_loss: 0.158749 +030275/063150, loss: 0.002811, avg_loss: 0.158727 +030280/063150, loss: 0.021772, avg_loss: 0.158711 +030285/063150, loss: 0.018648, avg_loss: 0.158694 +030290/063150, loss: 0.051021, avg_loss: 0.158673 +030295/063150, loss: 0.017929, avg_loss: 0.158651 +030300/063150, loss: 0.206296, avg_loss: 0.158641 +030305/063150, loss: 0.028120, avg_loss: 0.158626 +030310/063150, loss: 0.025317, avg_loss: 0.158608 +030315/063150, loss: 0.247147, avg_loss: 0.158598 +030320/063150, loss: 0.010860, avg_loss: 0.158575 +030325/063150, loss: 0.058276, avg_loss: 0.158556 +030330/063150, loss: 0.030149, avg_loss: 0.158533 +030335/063150, loss: 0.004202, avg_loss: 0.158515 +030340/063150, loss: 0.005641, avg_loss: 0.158491 +030345/063150, loss: 0.095417, avg_loss: 0.158480 +030350/063150, loss: 0.013658, avg_loss: 0.158464 +030355/063150, loss: 0.059661, avg_loss: 0.158443 +030360/063150, loss: 0.032731, avg_loss: 0.158423 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30360/63150: {'accuracy': 0.8497706422018348} +030365/063150, loss: 0.002510, avg_loss: 0.158402 +030370/063150, loss: 0.149434, avg_loss: 0.158384 +030375/063150, loss: 0.001370, avg_loss: 0.158359 +030380/063150, loss: 0.085941, avg_loss: 0.158343 +030385/063150, loss: 0.009599, avg_loss: 0.158323 +030390/063150, loss: 0.002361, avg_loss: 0.158310 +030395/063150, loss: 0.000963, avg_loss: 0.158293 +030400/063150, loss: 0.004199, avg_loss: 0.158274 +030405/063150, loss: 0.001675, avg_loss: 0.158251 +030410/063150, loss: 0.017970, avg_loss: 0.158228 +030415/063150, loss: 0.041820, avg_loss: 0.158211 +030420/063150, loss: 0.032792, avg_loss: 0.158192 +030425/063150, loss: 0.153325, avg_loss: 0.158174 +030430/063150, loss: 0.006793, avg_loss: 0.158157 +030435/063150, loss: 0.005587, avg_loss: 0.158133 +030440/063150, loss: 0.032403, avg_loss: 0.158111 +030445/063150, loss: 0.012293, avg_loss: 0.158087 +030450/063150, loss: 0.036248, avg_loss: 0.158068 +030455/063150, 
loss: 0.018853, avg_loss: 0.158044 +030460/063150, loss: 0.061686, avg_loss: 0.158028 +030465/063150, loss: 0.098958, avg_loss: 0.158013 +030470/063150, loss: 0.027358, avg_loss: 0.157994 +030475/063150, loss: 0.038179, avg_loss: 0.157972 +030480/063150, loss: 0.008890, avg_loss: 0.157951 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30480/63150: {'accuracy': 0.8543577981651376} +030485/063150, loss: 0.003946, avg_loss: 0.157934 +030490/063150, loss: 0.014210, avg_loss: 0.157910 +030495/063150, loss: 0.001265, avg_loss: 0.157889 +030500/063150, loss: 0.106211, avg_loss: 0.157878 +030505/063150, loss: 0.008263, avg_loss: 0.157856 +030510/063150, loss: 0.085025, avg_loss: 0.157839 +030515/063150, loss: 0.072008, avg_loss: 0.157821 +030520/063150, loss: 0.001404, avg_loss: 0.157799 +030525/063150, loss: 0.012265, avg_loss: 0.157777 +030530/063150, loss: 0.147890, avg_loss: 0.157761 +030535/063150, loss: 0.039767, avg_loss: 0.157739 +030540/063150, loss: 0.005405, avg_loss: 0.157717 +030545/063150, loss: 0.006588, avg_loss: 0.157700 +030550/063150, loss: 0.018616, avg_loss: 0.157681 +030555/063150, loss: 0.057121, avg_loss: 0.157661 +030560/063150, loss: 0.099701, avg_loss: 0.157640 +030565/063150, loss: 0.036851, avg_loss: 0.157623 +030570/063150, loss: 0.047026, avg_loss: 0.157606 +030575/063150, loss: 0.004419, avg_loss: 0.157582 +030580/063150, loss: 0.044898, avg_loss: 0.157565 +030585/063150, loss: 0.021919, avg_loss: 0.157550 +030590/063150, loss: 0.003510, avg_loss: 0.157530 +030595/063150, loss: 0.010405, avg_loss: 0.157510 +030600/063150, loss: 0.039771, avg_loss: 0.157491 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30600/63150: {'accuracy': 0.8589449541284404} +030605/063150, loss: 0.036015, avg_loss: 0.157473 +030610/063150, loss: 0.025019, avg_loss: 0.157452 +030615/063150, loss: 0.002405, avg_loss: 0.157429 +030620/063150, loss: 0.002226, avg_loss: 0.157408 +030625/063150, loss: 0.081129, avg_loss: 0.157390 +030630/063150, loss: 0.011506, avg_loss: 0.157372 +030635/063150, loss: 0.014530, avg_loss: 0.157363 +030640/063150, loss: 0.032427, avg_loss: 0.157342 +030645/063150, loss: 0.008573, avg_loss: 0.157320 +030650/063150, loss: 0.002463, avg_loss: 0.157299 +030655/063150, loss: 0.024388, avg_loss: 0.157276 +030660/063150, loss: 0.006331, avg_loss: 0.157261 +030665/063150, loss: 0.010446, avg_loss: 0.157238 +030670/063150, loss: 0.018412, avg_loss: 0.157218 +030675/063150, loss: 0.008687, avg_loss: 0.157204 +030680/063150, loss: 0.071305, avg_loss: 0.157186 +030685/063150, loss: 0.032300, avg_loss: 0.157171 +030690/063150, loss: 0.037697, avg_loss: 0.157151 +030695/063150, loss: 0.005659, avg_loss: 0.157130 +030700/063150, loss: 0.230399, avg_loss: 0.157121 +030705/063150, loss: 0.127813, avg_loss: 0.157103 +030710/063150, loss: 0.002888, avg_loss: 0.157086 +030715/063150, loss: 0.022934, avg_loss: 0.157068 +030720/063150, loss: 0.006306, avg_loss: 0.157045 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30720/63150: {'accuracy': 0.856651376146789} +030725/063150, loss: 0.005108, avg_loss: 0.157027 +030730/063150, loss: 0.061327, avg_loss: 0.157007 +030735/063150, loss: 0.053550, avg_loss: 0.156997 +030740/063150, loss: 0.017894, avg_loss: 0.156974 +030745/063150, loss: 0.003548, avg_loss: 0.156951 +030750/063150, loss: 0.001552, avg_loss: 0.156928 +030755/063150, loss: 
0.062007, avg_loss: 0.156906 +030760/063150, loss: 0.158605, avg_loss: 0.156890 +030765/063150, loss: 0.070497, avg_loss: 0.156876 +030770/063150, loss: 0.261261, avg_loss: 0.156860 +030775/063150, loss: 0.075505, avg_loss: 0.156842 +030780/063150, loss: 0.015446, avg_loss: 0.156827 +030785/063150, loss: 0.017108, avg_loss: 0.156806 +030790/063150, loss: 0.030346, avg_loss: 0.156789 +030795/063150, loss: 0.008710, avg_loss: 0.156768 +030800/063150, loss: 0.003268, avg_loss: 0.156745 +030805/063150, loss: 0.055026, avg_loss: 0.156731 +030810/063150, loss: 0.009798, avg_loss: 0.156718 +030815/063150, loss: 0.003267, avg_loss: 0.156697 +030820/063150, loss: 0.228870, avg_loss: 0.156687 +030825/063150, loss: 0.048969, avg_loss: 0.156664 +030830/063150, loss: 0.052599, avg_loss: 0.156642 +030835/063150, loss: 0.003540, avg_loss: 0.156623 +030840/063150, loss: 0.035328, avg_loss: 0.156602 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30840/63150: {'accuracy': 0.8509174311926605} +030845/063150, loss: 0.253468, avg_loss: 0.156588 +030850/063150, loss: 0.004320, avg_loss: 0.156566 +030855/063150, loss: 0.027654, avg_loss: 0.156546 +030860/063150, loss: 0.040192, avg_loss: 0.156526 +030865/063150, loss: 0.136494, avg_loss: 0.156516 +030870/063150, loss: 0.032073, avg_loss: 0.156494 +030875/063150, loss: 0.008085, avg_loss: 0.156472 +030880/063150, loss: 0.032437, avg_loss: 0.156449 +030885/063150, loss: 0.003916, avg_loss: 0.156433 +030890/063150, loss: 0.001516, avg_loss: 0.156409 +030895/063150, loss: 0.026923, avg_loss: 0.156388 +030900/063150, loss: 0.060765, avg_loss: 0.156376 +030905/063150, loss: 0.028656, avg_loss: 0.156358 +030910/063150, loss: 0.000778, avg_loss: 0.156347 +030915/063150, loss: 0.040568, avg_loss: 0.156324 +030920/063150, loss: 0.001278, avg_loss: 0.156306 +030925/063150, loss: 0.002350, avg_loss: 0.156285 +030930/063150, loss: 0.009146, avg_loss: 0.156262 +030935/063150, loss: 0.007925, avg_loss: 0.156242 +030940/063150, loss: 0.023785, avg_loss: 0.156223 +030945/063150, loss: 0.146085, avg_loss: 0.156207 +030950/063150, loss: 0.115348, avg_loss: 0.156186 +030955/063150, loss: 0.046772, avg_loss: 0.156168 +030960/063150, loss: 0.047580, avg_loss: 0.156150 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 30960/63150: {'accuracy': 0.8486238532110092} +030965/063150, loss: 0.001757, avg_loss: 0.156129 +030970/063150, loss: 0.004208, avg_loss: 0.156107 +030975/063150, loss: 0.057633, avg_loss: 0.156091 +030980/063150, loss: 0.053134, avg_loss: 0.156070 +030985/063150, loss: 0.006671, avg_loss: 0.156047 +030990/063150, loss: 0.030251, avg_loss: 0.156024 +030995/063150, loss: 0.015772, avg_loss: 0.156007 +031000/063150, loss: 0.051082, avg_loss: 0.155995 +031005/063150, loss: 0.001429, avg_loss: 0.155975 +031010/063150, loss: 0.010632, avg_loss: 0.155954 +031015/063150, loss: 0.067887, avg_loss: 0.155934 +031020/063150, loss: 0.011314, avg_loss: 0.155911 +031025/063150, loss: 0.060011, avg_loss: 0.155891 +031030/063150, loss: 0.017256, avg_loss: 0.155869 +031035/063150, loss: 0.004272, avg_loss: 0.155846 +031040/063150, loss: 0.000855, avg_loss: 0.155826 +031045/063150, loss: 0.070024, avg_loss: 0.155811 +031050/063150, loss: 0.008011, avg_loss: 0.155798 +031055/063150, loss: 0.042606, avg_loss: 0.155782 +031060/063150, loss: 0.073933, avg_loss: 0.155774 +031065/063150, loss: 0.020848, avg_loss: 0.155752 +031070/063150, loss: 0.095692, 
avg_loss: 0.155740 +031075/063150, loss: 0.103942, avg_loss: 0.155729 +031080/063150, loss: 0.039420, avg_loss: 0.155713 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 31080/63150: {'accuracy': 0.8497706422018348} +031085/063150, loss: 0.079751, avg_loss: 0.155696 +031090/063150, loss: 0.045403, avg_loss: 0.155675 +031095/063150, loss: 0.007622, avg_loss: 0.155654 +031100/063150, loss: 0.024870, avg_loss: 0.155640 +031105/063150, loss: 0.002540, avg_loss: 0.155622 +031110/063150, loss: 0.031332, avg_loss: 0.155608 +031115/063150, loss: 0.016780, avg_loss: 0.155594 +031120/063150, loss: 0.061048, avg_loss: 0.155579 +031125/063150, loss: 0.020524, avg_loss: 0.155558 +031130/063150, loss: 0.014199, avg_loss: 0.155539 +031135/063150, loss: 0.015381, avg_loss: 0.155515 +031140/063150, loss: 0.003200, avg_loss: 0.155496 +031145/063150, loss: 0.002946, avg_loss: 0.155478 +031150/063150, loss: 0.018285, avg_loss: 0.155455 +031155/063150, loss: 0.003634, avg_loss: 0.155433 +031160/063150, loss: 0.016002, avg_loss: 0.155417 +031165/063150, loss: 0.005283, avg_loss: 0.155396 +031170/063150, loss: 0.081333, avg_loss: 0.155377 +031175/063150, loss: 0.047920, avg_loss: 0.155357 +031180/063150, loss: 0.004385, avg_loss: 0.155335 +031185/063150, loss: 0.007167, avg_loss: 0.155315 +031190/063150, loss: 0.017125, avg_loss: 0.155301 +031195/063150, loss: 0.070240, avg_loss: 0.155288 +031200/063150, loss: 0.020473, avg_loss: 0.155270 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 31200/63150: {'accuracy': 0.8486238532110092} +031205/063150, loss: 0.043037, avg_loss: 0.155253 +031210/063150, loss: 0.006261, avg_loss: 0.155229 +031215/063150, loss: 0.001673, avg_loss: 0.155210 +031220/063150, loss: 0.008318, avg_loss: 0.155189 +031225/063150, loss: 0.008910, avg_loss: 0.155175 +031230/063150, loss: 0.007713, avg_loss: 0.155154 +031235/063150, loss: 0.068322, avg_loss: 0.155134 +031240/063150, loss: 0.037572, avg_loss: 0.155120 +031245/063150, loss: 0.002862, avg_loss: 0.155104 +031250/063150, loss: 0.002596, avg_loss: 0.155091 +031255/063150, loss: 0.076086, avg_loss: 0.155072 +031260/063150, loss: 0.046203, avg_loss: 0.155056 +031265/063150, loss: 0.008974, avg_loss: 0.155036 +031270/063150, loss: 0.024836, avg_loss: 0.155013 +031275/063150, loss: 0.042243, avg_loss: 0.154996 +031280/063150, loss: 0.020888, avg_loss: 0.154977 +031285/063150, loss: 0.003390, avg_loss: 0.154959 +031290/063150, loss: 0.026576, avg_loss: 0.154938 +031295/063150, loss: 0.223176, avg_loss: 0.154928 +031300/063150, loss: 0.061455, avg_loss: 0.154913 +031305/063150, loss: 0.107567, avg_loss: 0.154896 +031310/063150, loss: 0.029950, avg_loss: 0.154878 +031315/063150, loss: 0.010695, avg_loss: 0.154859 +031320/063150, loss: 0.011212, avg_loss: 0.154841 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 31320/63150: {'accuracy': 0.8440366972477065} +031325/063150, loss: 0.024456, avg_loss: 0.154826 +031330/063150, loss: 0.057724, avg_loss: 0.154810 +031335/063150, loss: 0.012927, avg_loss: 0.154794 +031340/063150, loss: 0.002259, avg_loss: 0.154781 +031345/063150, loss: 0.018965, avg_loss: 0.154759 +031350/063150, loss: 0.019749, avg_loss: 0.154739 +031355/063150, loss: 0.034422, avg_loss: 0.154720 +031360/063150, loss: 0.012534, avg_loss: 0.154699 +031365/063150, loss: 0.026447, avg_loss: 0.154683 +031370/063150, loss: 0.161248, 
avg_loss: 0.154674 +031375/063150, loss: 0.037981, avg_loss: 0.154656 +031380/063150, loss: 0.037271, avg_loss: 0.154635 +031385/063150, loss: 0.049310, avg_loss: 0.154615 +031390/063150, loss: 0.015623, avg_loss: 0.154597 +031395/063150, loss: 0.038250, avg_loss: 0.154579 +031400/063150, loss: 0.021816, avg_loss: 0.154557 +031405/063150, loss: 0.018133, avg_loss: 0.154537 +031410/063150, loss: 0.009122, avg_loss: 0.154521 +031415/063150, loss: 0.026153, avg_loss: 0.154502 +031420/063150, loss: 0.002189, avg_loss: 0.154481 +031425/063150, loss: 0.025579, avg_loss: 0.154459 +031430/063150, loss: 0.062954, avg_loss: 0.154443 +031435/063150, loss: 0.015800, avg_loss: 0.154426 +031440/063150, loss: 0.143509, avg_loss: 0.154408 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 31440/63150: {'accuracy': 0.8555045871559633} +031445/063150, loss: 0.022480, avg_loss: 0.154388 +031450/063150, loss: 0.024612, avg_loss: 0.154367 +031455/063150, loss: 0.093458, avg_loss: 0.154359 +031460/063150, loss: 0.006582, avg_loss: 0.154341 +031465/063150, loss: 0.003654, avg_loss: 0.154322 +031470/063150, loss: 0.008506, avg_loss: 0.154314 +031475/063150, loss: 0.025396, avg_loss: 0.154295 +031480/063150, loss: 0.011642, avg_loss: 0.154279 +031485/063150, loss: 0.038146, avg_loss: 0.154260 +031490/063150, loss: 0.011713, avg_loss: 0.154243 +031495/063150, loss: 0.003430, avg_loss: 0.154222 +031500/063150, loss: 0.106159, avg_loss: 0.154207 +031505/063150, loss: 0.032232, avg_loss: 0.154186 +031510/063150, loss: 0.012164, avg_loss: 0.154167 +031515/063150, loss: 0.001131, avg_loss: 0.154148 +031520/063150, loss: 0.018311, avg_loss: 0.154134 +031525/063150, loss: 0.222614, avg_loss: 0.154119 +031530/063150, loss: 0.067950, avg_loss: 0.154101 +031535/063150, loss: 0.002931, avg_loss: 0.154081 +031540/063150, loss: 0.037557, avg_loss: 0.154062 +031545/063150, loss: 0.054188, avg_loss: 0.154045 +031550/063150, loss: 0.033414, avg_loss: 0.154022 +031555/063150, loss: 0.044104, avg_loss: 0.154006 +031560/063150, loss: 0.139939, avg_loss: 0.153988 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 14, step 31560/63150: {'accuracy': 0.8555045871559633} +031565/063150, loss: 0.069428, avg_loss: 0.153969 +031570/063150, loss: 0.048148, avg_loss: 0.153949 +031575/063150, loss: 0.017297, avg_loss: 0.153933 +031580/063150, loss: 0.215259, avg_loss: 0.153925 +031585/063150, loss: 0.006609, avg_loss: 0.153902 +031590/063150, loss: 0.011902, avg_loss: 0.153883 +031595/063150, loss: 0.012367, avg_loss: 0.153866 +031600/063150, loss: 0.079808, avg_loss: 0.153848 +031605/063150, loss: 0.017448, avg_loss: 0.153827 +031610/063150, loss: 0.004014, avg_loss: 0.153806 +031615/063150, loss: 0.050524, avg_loss: 0.153787 +031620/063150, loss: 0.003370, avg_loss: 0.153766 +031625/063150, loss: 0.014273, avg_loss: 0.153750 +031630/063150, loss: 0.037066, avg_loss: 0.153731 +031635/063150, loss: 0.022845, avg_loss: 0.153709 +031640/063150, loss: 0.223372, avg_loss: 0.153694 +031645/063150, loss: 0.006216, avg_loss: 0.153673 +031650/063150, loss: 0.020023, avg_loss: 0.153654 +031655/063150, loss: 0.019544, avg_loss: 0.153632 +031660/063150, loss: 0.001398, avg_loss: 0.153611 +031665/063150, loss: 0.000737, avg_loss: 0.153588 +031670/063150, loss: 0.003803, avg_loss: 0.153569 +031675/063150, loss: 0.003401, avg_loss: 0.153549 +031680/063150, loss: 0.003137, avg_loss: 0.153528 +***** Running dev evaluation ***** + Num 
examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 31680/63150: {'accuracy': 0.8451834862385321} +031685/063150, loss: 0.007107, avg_loss: 0.153509 +031690/063150, loss: 0.000546, avg_loss: 0.153487 +031695/063150, loss: 0.029930, avg_loss: 0.153466 +031700/063150, loss: 0.010998, avg_loss: 0.153443 +031705/063150, loss: 0.007357, avg_loss: 0.153426 +031710/063150, loss: 0.001457, avg_loss: 0.153406 +031715/063150, loss: 0.002038, avg_loss: 0.153384 +031720/063150, loss: 0.018246, avg_loss: 0.153362 +031725/063150, loss: 0.062755, avg_loss: 0.153349 +031730/063150, loss: 0.029712, avg_loss: 0.153329 +031735/063150, loss: 0.002405, avg_loss: 0.153309 +031740/063150, loss: 0.023896, avg_loss: 0.153296 +031745/063150, loss: 0.225884, avg_loss: 0.153281 +031750/063150, loss: 0.004821, avg_loss: 0.153262 +031755/063150, loss: 0.021150, avg_loss: 0.153242 +031760/063150, loss: 0.071390, avg_loss: 0.153223 +031765/063150, loss: 0.035106, avg_loss: 0.153204 +031770/063150, loss: 0.001874, avg_loss: 0.153187 +031775/063150, loss: 0.014004, avg_loss: 0.153170 +031780/063150, loss: 0.078600, avg_loss: 0.153149 +031785/063150, loss: 0.036884, avg_loss: 0.153129 +031790/063150, loss: 0.012744, avg_loss: 0.153108 +031795/063150, loss: 0.005733, avg_loss: 0.153090 +031800/063150, loss: 0.260414, avg_loss: 0.153080 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 31800/63150: {'accuracy': 0.8543577981651376} +031805/063150, loss: 0.014929, avg_loss: 0.153064 +031810/063150, loss: 0.009144, avg_loss: 0.153042 +031815/063150, loss: 0.055491, avg_loss: 0.153022 +031820/063150, loss: 0.065123, avg_loss: 0.153006 +031825/063150, loss: 0.009100, avg_loss: 0.152984 +031830/063150, loss: 0.122980, avg_loss: 0.152968 +031835/063150, loss: 0.118446, avg_loss: 0.152954 +031840/063150, loss: 0.000656, avg_loss: 0.152932 +031845/063150, loss: 0.150223, avg_loss: 0.152917 +031850/063150, loss: 0.139656, avg_loss: 0.152903 +031855/063150, loss: 0.012258, avg_loss: 0.152886 +031860/063150, loss: 0.054694, avg_loss: 0.152870 +031865/063150, loss: 0.014799, avg_loss: 0.152848 +031870/063150, loss: 0.005471, avg_loss: 0.152827 +031875/063150, loss: 0.005437, avg_loss: 0.152812 +031880/063150, loss: 0.006934, avg_loss: 0.152792 +031885/063150, loss: 0.033596, avg_loss: 0.152770 +031890/063150, loss: 0.013615, avg_loss: 0.152750 +031895/063150, loss: 0.005589, avg_loss: 0.152731 +031900/063150, loss: 0.066042, avg_loss: 0.152715 +031905/063150, loss: 0.010067, avg_loss: 0.152696 +031910/063150, loss: 0.005534, avg_loss: 0.152678 +031915/063150, loss: 0.031302, avg_loss: 0.152659 +031920/063150, loss: 0.015648, avg_loss: 0.152647 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 31920/63150: {'accuracy': 0.856651376146789} +031925/063150, loss: 0.004087, avg_loss: 0.152628 +031930/063150, loss: 0.012033, avg_loss: 0.152606 +031935/063150, loss: 0.132375, avg_loss: 0.152587 +031940/063150, loss: 0.019506, avg_loss: 0.152567 +031945/063150, loss: 0.006165, avg_loss: 0.152546 +031950/063150, loss: 0.023628, avg_loss: 0.152526 +031955/063150, loss: 0.029892, avg_loss: 0.152509 +031960/063150, loss: 0.002362, avg_loss: 0.152489 +031965/063150, loss: 0.088375, avg_loss: 0.152477 +031970/063150, loss: 0.103584, avg_loss: 0.152460 +031975/063150, loss: 0.006373, avg_loss: 0.152443 +031980/063150, loss: 0.004424, avg_loss: 0.152429 +031985/063150, loss: 0.005649, avg_loss: 0.152414 
+031990/063150, loss: 0.004369, avg_loss: 0.152393 +031995/063150, loss: 0.003095, avg_loss: 0.152369 +032000/063150, loss: 0.003324, avg_loss: 0.152346 +032005/063150, loss: 0.007763, avg_loss: 0.152327 +032010/063150, loss: 0.002517, avg_loss: 0.152310 +032015/063150, loss: 0.008595, avg_loss: 0.152290 +032020/063150, loss: 0.004567, avg_loss: 0.152270 +032025/063150, loss: 0.001465, avg_loss: 0.152249 +032030/063150, loss: 0.003452, avg_loss: 0.152227 +032035/063150, loss: 0.001302, avg_loss: 0.152207 +032040/063150, loss: 0.014194, avg_loss: 0.152194 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32040/63150: {'accuracy': 0.8555045871559633} +032045/063150, loss: 0.046692, avg_loss: 0.152179 +032050/063150, loss: 0.014825, avg_loss: 0.152162 +032055/063150, loss: 0.096797, avg_loss: 0.152145 +032060/063150, loss: 0.039867, avg_loss: 0.152127 +032065/063150, loss: 0.004809, avg_loss: 0.152105 +032070/063150, loss: 0.074303, avg_loss: 0.152085 +032075/063150, loss: 0.021751, avg_loss: 0.152063 +032080/063150, loss: 0.069260, avg_loss: 0.152045 +032085/063150, loss: 0.020678, avg_loss: 0.152023 +032090/063150, loss: 0.063975, avg_loss: 0.152004 +032095/063150, loss: 0.003220, avg_loss: 0.151983 +032100/063150, loss: 0.014715, avg_loss: 0.151964 +032105/063150, loss: 0.004824, avg_loss: 0.151947 +032110/063150, loss: 0.166059, avg_loss: 0.151934 +032115/063150, loss: 0.011776, avg_loss: 0.151914 +032120/063150, loss: 0.001186, avg_loss: 0.151897 +032125/063150, loss: 0.003742, avg_loss: 0.151875 +032130/063150, loss: 0.009916, avg_loss: 0.151852 +032135/063150, loss: 0.003336, avg_loss: 0.151831 +032140/063150, loss: 0.033249, avg_loss: 0.151830 +032145/063150, loss: 0.048928, avg_loss: 0.151817 +032150/063150, loss: 0.006084, avg_loss: 0.151797 +032155/063150, loss: 0.050117, avg_loss: 0.151781 +032160/063150, loss: 0.009630, avg_loss: 0.151763 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32160/63150: {'accuracy': 0.8577981651376146} +032165/063150, loss: 0.082680, avg_loss: 0.151745 +032170/063150, loss: 0.006695, avg_loss: 0.151725 +032175/063150, loss: 0.101264, avg_loss: 0.151710 +032180/063150, loss: 0.037066, avg_loss: 0.151693 +032185/063150, loss: 0.001105, avg_loss: 0.151671 +032190/063150, loss: 0.036753, avg_loss: 0.151651 +032195/063150, loss: 0.001140, avg_loss: 0.151638 +032200/063150, loss: 0.080171, avg_loss: 0.151621 +032205/063150, loss: 0.024211, avg_loss: 0.151607 +032210/063150, loss: 0.019681, avg_loss: 0.151589 +032215/063150, loss: 0.068347, avg_loss: 0.151570 +032220/063150, loss: 0.052649, avg_loss: 0.151551 +032225/063150, loss: 0.117048, avg_loss: 0.151533 +032230/063150, loss: 0.061031, avg_loss: 0.151519 +032235/063150, loss: 0.031470, avg_loss: 0.151498 +032240/063150, loss: 0.004966, avg_loss: 0.151483 +032245/063150, loss: 0.010349, avg_loss: 0.151467 +032250/063150, loss: 0.028626, avg_loss: 0.151450 +032255/063150, loss: 0.022142, avg_loss: 0.151434 +032260/063150, loss: 0.040613, avg_loss: 0.151415 +032265/063150, loss: 0.032994, avg_loss: 0.151396 +032270/063150, loss: 0.002019, avg_loss: 0.151375 +032275/063150, loss: 0.005279, avg_loss: 0.151356 +032280/063150, loss: 0.212202, avg_loss: 0.151347 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32280/63150: {'accuracy': 0.8532110091743119} +032285/063150, loss: 0.012638, avg_loss: 0.151324 
+032290/063150, loss: 0.064646, avg_loss: 0.151308 +032295/063150, loss: 0.081643, avg_loss: 0.151295 +032300/063150, loss: 0.003165, avg_loss: 0.151274 +032305/063150, loss: 0.011685, avg_loss: 0.151254 +032310/063150, loss: 0.051202, avg_loss: 0.151238 +032315/063150, loss: 0.044343, avg_loss: 0.151217 +032320/063150, loss: 0.002301, avg_loss: 0.151197 +032325/063150, loss: 0.001429, avg_loss: 0.151179 +032330/063150, loss: 0.160796, avg_loss: 0.151165 +032335/063150, loss: 0.070961, avg_loss: 0.151147 +032340/063150, loss: 0.048449, avg_loss: 0.151134 +032345/063150, loss: 0.008753, avg_loss: 0.151117 +032350/063150, loss: 0.030852, avg_loss: 0.151100 +032355/063150, loss: 0.004543, avg_loss: 0.151085 +032360/063150, loss: 0.011918, avg_loss: 0.151068 +032365/063150, loss: 0.197130, avg_loss: 0.151057 +032370/063150, loss: 0.018620, avg_loss: 0.151041 +032375/063150, loss: 0.087574, avg_loss: 0.151022 +032380/063150, loss: 0.026363, avg_loss: 0.151001 +032385/063150, loss: 0.017810, avg_loss: 0.150980 +032390/063150, loss: 0.061095, avg_loss: 0.150962 +032395/063150, loss: 0.004592, avg_loss: 0.150945 +032400/063150, loss: 0.314244, avg_loss: 0.150932 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32400/63150: {'accuracy': 0.8428899082568807} +032405/063150, loss: 0.010465, avg_loss: 0.150910 +032410/063150, loss: 0.034104, avg_loss: 0.150891 +032415/063150, loss: 0.080379, avg_loss: 0.150884 +032420/063150, loss: 0.005336, avg_loss: 0.150861 +032425/063150, loss: 0.018968, avg_loss: 0.150842 +032430/063150, loss: 0.045775, avg_loss: 0.150830 +032435/063150, loss: 0.010366, avg_loss: 0.150813 +032440/063150, loss: 0.017216, avg_loss: 0.150793 +032445/063150, loss: 0.062167, avg_loss: 0.150775 +032450/063150, loss: 0.048706, avg_loss: 0.150764 +032455/063150, loss: 0.001584, avg_loss: 0.150744 +032460/063150, loss: 0.012657, avg_loss: 0.150726 +032465/063150, loss: 0.042367, avg_loss: 0.150712 +032470/063150, loss: 0.004825, avg_loss: 0.150695 +032475/063150, loss: 0.081176, avg_loss: 0.150676 +032480/063150, loss: 0.038419, avg_loss: 0.150656 +032485/063150, loss: 0.089686, avg_loss: 0.150640 +032490/063150, loss: 0.000725, avg_loss: 0.150622 +032495/063150, loss: 0.151417, avg_loss: 0.150608 +032500/063150, loss: 0.016084, avg_loss: 0.150590 +032505/063150, loss: 0.006126, avg_loss: 0.150571 +032510/063150, loss: 0.013508, avg_loss: 0.150555 +032515/063150, loss: 0.030435, avg_loss: 0.150539 +032520/063150, loss: 0.013757, avg_loss: 0.150518 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32520/63150: {'accuracy': 0.8555045871559633} +032525/063150, loss: 0.002705, avg_loss: 0.150498 +032530/063150, loss: 0.094880, avg_loss: 0.150482 +032535/063150, loss: 0.022475, avg_loss: 0.150466 +032540/063150, loss: 0.010388, avg_loss: 0.150445 +032545/063150, loss: 0.002501, avg_loss: 0.150424 +032550/063150, loss: 0.009591, avg_loss: 0.150403 +032555/063150, loss: 0.019016, avg_loss: 0.150382 +032560/063150, loss: 0.012960, avg_loss: 0.150366 +032565/063150, loss: 0.003944, avg_loss: 0.150349 +032570/063150, loss: 0.041830, avg_loss: 0.150334 +032575/063150, loss: 0.011738, avg_loss: 0.150323 +032580/063150, loss: 0.001348, avg_loss: 0.150301 +032585/063150, loss: 0.001198, avg_loss: 0.150283 +032590/063150, loss: 0.184000, avg_loss: 0.150276 +032595/063150, loss: 0.040405, avg_loss: 0.150259 +032600/063150, loss: 0.017433, avg_loss: 0.150242 +032605/063150, 
loss: 0.064917, avg_loss: 0.150224 +032610/063150, loss: 0.016867, avg_loss: 0.150204 +032615/063150, loss: 0.039404, avg_loss: 0.150187 +032620/063150, loss: 0.023279, avg_loss: 0.150171 +032625/063150, loss: 0.009219, avg_loss: 0.150151 +032630/063150, loss: 0.078166, avg_loss: 0.150141 +032635/063150, loss: 0.018076, avg_loss: 0.150125 +032640/063150, loss: 0.019608, avg_loss: 0.150108 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32640/63150: {'accuracy': 0.8520642201834863} +032645/063150, loss: 0.029135, avg_loss: 0.150089 +032650/063150, loss: 0.023187, avg_loss: 0.150068 +032655/063150, loss: 0.071713, avg_loss: 0.150059 +032660/063150, loss: 0.051653, avg_loss: 0.150044 +032665/063150, loss: 0.110812, avg_loss: 0.150026 +032670/063150, loss: 0.077080, avg_loss: 0.150008 +032675/063150, loss: 0.020345, avg_loss: 0.149992 +032680/063150, loss: 0.011876, avg_loss: 0.149973 +032685/063150, loss: 0.014235, avg_loss: 0.149954 +032690/063150, loss: 0.049570, avg_loss: 0.149937 +032695/063150, loss: 0.004712, avg_loss: 0.149918 +032700/063150, loss: 0.064658, avg_loss: 0.149900 +032705/063150, loss: 0.001231, avg_loss: 0.149881 +032710/063150, loss: 0.004263, avg_loss: 0.149872 +032715/063150, loss: 0.040360, avg_loss: 0.149854 +032720/063150, loss: 0.077648, avg_loss: 0.149836 +032725/063150, loss: 0.020749, avg_loss: 0.149818 +032730/063150, loss: 0.003180, avg_loss: 0.149799 +032735/063150, loss: 0.088896, avg_loss: 0.149782 +032740/063150, loss: 0.025926, avg_loss: 0.149767 +032745/063150, loss: 0.080748, avg_loss: 0.149752 +032750/063150, loss: 0.010617, avg_loss: 0.149745 +032755/063150, loss: 0.017792, avg_loss: 0.149724 +032760/063150, loss: 0.011277, avg_loss: 0.149707 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32760/63150: {'accuracy': 0.8153669724770642} +032765/063150, loss: 0.095582, avg_loss: 0.149694 +032770/063150, loss: 0.001406, avg_loss: 0.149675 +032775/063150, loss: 0.013481, avg_loss: 0.149656 +032780/063150, loss: 0.011827, avg_loss: 0.149641 +032785/063150, loss: 0.023711, avg_loss: 0.149623 +032790/063150, loss: 0.025093, avg_loss: 0.149606 +032795/063150, loss: 0.023212, avg_loss: 0.149588 +032800/063150, loss: 0.001642, avg_loss: 0.149569 +032805/063150, loss: 0.019108, avg_loss: 0.149552 +032810/063150, loss: 0.000516, avg_loss: 0.149532 +032815/063150, loss: 0.000591, avg_loss: 0.149515 +032820/063150, loss: 0.015859, avg_loss: 0.149500 +032825/063150, loss: 0.001923, avg_loss: 0.149486 +032830/063150, loss: 0.030517, avg_loss: 0.149465 +032835/063150, loss: 0.026463, avg_loss: 0.149448 +032840/063150, loss: 0.005815, avg_loss: 0.149431 +032845/063150, loss: 0.103565, avg_loss: 0.149418 +032850/063150, loss: 0.018569, avg_loss: 0.149401 +032855/063150, loss: 0.013975, avg_loss: 0.149383 +032860/063150, loss: 0.011660, avg_loss: 0.149368 +032865/063150, loss: 0.170284, avg_loss: 0.149356 +032870/063150, loss: 0.011980, avg_loss: 0.149342 +032875/063150, loss: 0.001372, avg_loss: 0.149327 +032880/063150, loss: 0.019010, avg_loss: 0.149306 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 32880/63150: {'accuracy': 0.8532110091743119} +032885/063150, loss: 0.014984, avg_loss: 0.149288 +032890/063150, loss: 0.007146, avg_loss: 0.149268 +032895/063150, loss: 0.014933, avg_loss: 0.149251 +032900/063150, loss: 0.012553, avg_loss: 0.149233 +032905/063150, loss: 
0.001936, avg_loss: 0.149215 +032910/063150, loss: 0.001917, avg_loss: 0.149205 +032915/063150, loss: 0.030596, avg_loss: 0.149185 +032920/063150, loss: 0.016542, avg_loss: 0.149166 +032925/063150, loss: 0.052967, avg_loss: 0.149150 +032930/063150, loss: 0.017940, avg_loss: 0.149130 +032935/063150, loss: 0.054746, avg_loss: 0.149111 +032940/063150, loss: 0.001472, avg_loss: 0.149093 +032945/063150, loss: 0.006528, avg_loss: 0.149071 +032950/063150, loss: 0.008536, avg_loss: 0.149051 +032955/063150, loss: 0.106602, avg_loss: 0.149036 +032960/063150, loss: 0.002405, avg_loss: 0.149015 +032965/063150, loss: 0.057453, avg_loss: 0.149004 +032970/063150, loss: 0.119037, avg_loss: 0.148987 +032975/063150, loss: 0.001901, avg_loss: 0.148975 +032980/063150, loss: 0.049795, avg_loss: 0.148958 +032985/063150, loss: 0.057811, avg_loss: 0.148943 +032990/063150, loss: 0.002982, avg_loss: 0.148922 +032995/063150, loss: 0.009201, avg_loss: 0.148915 +033000/063150, loss: 0.017153, avg_loss: 0.148894 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 33000/63150: {'accuracy': 0.8577981651376146} +033005/063150, loss: 0.043624, avg_loss: 0.148876 +033010/063150, loss: 0.025370, avg_loss: 0.148856 +033015/063150, loss: 0.041446, avg_loss: 0.148838 +033020/063150, loss: 0.050597, avg_loss: 0.148818 +033025/063150, loss: 0.001196, avg_loss: 0.148798 +033030/063150, loss: 0.004848, avg_loss: 0.148788 +033035/063150, loss: 0.032267, avg_loss: 0.148768 +033040/063150, loss: 0.004520, avg_loss: 0.148750 +033045/063150, loss: 0.029751, avg_loss: 0.148732 +033050/063150, loss: 0.013777, avg_loss: 0.148715 +033055/063150, loss: 0.061514, avg_loss: 0.148701 +033060/063150, loss: 0.012740, avg_loss: 0.148700 +033065/063150, loss: 0.014615, avg_loss: 0.148686 +033070/063150, loss: 0.011086, avg_loss: 0.148673 +033075/063150, loss: 0.100312, avg_loss: 0.148656 +033080/063150, loss: 0.040799, avg_loss: 0.148638 +033085/063150, loss: 0.013348, avg_loss: 0.148629 +033090/063150, loss: 0.068454, avg_loss: 0.148613 +033095/063150, loss: 0.068554, avg_loss: 0.148594 +033100/063150, loss: 0.010253, avg_loss: 0.148580 +033105/063150, loss: 0.032851, avg_loss: 0.148562 +033110/063150, loss: 0.045854, avg_loss: 0.148541 +033115/063150, loss: 0.023658, avg_loss: 0.148521 +033120/063150, loss: 0.024104, avg_loss: 0.148503 +***** Running dev evaluation ***** + Num examples = 872 + Instantaneous batch size per device = 32 +epoch 15, step 33120/63150: {'accuracy': 0.8463302752293578} +033125/063150, loss: 0.129521, avg_loss: 0.148490 +033130/063150, loss: 0.004246, avg_loss: 0.148472 +033135/063150, loss: 0.005289, avg_loss: 0.148455 +033140/063150, loss: 0.003993, avg_loss: 0.148434 +033145/063150, loss: 0.039762, avg_loss: 0.148418 +033150/063150, loss: 0.043979, avg_loss: 0.148397 +033155/063150, loss: 0.001181, avg_loss: 0.148380 +033160/063150, loss: 0.168182, avg_loss: 0.148368 +033165/063150, loss: 0.029405, avg_loss: 0.148351 +033170/063150, loss: 0.008169, avg_loss: 0.148333 +033175/063150, loss: 0.083457, avg_loss: 0.148316 +033180/063150, loss: 0.002800, avg_loss: 0.148295 +033185/063150, loss: 0.001302, avg_loss: 0.148276 +033190/063150, loss: 0.016164, avg_loss: 0.148262 +033195/063150, loss: 0.010633, avg_loss: 0.148241 +033200/063150, loss: 0.004194, avg_loss: 0.148221 +033205/063150, loss: 0.006342, avg_loss: 0.148207 +033210/063150, loss: 0.060946, avg_loss: 0.148197 +033215/063150, loss: 0.021701, avg_loss: 0.148183 +033220/063150, loss: 0.091852, 
avg_loss: 0.148174
+033225/063150, loss: 0.028652, avg_loss: 0.148159
+033230/063150, loss: 0.006561, avg_loss: 0.148144
+033235/063150, loss: 0.036666, avg_loss: 0.148125
+033240/063150, loss: 0.005548, avg_loss: 0.148108
+***** Running dev evaluation *****
+ Num examples = 872
+ Instantaneous batch size per device = 32
+epoch 15, step 33240/63150: {'accuracy': 0.8486238532110092}
+033245/063150, loss: 0.121300, avg_loss: 0.148095
+033250/063150, loss: 0.002188, avg_loss: 0.148078
+033255/063150, loss: 0.022848, avg_loss: 0.148059
+033260/063150, loss: 0.004626, avg_loss: 0.148040
+033265/063150, loss: 0.016482, avg_loss: 0.148022
+033270/063150, loss: 0.002042, avg_loss: 0.148006
+033275/063150, loss: 0.043554, avg_loss: 0.147990
+033280/063150, loss: 0.008069, avg_loss: 0.147970
+033285/063150, loss: 0.020968, avg_loss: 0.147955
+033290/063150, loss: 0.097583, avg_loss: 0.147939
+033295/063150, loss: 0.148115, avg_loss: 0.147933
+033300/063150, loss: 0.018813, avg_loss: 0.147913
+033305/063150, loss: 0.116478, avg_loss: 0.147898
+033310/063150, loss: 0.088976, avg_loss: 0.147882
+033315/063150, loss: 0.014793, avg_loss: 0.147863
+033320/063150, loss: 0.031540, avg_loss: 0.147848
+033325/063150, loss: 0.001784, avg_loss: 0.147831
+033330/063150, loss: 0.023705, avg_loss: 0.147810
+033335/063150, loss: 0.001648, avg_loss: 0.147792
+033340/063150, loss: 0.038400, avg_loss: 0.147777
+033345/063150, loss: 0.038439, avg_loss: 0.147758
+033350/063150, loss: 0.001341, avg_loss: 0.147745
+033355/063150, loss: 0.042025, avg_loss: 0.147730
+033360/063150, loss: 0.021579, avg_loss: 0.147716
+***** Running dev evaluation *****
+ Num examples = 872
+ Instantaneous batch size per device = 32
+epoch 15, step 33360/63150: {'accuracy': 0.841743119266055}
+***** Running train evaluation *****
+ Num examples = 67349
+ Instantaneous batch size per device = 32
+Train Dataset Result: {'accuracy': 0.9946992531440705}
+***** Running dev evaluation *****
+ Num examples = 872
+ Instantaneous batch size per device = 32
+Dev Dataset Result: {'accuracy': 0.841743119266055}
+DEV Best Result: accuracy, 0.8727064220183486
+Training time 0:35:24
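
For reference, below is a minimal sketch, not the training script that produced this log, of the logging and early-stopping pattern visible above: a loss/avg_loss record every 5 optimizer steps, a dev evaluation every 120 steps, and a final "DEV Best Result" once the dev metric stops improving (the run halts at step 33360 of 63150 with train accuracy 0.9947 versus dev accuracy 0.8417, below the best dev accuracy 0.8727 seen earlier). It assumes avg_loss is the cumulative mean of all per-step losses so far, and the model, data, and patience value are synthetic stand-ins chosen only for illustration.

# Minimal sketch (not the author's script) of the logging/early-stopping loop
# behind records like "033240/063150, loss: ..., avg_loss: ...".
# The model, make_loader, evaluate, and patience below are illustrative assumptions.
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


def make_loader(n, batch_size=32):
    # Random stand-in data; the real run used tokenized SST-2 sentences.
    x = torch.randn(n, 16)
    y = (x.sum(dim=1) > 0).long()
    return DataLoader(TensorDataset(x, y), batch_size=batch_size, shuffle=True)


def evaluate(model, loader):
    # Plain accuracy, mirroring the {'accuracy': ...} dicts printed in the log.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            correct += (model(x).argmax(dim=-1) == y).sum().item()
            total += y.numel()
    model.train()
    return {"accuracy": correct / total}


model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 2))
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
loss_fn = nn.CrossEntropyLoss()
train_loader, dev_loader = make_loader(2048), make_loader(256)

num_epochs, print_step, eval_step, patience = 30, 5, 120, 10  # patience is a guess
max_steps = num_epochs * len(train_loader)
step, loss_sum, best_acc, bad_evals, stop = 0, 0.0, 0.0, 0, False

for epoch in range(num_epochs):
    for x, y in train_loader:
        loss = loss_fn(model(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1
        loss_sum += loss.item()
        if step % print_step == 0:
            # avg_loss = cumulative mean of all per-step losses so far.
            print(f"{step:06d}/{max_steps:06d}, loss: {loss.item():.6f}, "
                  f"avg_loss: {loss_sum / step:.6f}")
        if step % eval_step == 0:
            print("***** Running dev evaluation *****")
            metrics = evaluate(model, dev_loader)
            print(f"epoch {epoch}, step {step}/{max_steps}: {metrics}")
            if metrics["accuracy"] > best_acc:
                best_acc, bad_evals = metrics["accuracy"], 0
            else:
                bad_evals += 1
            stop = bad_evals >= patience  # early stop once dev accuracy stalls
        if stop:
            break
    if stop:
        break

print(f"DEV Best Result: accuracy, {best_acc}")

Running the sketch prints records in the same "step/total, loss, avg_loss" and "epoch e, step s/total: {accuracy}" shapes, so its output can be compared line-for-line with the log above.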