------------> log file ==runs2/sst2/1/log_bs32_lr3e-05_20221118_065016_906968.txt Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/SST-2', do_eval=False, early_stop=True, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/sst2/1', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='sst2', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0) Distributed environment: NO Num processes: 1 Process index: 0 Local process index: 0 Device: cuda Mixed precision type: fp16 Sample 40563 of the training set: (tensor([ 101, 2003, 1037, 21207, 2121, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(0)). Sample 624 of the training set: (tensor([ 101, 2008, 4654, 17847, 2015, 1996, 9647, 1998, 16356, 4244, 1996, 6057, 5923, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)). Sample 42386 of the training set: (tensor([ 101, 1996, 5896, 1010, 1996, 18201, 2015, 1010, 1996, 3494, 2024, 2035, 3622, 1011, 2000, 1011, 2678, 4933, 1010, 1998, 2008, 1005, 1055, 2073, 2023, 2143, 2323, 2031, 2815, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(0)). ***** Running training ***** Num examples = 67349 Num Epochs = 30 Instantaneous batch size per device = 32 Total train batch size (w. parallel, distributed & accumulation) = 32 Gradient Accumulation steps = 1 Total optimization steps = 63150 000005/063150, loss: 0.699066, avg_loss: 0.698294 000010/063150, loss: 0.684998, avg_loss: 0.702415 000015/063150, loss: 0.677322, avg_loss: 0.700488 000020/063150, loss: 0.696426, avg_loss: 0.703039 000025/063150, loss: 0.706192, avg_loss: 0.702227 000030/063150, loss: 0.705933, avg_loss: 0.702650 000035/063150, loss: 0.722244, avg_loss: 0.703319 000040/063150, loss: 0.702194, avg_loss: 0.701807 000045/063150, loss: 0.711319, avg_loss: 0.702284 000050/063150, loss: 0.684219, avg_loss: 0.701999 000055/063150, loss: 0.694305, avg_loss: 0.701389 000060/063150, loss: 0.688171, avg_loss: 0.701232 000065/063150, loss: 0.704636, avg_loss: 0.701469 000070/063150, loss: 0.708710, avg_loss: 0.701263 000075/063150, loss: 0.685791, avg_loss: 0.700925 000080/063150, loss: 0.703445, avg_loss: 0.700651 000085/063150, loss: 0.714600, avg_loss: 0.700922 000090/063150, loss: 0.695724, avg_loss: 0.701068 000095/063150, loss: 0.694199, avg_loss: 0.700950 000100/063150, loss: 0.694672, avg_loss: 0.700794 000105/063150, loss: 0.684280, avg_loss: 0.700587 000110/063150, loss: 0.696747, avg_loss: 0.700711 000115/063150, loss: 0.696472, avg_loss: 0.700771 000120/063150, loss: 0.693542, avg_loss: 0.700848 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 120/63150: {'accuracy': 0.4908256880733945} 000125/063150, loss: 0.701477, avg_loss: 0.700951 000130/063150, loss: 0.680710, avg_loss: 0.700872 000135/063150, loss: 0.675705, avg_loss: 0.700665 000140/063150, loss: 0.709198, avg_loss: 0.700597 000145/063150, loss: 0.681229, avg_loss: 0.700399 000150/063150, loss: 0.696289, avg_loss: 0.700279 000155/063150, loss: 0.697922, avg_loss: 0.700317 000160/063150, loss: 0.707993, avg_loss: 0.700207 000165/063150, loss: 0.698456, avg_loss: 0.700098 000170/063150, loss: 0.706284, avg_loss: 0.699953 000175/063150, loss: 0.706757, avg_loss: 0.699989 000180/063150, loss: 0.695663, avg_loss: 0.699971 000185/063150, loss: 0.695282, avg_loss: 0.699929 000190/063150, loss: 0.700256, avg_loss: 0.699896 000195/063150, loss: 0.701035, avg_loss: 0.699996 000200/063150, loss: 0.701126, avg_loss: 0.700045 000205/063150, loss: 0.697083, avg_loss: 0.700042 000210/063150, loss: 0.698654, avg_loss: 0.700030 000215/063150, loss: 0.697266, avg_loss: 0.699913 000220/063150, loss: 0.702286, avg_loss: 0.699880 000225/063150, loss: 0.693466, avg_loss: 0.699792 000230/063150, loss: 0.698135, avg_loss: 0.699732 000235/063150, loss: 0.682785, avg_loss: 0.699647 000240/063150, loss: 0.692719, avg_loss: 0.699636 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 240/63150: {'accuracy': 0.4908256880733945} 000245/063150, loss: 0.701706, avg_loss: 0.699525 000250/063150, loss: 0.694305, avg_loss: 0.699331 000255/063150, loss: 0.693481, avg_loss: 0.699234 000260/063150, loss: 0.695450, avg_loss: 0.699185 000265/063150, loss: 0.690948, avg_loss: 0.699078 000270/063150, loss: 0.692719, avg_loss: 0.698942 000275/063150, loss: 0.692352, avg_loss: 0.698819 000280/063150, loss: 0.694565, avg_loss: 0.698731 000285/063150, loss: 0.690460, avg_loss: 0.698634 000290/063150, loss: 0.690430, avg_loss: 0.698527 000295/063150, loss: 0.695435, avg_loss: 0.698426 000300/063150, loss: 0.688797, avg_loss: 0.698297 000305/063150, loss: 0.692612, avg_loss: 0.698205 000310/063150, loss: 0.692673, avg_loss: 0.698092 000315/063150, loss: 0.691071, avg_loss: 0.698000 000320/063150, loss: 0.692902, avg_loss: 0.697890 000325/063150, loss: 0.691315, avg_loss: 0.697769 000330/063150, loss: 0.688202, avg_loss: 0.697666 000335/063150, loss: 0.692764, avg_loss: 0.697577 000340/063150, loss: 0.691727, avg_loss: 0.697481 000345/063150, loss: 0.690247, avg_loss: 0.697397 000350/063150, loss: 0.688766, avg_loss: 0.697331 000355/063150, loss: 0.689789, avg_loss: 0.697179 000360/063150, loss: 0.691925, avg_loss: 0.697088 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 360/63150: {'accuracy': 0.5355504587155964} 000365/063150, loss: 0.690979, avg_loss: 0.696966 000370/063150, loss: 0.682373, avg_loss: 0.696839 000375/063150, loss: 0.691315, avg_loss: 0.696746 000380/063150, loss: 0.700912, avg_loss: 0.696692 000385/063150, loss: 0.694580, avg_loss: 0.696592 000390/063150, loss: 0.679520, avg_loss: 0.696474 000395/063150, loss: 0.685104, avg_loss: 0.696315 000400/063150, loss: 0.683975, avg_loss: 0.696200 000405/063150, loss: 0.689697, avg_loss: 0.696104 000410/063150, loss: 0.687210, avg_loss: 0.696001 000415/063150, loss: 0.682480, avg_loss: 0.695870 000420/063150, loss: 0.700912, avg_loss: 0.695806 000425/063150, loss: 0.688156, avg_loss: 0.695680 000430/063150, loss: 0.684021, avg_loss: 0.695526 000435/063150, loss: 0.678650, avg_loss: 0.695443 000440/063150, loss: 0.687790, avg_loss: 0.695313 000445/063150, loss: 0.677887, avg_loss: 0.695127 000450/063150, loss: 0.684143, avg_loss: 0.695000 000455/063150, loss: 0.682251, avg_loss: 0.694790 000460/063150, loss: 0.689774, avg_loss: 0.694652 000465/063150, loss: 0.697784, avg_loss: 0.694567 000470/063150, loss: 0.689697, avg_loss: 0.694421 000475/063150, loss: 0.688354, avg_loss: 0.694421 000480/063150, loss: 0.657639, avg_loss: 0.694245 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 480/63150: {'accuracy': 0.5091743119266054} 000485/063150, loss: 0.679733, avg_loss: 0.694141 000490/063150, loss: 0.674637, avg_loss: 0.693992 000495/063150, loss: 0.725555, avg_loss: 0.694000 000500/063150, loss: 0.683044, avg_loss: 0.693896 000505/063150, loss: 0.684753, avg_loss: 0.693714 000510/063150, loss: 0.677780, avg_loss: 0.693572 000515/063150, loss: 0.664780, avg_loss: 0.693277 000520/063150, loss: 0.703796, avg_loss: 0.693264 000525/063150, loss: 0.671082, avg_loss: 0.693133 000530/063150, loss: 0.678345, avg_loss: 0.692987 000535/063150, loss: 0.662933, avg_loss: 0.692878 000540/063150, loss: 0.676666, avg_loss: 0.692811 000545/063150, loss: 0.657776, avg_loss: 0.692641 000550/063150, loss: 0.690552, avg_loss: 0.692577 000555/063150, loss: 0.693863, avg_loss: 0.692609 000560/063150, loss: 0.645691, avg_loss: 0.692455 000565/063150, loss: 0.684433, avg_loss: 0.692544 000570/063150, loss: 0.700058, avg_loss: 0.692548 000575/063150, loss: 0.685699, avg_loss: 0.692384 000580/063150, loss: 0.676178, avg_loss: 0.692270 000585/063150, loss: 0.685501, avg_loss: 0.692076 000590/063150, loss: 0.679108, avg_loss: 0.691955 000595/063150, loss: 0.672958, avg_loss: 0.691839 000600/063150, loss: 0.684540, avg_loss: 0.691661 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 600/63150: {'accuracy': 0.5091743119266054} 000605/063150, loss: 0.698410, avg_loss: 0.691549 000610/063150, loss: 0.678619, avg_loss: 0.691446 000615/063150, loss: 0.690140, avg_loss: 0.691291 000620/063150, loss: 0.676254, avg_loss: 0.691128 000625/063150, loss: 0.682373, avg_loss: 0.691073 000630/063150, loss: 0.704193, avg_loss: 0.690986 000635/063150, loss: 0.681549, avg_loss: 0.690874 000640/063150, loss: 0.675903, avg_loss: 0.690731 000645/063150, loss: 0.663528, avg_loss: 0.690676 000650/063150, loss: 0.669769, avg_loss: 0.690538 000655/063150, loss: 0.670044, avg_loss: 0.690407 000660/063150, loss: 0.646988, avg_loss: 0.690206 000665/063150, loss: 0.662003, avg_loss: 0.690063 000670/063150, loss: 0.654678, avg_loss: 0.689824 000675/063150, loss: 0.673439, avg_loss: 0.689577 000680/063150, loss: 0.652298, avg_loss: 0.689503 000685/063150, loss: 0.651001, avg_loss: 0.689241 000690/063150, loss: 0.686157, avg_loss: 0.689081 000695/063150, loss: 0.673027, avg_loss: 0.688830 000700/063150, loss: 0.661835, avg_loss: 0.688696 000705/063150, loss: 0.658730, avg_loss: 0.688453 000710/063150, loss: 0.670929, avg_loss: 0.688235 000715/063150, loss: 0.684013, avg_loss: 0.688093 000720/063150, loss: 0.672012, avg_loss: 0.687874 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 720/63150: {'accuracy': 0.6284403669724771} 000725/063150, loss: 0.673714, avg_loss: 0.687796 000730/063150, loss: 0.683487, avg_loss: 0.687649 000735/063150, loss: 0.651962, avg_loss: 0.687393 000740/063150, loss: 0.651978, avg_loss: 0.687152 000745/063150, loss: 0.671097, avg_loss: 0.686865 000750/063150, loss: 0.682686, avg_loss: 0.686692 000755/063150, loss: 0.671722, avg_loss: 0.686502 000760/063150, loss: 0.625923, avg_loss: 0.686107 000765/063150, loss: 0.631691, avg_loss: 0.685808 000770/063150, loss: 0.647720, avg_loss: 0.685372 000775/063150, loss: 0.630836, avg_loss: 0.685044 000780/063150, loss: 0.600067, avg_loss: 0.684657 000785/063150, loss: 0.649406, avg_loss: 0.684341 000790/063150, loss: 0.620857, avg_loss: 0.684093 000795/063150, loss: 0.616707, avg_loss: 0.683665 000800/063150, loss: 0.597076, avg_loss: 0.683302 000805/063150, loss: 0.605042, avg_loss: 0.682946 000810/063150, loss: 0.589241, avg_loss: 0.682349 000815/063150, loss: 0.635567, avg_loss: 0.682020 000820/063150, loss: 0.589584, avg_loss: 0.681673 000825/063150, loss: 0.651291, avg_loss: 0.681407 000830/063150, loss: 0.688423, avg_loss: 0.681157 000835/063150, loss: 0.563934, avg_loss: 0.680670 000840/063150, loss: 0.631546, avg_loss: 0.680357 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 840/63150: {'accuracy': 0.7236238532110092} 000845/063150, loss: 0.662323, avg_loss: 0.679958 000850/063150, loss: 0.561371, avg_loss: 0.679360 000855/063150, loss: 0.537651, avg_loss: 0.678845 000860/063150, loss: 0.600845, avg_loss: 0.678364 000865/063150, loss: 0.524681, avg_loss: 0.677886 000870/063150, loss: 0.618294, avg_loss: 0.677480 000875/063150, loss: 0.550529, avg_loss: 0.677040 000880/063150, loss: 0.562836, avg_loss: 0.676556 000885/063150, loss: 0.584137, avg_loss: 0.676117 000890/063150, loss: 0.534386, avg_loss: 0.675487 000895/063150, loss: 0.547890, avg_loss: 0.674789 000900/063150, loss: 0.572800, avg_loss: 0.674201 000905/063150, loss: 0.594093, avg_loss: 0.673920 000910/063150, loss: 0.577248, avg_loss: 0.673406 000915/063150, loss: 0.521347, avg_loss: 0.672878 000920/063150, loss: 0.583481, avg_loss: 0.672350 000925/063150, loss: 0.553391, avg_loss: 0.671855 000930/063150, loss: 0.561745, avg_loss: 0.671272 000935/063150, loss: 0.524872, avg_loss: 0.670658 000940/063150, loss: 0.522697, avg_loss: 0.670148 000945/063150, loss: 0.662201, avg_loss: 0.669857 000950/063150, loss: 0.665039, avg_loss: 0.669415 000955/063150, loss: 0.541252, avg_loss: 0.668925 000960/063150, loss: 0.529617, avg_loss: 0.668321 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 960/63150: {'accuracy': 0.7385321100917431} 000965/063150, loss: 0.646500, avg_loss: 0.667651 000970/063150, loss: 0.635612, avg_loss: 0.666934 000975/063150, loss: 0.485619, avg_loss: 0.666445 000980/063150, loss: 0.481720, avg_loss: 0.666025 000985/063150, loss: 0.623451, avg_loss: 0.665369 000990/063150, loss: 0.556183, avg_loss: 0.664912 000995/063150, loss: 0.594429, avg_loss: 0.664675 001000/063150, loss: 0.642525, avg_loss: 0.664227 001005/063150, loss: 0.507896, avg_loss: 0.663682 001010/063150, loss: 0.559967, avg_loss: 0.663286 001015/063150, loss: 0.537098, avg_loss: 0.662654 001020/063150, loss: 0.538918, avg_loss: 0.662011 001025/063150, loss: 0.477783, avg_loss: 0.661387 001030/063150, loss: 0.640617, avg_loss: 0.660972 001035/063150, loss: 0.559074, avg_loss: 0.660316 001040/063150, loss: 0.640266, avg_loss: 0.659892 001045/063150, loss: 0.534710, avg_loss: 0.659427 001050/063150, loss: 0.484222, avg_loss: 0.658888 001055/063150, loss: 0.581760, avg_loss: 0.658355 001060/063150, loss: 0.561779, avg_loss: 0.657932 001065/063150, loss: 0.554047, avg_loss: 0.657404 001070/063150, loss: 0.572773, avg_loss: 0.656937 001075/063150, loss: 0.647923, avg_loss: 0.656480 001080/063150, loss: 0.425316, avg_loss: 0.655995 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1080/63150: {'accuracy': 0.7568807339449541} 001085/063150, loss: 0.590240, avg_loss: 0.655601 001090/063150, loss: 0.457207, avg_loss: 0.655022 001095/063150, loss: 0.579010, avg_loss: 0.654627 001100/063150, loss: 0.607227, avg_loss: 0.654075 001105/063150, loss: 0.619682, avg_loss: 0.653523 001110/063150, loss: 0.546398, avg_loss: 0.653132 001115/063150, loss: 0.640785, avg_loss: 0.652507 001120/063150, loss: 0.405167, avg_loss: 0.652141 001125/063150, loss: 0.521263, avg_loss: 0.651874 001130/063150, loss: 0.520802, avg_loss: 0.651417 001135/063150, loss: 0.553818, avg_loss: 0.650836 001140/063150, loss: 0.636532, avg_loss: 0.650372 001145/063150, loss: 0.650009, avg_loss: 0.649899 001150/063150, loss: 0.454395, avg_loss: 0.649468 001155/063150, loss: 0.478718, avg_loss: 0.648877 001160/063150, loss: 0.586956, avg_loss: 0.648443 001165/063150, loss: 0.495560, avg_loss: 0.648029 001170/063150, loss: 0.499985, avg_loss: 0.647654 001175/063150, loss: 0.540573, avg_loss: 0.647043 001180/063150, loss: 0.425880, avg_loss: 0.646502 001185/063150, loss: 0.578789, avg_loss: 0.646155 001190/063150, loss: 0.518787, avg_loss: 0.645670 001195/063150, loss: 0.532078, avg_loss: 0.645171 001200/063150, loss: 0.442375, avg_loss: 0.644458 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1200/63150: {'accuracy': 0.7637614678899083} 001205/063150, loss: 0.529041, avg_loss: 0.644026 001210/063150, loss: 0.481709, avg_loss: 0.643462 001215/063150, loss: 0.421642, avg_loss: 0.642747 001220/063150, loss: 0.436317, avg_loss: 0.642176 001225/063150, loss: 0.537792, avg_loss: 0.641646 001230/063150, loss: 0.482212, avg_loss: 0.641234 001235/063150, loss: 0.506683, avg_loss: 0.640627 001240/063150, loss: 0.423000, avg_loss: 0.640062 001245/063150, loss: 0.452396, avg_loss: 0.639454 001250/063150, loss: 0.459133, avg_loss: 0.638942 001255/063150, loss: 0.682259, avg_loss: 0.638437 001260/063150, loss: 0.492313, avg_loss: 0.637797 001265/063150, loss: 0.612106, avg_loss: 0.637475 001270/063150, loss: 0.371552, avg_loss: 0.636980 001275/063150, loss: 0.442150, avg_loss: 0.636261 001280/063150, loss: 0.531754, avg_loss: 0.635836 001285/063150, loss: 0.519829, avg_loss: 0.635495 001290/063150, loss: 0.499115, avg_loss: 0.634939 001295/063150, loss: 0.536774, avg_loss: 0.634752 001300/063150, loss: 0.503185, avg_loss: 0.634372 001305/063150, loss: 0.602249, avg_loss: 0.633995 001310/063150, loss: 0.442337, avg_loss: 0.633510 001315/063150, loss: 0.642811, avg_loss: 0.633124 001320/063150, loss: 0.414825, avg_loss: 0.632533 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1320/63150: {'accuracy': 0.7752293577981652} 001325/063150, loss: 0.481556, avg_loss: 0.631990 001330/063150, loss: 0.448387, avg_loss: 0.631533 001335/063150, loss: 0.401711, avg_loss: 0.631070 001340/063150, loss: 0.508049, avg_loss: 0.630476 001345/063150, loss: 0.568802, avg_loss: 0.630039 001350/063150, loss: 0.561794, avg_loss: 0.629707 001355/063150, loss: 0.542850, avg_loss: 0.629050 001360/063150, loss: 0.484318, avg_loss: 0.628416 001365/063150, loss: 0.695049, avg_loss: 0.628053 001370/063150, loss: 0.527977, avg_loss: 0.627550 001375/063150, loss: 0.557415, avg_loss: 0.627135 001380/063150, loss: 0.457073, avg_loss: 0.626956 001385/063150, loss: 0.438343, avg_loss: 0.626553 001390/063150, loss: 0.563004, avg_loss: 0.625970 001395/063150, loss: 0.654701, avg_loss: 0.625507 001400/063150, loss: 0.474880, avg_loss: 0.624905 001405/063150, loss: 0.408585, avg_loss: 0.624406 001410/063150, loss: 0.475113, avg_loss: 0.623802 001415/063150, loss: 0.495762, avg_loss: 0.623506 001420/063150, loss: 0.466187, avg_loss: 0.622912 001425/063150, loss: 0.562550, avg_loss: 0.622626 001430/063150, loss: 0.634254, avg_loss: 0.622368 001435/063150, loss: 0.520088, avg_loss: 0.621966 001440/063150, loss: 0.551281, avg_loss: 0.621461 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1440/63150: {'accuracy': 0.7717889908256881} 001445/063150, loss: 0.396290, avg_loss: 0.620777 001450/063150, loss: 0.437672, avg_loss: 0.620369 001455/063150, loss: 0.466106, avg_loss: 0.619835 001460/063150, loss: 0.512569, avg_loss: 0.619407 001465/063150, loss: 0.454067, avg_loss: 0.619037 001470/063150, loss: 0.506660, avg_loss: 0.618636 001475/063150, loss: 0.539684, avg_loss: 0.618245 001480/063150, loss: 0.378353, avg_loss: 0.617922 001485/063150, loss: 0.325607, avg_loss: 0.617461 001490/063150, loss: 0.623295, avg_loss: 0.617021 001495/063150, loss: 0.510307, avg_loss: 0.616807 001500/063150, loss: 0.357586, avg_loss: 0.616374 001505/063150, loss: 0.481606, avg_loss: 0.616216 001510/063150, loss: 0.558796, avg_loss: 0.615956 001515/063150, loss: 0.619362, avg_loss: 0.615500 001520/063150, loss: 0.502560, avg_loss: 0.615003 001525/063150, loss: 0.419495, avg_loss: 0.614568 001530/063150, loss: 0.462114, avg_loss: 0.613972 001535/063150, loss: 0.514221, avg_loss: 0.613483 001540/063150, loss: 0.531858, avg_loss: 0.612844 001545/063150, loss: 0.456753, avg_loss: 0.612303 001550/063150, loss: 0.461109, avg_loss: 0.611709 001555/063150, loss: 0.595768, avg_loss: 0.611489 001560/063150, loss: 0.481895, avg_loss: 0.611150 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1560/63150: {'accuracy': 0.7752293577981652} 001565/063150, loss: 0.421822, avg_loss: 0.610683 001570/063150, loss: 0.454964, avg_loss: 0.610350 001575/063150, loss: 0.365948, avg_loss: 0.609845 001580/063150, loss: 0.445292, avg_loss: 0.609434 001585/063150, loss: 0.476309, avg_loss: 0.608933 001590/063150, loss: 0.493601, avg_loss: 0.608611 001595/063150, loss: 0.593845, avg_loss: 0.608162 001600/063150, loss: 0.369373, avg_loss: 0.607726 001605/063150, loss: 0.587719, avg_loss: 0.607384 001610/063150, loss: 0.492731, avg_loss: 0.606924 001615/063150, loss: 0.502428, avg_loss: 0.606412 001620/063150, loss: 0.537010, avg_loss: 0.606198 001625/063150, loss: 0.566477, avg_loss: 0.605799 001630/063150, loss: 0.476679, avg_loss: 0.605363 001635/063150, loss: 0.475550, avg_loss: 0.604885 001640/063150, loss: 0.349373, avg_loss: 0.604407 001645/063150, loss: 0.377903, avg_loss: 0.603836 001650/063150, loss: 0.411657, avg_loss: 0.603554 001655/063150, loss: 0.392540, avg_loss: 0.602957 001660/063150, loss: 0.529764, avg_loss: 0.602509 001665/063150, loss: 0.566372, avg_loss: 0.602353 001670/063150, loss: 0.450151, avg_loss: 0.601893 001675/063150, loss: 0.472094, avg_loss: 0.601507 001680/063150, loss: 0.535753, avg_loss: 0.601047 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1680/63150: {'accuracy': 0.7798165137614679} 001685/063150, loss: 0.471460, avg_loss: 0.600612 001690/063150, loss: 0.621786, avg_loss: 0.600232 001695/063150, loss: 0.503521, avg_loss: 0.599750 001700/063150, loss: 0.539293, avg_loss: 0.599262 001705/063150, loss: 0.464657, avg_loss: 0.598757 001710/063150, loss: 0.486265, avg_loss: 0.598326 001715/063150, loss: 0.533688, avg_loss: 0.597988 001720/063150, loss: 0.421272, avg_loss: 0.597682 001725/063150, loss: 0.376846, avg_loss: 0.597393 001730/063150, loss: 0.492756, avg_loss: 0.596847 001735/063150, loss: 0.332472, avg_loss: 0.596607 001740/063150, loss: 0.479696, avg_loss: 0.596175 001745/063150, loss: 0.427999, avg_loss: 0.595669 001750/063150, loss: 0.527891, avg_loss: 0.595335 001755/063150, loss: 0.379955, avg_loss: 0.594892 001760/063150, loss: 0.599617, avg_loss: 0.594601 001765/063150, loss: 0.576731, avg_loss: 0.594364 001770/063150, loss: 0.566746, avg_loss: 0.594008 001775/063150, loss: 0.606173, avg_loss: 0.593482 001780/063150, loss: 0.411125, avg_loss: 0.593049 001785/063150, loss: 0.559351, avg_loss: 0.592689 001790/063150, loss: 0.489662, avg_loss: 0.592523 001795/063150, loss: 0.438231, avg_loss: 0.592393 001800/063150, loss: 0.439730, avg_loss: 0.591988 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1800/63150: {'accuracy': 0.7821100917431193} 001805/063150, loss: 0.457642, avg_loss: 0.591447 001810/063150, loss: 0.474457, avg_loss: 0.591127 001815/063150, loss: 0.446978, avg_loss: 0.590701 001820/063150, loss: 0.360235, avg_loss: 0.590146 001825/063150, loss: 0.522787, avg_loss: 0.589913 001830/063150, loss: 0.420828, avg_loss: 0.589691 001835/063150, loss: 0.576784, avg_loss: 0.589292 001840/063150, loss: 0.455002, avg_loss: 0.588905 001845/063150, loss: 0.463318, avg_loss: 0.588360 001850/063150, loss: 0.538891, avg_loss: 0.588076 001855/063150, loss: 0.442177, avg_loss: 0.587478 001860/063150, loss: 0.598307, avg_loss: 0.587336 001865/063150, loss: 0.594749, avg_loss: 0.587073 001870/063150, loss: 0.438782, avg_loss: 0.586785 001875/063150, loss: 0.397961, avg_loss: 0.586557 001880/063150, loss: 0.396614, avg_loss: 0.586146 001885/063150, loss: 0.398376, avg_loss: 0.585721 001890/063150, loss: 0.330936, avg_loss: 0.585311 001895/063150, loss: 0.270622, avg_loss: 0.584947 001900/063150, loss: 0.645473, avg_loss: 0.584804 001905/063150, loss: 0.466560, avg_loss: 0.584304 001910/063150, loss: 0.390421, avg_loss: 0.583849 001915/063150, loss: 0.428555, avg_loss: 0.583479 001920/063150, loss: 0.465866, avg_loss: 0.582975 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 1920/63150: {'accuracy': 0.7775229357798165} 001925/063150, loss: 0.476524, avg_loss: 0.582740 001930/063150, loss: 0.404989, avg_loss: 0.582187 001935/063150, loss: 0.563856, avg_loss: 0.581866 001940/063150, loss: 0.461561, avg_loss: 0.581485 001945/063150, loss: 0.463858, avg_loss: 0.581072 001950/063150, loss: 0.476309, avg_loss: 0.580758 001955/063150, loss: 0.419500, avg_loss: 0.580453 001960/063150, loss: 0.425861, avg_loss: 0.580091 001965/063150, loss: 0.608536, avg_loss: 0.579938 001970/063150, loss: 0.318991, avg_loss: 0.579540 001975/063150, loss: 0.358694, avg_loss: 0.579218 001980/063150, loss: 0.287941, avg_loss: 0.578818 001985/063150, loss: 0.434204, avg_loss: 0.578520 001990/063150, loss: 0.598541, avg_loss: 0.578185 001995/063150, loss: 0.411554, avg_loss: 0.577992 002000/063150, loss: 0.554369, avg_loss: 0.577653 002005/063150, loss: 0.444473, avg_loss: 0.577298 002010/063150, loss: 0.464811, avg_loss: 0.576906 002015/063150, loss: 0.500570, avg_loss: 0.576662 002020/063150, loss: 0.327007, avg_loss: 0.576124 002025/063150, loss: 0.424227, avg_loss: 0.575725 002030/063150, loss: 0.331482, avg_loss: 0.575304 002035/063150, loss: 0.371761, avg_loss: 0.575046 002040/063150, loss: 0.376259, avg_loss: 0.574788 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 0, step 2040/63150: {'accuracy': 0.7993119266055045} 002045/063150, loss: 0.360487, avg_loss: 0.574369 002050/063150, loss: 0.474432, avg_loss: 0.573938 002055/063150, loss: 0.277416, avg_loss: 0.573426 002060/063150, loss: 0.405094, avg_loss: 0.572981 002065/063150, loss: 0.437620, avg_loss: 0.572760 002070/063150, loss: 0.336487, avg_loss: 0.572440 002075/063150, loss: 0.419630, avg_loss: 0.572151 002080/063150, loss: 0.321566, avg_loss: 0.571714 002085/063150, loss: 0.615108, avg_loss: 0.571479 002090/063150, loss: 0.541786, avg_loss: 0.571437 002095/063150, loss: 0.357214, avg_loss: 0.570996 002100/063150, loss: 0.414228, avg_loss: 0.570739 002105/063150, loss: 0.466367, avg_loss: 0.570469 002110/063150, loss: 0.284697, avg_loss: 0.569925 002115/063150, loss: 0.632372, avg_loss: 0.569449 002120/063150, loss: 0.567448, avg_loss: 0.569101 002125/063150, loss: 0.301064, avg_loss: 0.568673 002130/063150, loss: 0.466331, avg_loss: 0.568429 002135/063150, loss: 0.318399, avg_loss: 0.567998 002140/063150, loss: 0.457193, avg_loss: 0.567648 002145/063150, loss: 0.402014, avg_loss: 0.567166 002150/063150, loss: 0.565275, avg_loss: 0.566827 002155/063150, loss: 0.353403, avg_loss: 0.566410 002160/063150, loss: 0.412094, avg_loss: 0.566103 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2160/63150: {'accuracy': 0.7993119266055045} 002165/063150, loss: 0.530349, avg_loss: 0.565792 002170/063150, loss: 0.482883, avg_loss: 0.565460 002175/063150, loss: 0.410007, avg_loss: 0.565040 002180/063150, loss: 0.475971, avg_loss: 0.564747 002185/063150, loss: 0.532206, avg_loss: 0.564487 002190/063150, loss: 0.375374, avg_loss: 0.563967 002195/063150, loss: 0.367553, avg_loss: 0.563401 002200/063150, loss: 0.443151, avg_loss: 0.563005 002205/063150, loss: 0.410225, avg_loss: 0.562641 002210/063150, loss: 0.489100, avg_loss: 0.562480 002215/063150, loss: 0.289569, avg_loss: 0.562061 002220/063150, loss: 0.304693, avg_loss: 0.561644 002225/063150, loss: 0.318733, avg_loss: 0.561267 002230/063150, loss: 0.599303, avg_loss: 0.560921 002235/063150, loss: 0.554891, avg_loss: 0.560554 002240/063150, loss: 0.523050, avg_loss: 0.560261 002245/063150, loss: 0.541603, avg_loss: 0.560032 002250/063150, loss: 0.339924, avg_loss: 0.559689 002255/063150, loss: 0.333566, avg_loss: 0.559446 002260/063150, loss: 0.387396, avg_loss: 0.559056 002265/063150, loss: 0.391576, avg_loss: 0.558933 002270/063150, loss: 0.431360, avg_loss: 0.558750 002275/063150, loss: 0.467025, avg_loss: 0.558452 002280/063150, loss: 0.440957, avg_loss: 0.557994 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2280/63150: {'accuracy': 0.801605504587156} 002285/063150, loss: 0.334400, avg_loss: 0.557650 002290/063150, loss: 0.362084, avg_loss: 0.557309 002295/063150, loss: 0.456802, avg_loss: 0.557083 002300/063150, loss: 0.487686, avg_loss: 0.556850 002305/063150, loss: 0.195270, avg_loss: 0.556443 002310/063150, loss: 0.320818, avg_loss: 0.556251 002315/063150, loss: 0.297004, avg_loss: 0.555843 002320/063150, loss: 0.271812, avg_loss: 0.555569 002325/063150, loss: 0.534525, avg_loss: 0.555352 002330/063150, loss: 0.517542, avg_loss: 0.555115 002335/063150, loss: 0.236243, avg_loss: 0.554895 002340/063150, loss: 0.503995, avg_loss: 0.554608 002345/063150, loss: 0.534254, avg_loss: 0.554290 002350/063150, loss: 0.401686, avg_loss: 0.554065 002355/063150, loss: 0.400046, avg_loss: 0.553681 002360/063150, loss: 0.337936, avg_loss: 0.553515 002365/063150, loss: 0.360080, avg_loss: 0.553164 002370/063150, loss: 0.282468, avg_loss: 0.552726 002375/063150, loss: 0.511618, avg_loss: 0.552363 002380/063150, loss: 0.393131, avg_loss: 0.552091 002385/063150, loss: 0.312490, avg_loss: 0.551643 002390/063150, loss: 0.433064, avg_loss: 0.551285 002395/063150, loss: 0.403099, avg_loss: 0.550956 002400/063150, loss: 0.510655, avg_loss: 0.550642 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2400/63150: {'accuracy': 0.8096330275229358} 002405/063150, loss: 0.359091, avg_loss: 0.550244 002410/063150, loss: 0.381485, avg_loss: 0.550031 002415/063150, loss: 0.355571, avg_loss: 0.549686 002420/063150, loss: 0.296953, avg_loss: 0.549320 002425/063150, loss: 0.291162, avg_loss: 0.548987 002430/063150, loss: 0.388165, avg_loss: 0.548686 002435/063150, loss: 0.431476, avg_loss: 0.548367 002440/063150, loss: 0.324961, avg_loss: 0.548020 002445/063150, loss: 0.248220, avg_loss: 0.547515 002450/063150, loss: 0.399220, avg_loss: 0.547195 002455/063150, loss: 0.300820, avg_loss: 0.546632 002460/063150, loss: 0.412463, avg_loss: 0.546405 002465/063150, loss: 0.343831, avg_loss: 0.546263 002470/063150, loss: 0.325924, avg_loss: 0.546023 002475/063150, loss: 0.294374, avg_loss: 0.545837 002480/063150, loss: 0.249588, avg_loss: 0.545485 002485/063150, loss: 0.373238, avg_loss: 0.545193 002490/063150, loss: 0.418461, avg_loss: 0.544885 002495/063150, loss: 0.604065, avg_loss: 0.544701 002500/063150, loss: 0.383909, avg_loss: 0.544381 002505/063150, loss: 0.553466, avg_loss: 0.544107 002510/063150, loss: 0.362100, avg_loss: 0.543708 002515/063150, loss: 0.401723, avg_loss: 0.543476 002520/063150, loss: 0.420690, avg_loss: 0.543165 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2520/63150: {'accuracy': 0.8084862385321101} 002525/063150, loss: 0.322749, avg_loss: 0.542914 002530/063150, loss: 0.446834, avg_loss: 0.542709 002535/063150, loss: 0.247090, avg_loss: 0.542372 002540/063150, loss: 0.217799, avg_loss: 0.541945 002545/063150, loss: 0.416651, avg_loss: 0.541675 002550/063150, loss: 0.296513, avg_loss: 0.541274 002555/063150, loss: 0.326615, avg_loss: 0.540952 002560/063150, loss: 0.416657, avg_loss: 0.540617 002565/063150, loss: 0.353575, avg_loss: 0.540476 002570/063150, loss: 0.208168, avg_loss: 0.540278 002575/063150, loss: 0.510936, avg_loss: 0.539868 002580/063150, loss: 0.405255, avg_loss: 0.539554 002585/063150, loss: 0.572215, avg_loss: 0.539296 002590/063150, loss: 0.581574, avg_loss: 0.539011 002595/063150, loss: 0.320257, avg_loss: 0.538764 002600/063150, loss: 0.536942, avg_loss: 0.538634 002605/063150, loss: 0.352522, avg_loss: 0.538339 002610/063150, loss: 0.361156, avg_loss: 0.538046 002615/063150, loss: 0.265065, avg_loss: 0.537737 002620/063150, loss: 0.390367, avg_loss: 0.537376 002625/063150, loss: 0.417925, avg_loss: 0.537277 002630/063150, loss: 0.294712, avg_loss: 0.536958 002635/063150, loss: 0.376154, avg_loss: 0.536549 002640/063150, loss: 0.456254, avg_loss: 0.536370 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2640/63150: {'accuracy': 0.819954128440367} 002645/063150, loss: 0.255370, avg_loss: 0.536081 002650/063150, loss: 0.320479, avg_loss: 0.535787 002655/063150, loss: 0.377732, avg_loss: 0.535443 002660/063150, loss: 0.252942, avg_loss: 0.535109 002665/063150, loss: 0.396996, avg_loss: 0.534829 002670/063150, loss: 0.362296, avg_loss: 0.534474 002675/063150, loss: 0.425987, avg_loss: 0.534196 002680/063150, loss: 0.378053, avg_loss: 0.533962 002685/063150, loss: 0.348865, avg_loss: 0.533647 002690/063150, loss: 0.442365, avg_loss: 0.533353 002695/063150, loss: 0.272227, avg_loss: 0.533050 002700/063150, loss: 0.342094, avg_loss: 0.532640 002705/063150, loss: 0.345610, avg_loss: 0.532358 002710/063150, loss: 0.360603, avg_loss: 0.532106 002715/063150, loss: 0.485757, avg_loss: 0.531911 002720/063150, loss: 0.406185, avg_loss: 0.531572 002725/063150, loss: 0.324953, avg_loss: 0.531308 002730/063150, loss: 0.280637, avg_loss: 0.531046 002735/063150, loss: 0.210205, avg_loss: 0.530774 002740/063150, loss: 0.203700, avg_loss: 0.530357 002745/063150, loss: 0.260636, avg_loss: 0.530014 002750/063150, loss: 0.429950, avg_loss: 0.529847 002755/063150, loss: 0.711760, avg_loss: 0.529698 002760/063150, loss: 0.410009, avg_loss: 0.529382 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2760/63150: {'accuracy': 0.8142201834862385} 002765/063150, loss: 0.331034, avg_loss: 0.529057 002770/063150, loss: 0.261881, avg_loss: 0.528702 002775/063150, loss: 0.418627, avg_loss: 0.528421 002780/063150, loss: 0.285539, avg_loss: 0.528087 002785/063150, loss: 0.410718, avg_loss: 0.527839 002790/063150, loss: 0.275710, avg_loss: 0.527568 002795/063150, loss: 0.596097, avg_loss: 0.527231 002800/063150, loss: 0.462545, avg_loss: 0.526970 002805/063150, loss: 0.427958, avg_loss: 0.526764 002810/063150, loss: 0.438083, avg_loss: 0.526567 002815/063150, loss: 0.441787, avg_loss: 0.526307 002820/063150, loss: 0.442998, avg_loss: 0.526128 002825/063150, loss: 0.377224, avg_loss: 0.525880 002830/063150, loss: 0.289482, avg_loss: 0.525707 002835/063150, loss: 0.354323, avg_loss: 0.525438 002840/063150, loss: 0.356212, avg_loss: 0.525252 002845/063150, loss: 0.291039, avg_loss: 0.524944 002850/063150, loss: 0.384137, avg_loss: 0.524755 002855/063150, loss: 0.543154, avg_loss: 0.524567 002860/063150, loss: 0.318191, avg_loss: 0.524278 002865/063150, loss: 0.224320, avg_loss: 0.523993 002870/063150, loss: 0.429259, avg_loss: 0.523760 002875/063150, loss: 0.394798, avg_loss: 0.523539 002880/063150, loss: 0.289711, avg_loss: 0.523362 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 2880/63150: {'accuracy': 0.819954128440367} 002885/063150, loss: 0.423884, avg_loss: 0.523056 002890/063150, loss: 0.294128, avg_loss: 0.522815 002895/063150, loss: 0.219022, avg_loss: 0.522560 002900/063150, loss: 0.478649, avg_loss: 0.522309 002905/063150, loss: 0.404638, avg_loss: 0.522083 002910/063150, loss: 0.420214, avg_loss: 0.521777 002915/063150, loss: 0.392127, avg_loss: 0.521563 002920/063150, loss: 0.239749, avg_loss: 0.521218 002925/063150, loss: 0.458743, avg_loss: 0.520987 002930/063150, loss: 0.178244, avg_loss: 0.520637 002935/063150, loss: 0.507570, avg_loss: 0.520337 002940/063150, loss: 0.430135, avg_loss: 0.520116 002945/063150, loss: 0.311901, avg_loss: 0.519821 002950/063150, loss: 0.385186, avg_loss: 0.519534 002955/063150, loss: 0.530890, avg_loss: 0.519227 002960/063150, loss: 0.282289, avg_loss: 0.518936 002965/063150, loss: 0.295678, avg_loss: 0.518628 002970/063150, loss: 0.287114, avg_loss: 0.518313 002975/063150, loss: 0.177844, avg_loss: 0.517955 002980/063150, loss: 0.492239, avg_loss: 0.517671 002985/063150, loss: 0.366426, avg_loss: 0.517487 002990/063150, loss: 0.343064, avg_loss: 0.517248 002995/063150, loss: 0.319809, avg_loss: 0.516827 003000/063150, loss: 0.431824, avg_loss: 0.516586 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3000/63150: {'accuracy': 0.8245412844036697} 003005/063150, loss: 0.533326, avg_loss: 0.516348 003010/063150, loss: 0.299756, avg_loss: 0.516209 003015/063150, loss: 0.238364, avg_loss: 0.516007 003020/063150, loss: 0.465790, avg_loss: 0.515813 003025/063150, loss: 0.468388, avg_loss: 0.515642 003030/063150, loss: 0.334590, avg_loss: 0.515271 003035/063150, loss: 0.352698, avg_loss: 0.515002 003040/063150, loss: 0.394218, avg_loss: 0.514811 003045/063150, loss: 0.400462, avg_loss: 0.514603 003050/063150, loss: 0.174596, avg_loss: 0.514233 003055/063150, loss: 0.380684, avg_loss: 0.513951 003060/063150, loss: 0.443890, avg_loss: 0.513707 003065/063150, loss: 0.350919, avg_loss: 0.513352 003070/063150, loss: 0.367730, avg_loss: 0.513119 003075/063150, loss: 0.303897, avg_loss: 0.512880 003080/063150, loss: 0.339126, avg_loss: 0.512622 003085/063150, loss: 0.172532, avg_loss: 0.512337 003090/063150, loss: 0.296662, avg_loss: 0.511963 003095/063150, loss: 0.265311, avg_loss: 0.511643 003100/063150, loss: 0.547116, avg_loss: 0.511435 003105/063150, loss: 0.486903, avg_loss: 0.511267 003110/063150, loss: 0.357828, avg_loss: 0.511004 003115/063150, loss: 0.288122, avg_loss: 0.510716 003120/063150, loss: 0.169984, avg_loss: 0.510339 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3120/63150: {'accuracy': 0.8291284403669725} 003125/063150, loss: 0.391413, avg_loss: 0.510143 003130/063150, loss: 0.410817, avg_loss: 0.509878 003135/063150, loss: 0.227384, avg_loss: 0.509497 003140/063150, loss: 0.245751, avg_loss: 0.509240 003145/063150, loss: 0.329388, avg_loss: 0.509190 003150/063150, loss: 0.517729, avg_loss: 0.508962 003155/063150, loss: 0.410280, avg_loss: 0.508690 003160/063150, loss: 0.255345, avg_loss: 0.508490 003165/063150, loss: 0.467873, avg_loss: 0.508244 003170/063150, loss: 0.609531, avg_loss: 0.508100 003175/063150, loss: 0.207893, avg_loss: 0.507832 003180/063150, loss: 0.461324, avg_loss: 0.507690 003185/063150, loss: 0.293929, avg_loss: 0.507442 003190/063150, loss: 0.342324, avg_loss: 0.507173 003195/063150, loss: 0.364676, avg_loss: 0.506903 003200/063150, loss: 0.273666, avg_loss: 0.506640 003205/063150, loss: 0.239925, avg_loss: 0.506290 003210/063150, loss: 0.255106, avg_loss: 0.506002 003215/063150, loss: 0.288060, avg_loss: 0.505724 003220/063150, loss: 0.405391, avg_loss: 0.505464 003225/063150, loss: 0.269861, avg_loss: 0.505234 003230/063150, loss: 0.193510, avg_loss: 0.504942 003235/063150, loss: 0.433163, avg_loss: 0.504727 003240/063150, loss: 0.211067, avg_loss: 0.504363 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3240/63150: {'accuracy': 0.8291284403669725} 003245/063150, loss: 0.484763, avg_loss: 0.504067 003250/063150, loss: 0.291922, avg_loss: 0.503777 003255/063150, loss: 0.494730, avg_loss: 0.503512 003260/063150, loss: 0.255081, avg_loss: 0.503354 003265/063150, loss: 0.481848, avg_loss: 0.503102 003270/063150, loss: 0.417441, avg_loss: 0.502899 003275/063150, loss: 0.200572, avg_loss: 0.502584 003280/063150, loss: 0.168463, avg_loss: 0.502487 003285/063150, loss: 0.210876, avg_loss: 0.502319 003290/063150, loss: 0.287843, avg_loss: 0.501946 003295/063150, loss: 0.181558, avg_loss: 0.501571 003300/063150, loss: 0.472590, avg_loss: 0.501398 003305/063150, loss: 0.146369, avg_loss: 0.501337 003310/063150, loss: 0.283992, avg_loss: 0.501068 003315/063150, loss: 0.236916, avg_loss: 0.500815 003320/063150, loss: 0.384485, avg_loss: 0.500522 003325/063150, loss: 0.403200, avg_loss: 0.500360 003330/063150, loss: 0.283864, avg_loss: 0.500092 003335/063150, loss: 0.410497, avg_loss: 0.499943 003340/063150, loss: 0.318582, avg_loss: 0.499752 003345/063150, loss: 0.466494, avg_loss: 0.499531 003350/063150, loss: 0.343706, avg_loss: 0.499330 003355/063150, loss: 0.322908, avg_loss: 0.499076 003360/063150, loss: 0.609332, avg_loss: 0.498971 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3360/63150: {'accuracy': 0.8176605504587156} 003365/063150, loss: 0.348086, avg_loss: 0.498690 003370/063150, loss: 0.363859, avg_loss: 0.498392 003375/063150, loss: 0.370918, avg_loss: 0.498175 003380/063150, loss: 0.189442, avg_loss: 0.497851 003385/063150, loss: 0.250529, avg_loss: 0.497506 003390/063150, loss: 0.350957, avg_loss: 0.497272 003395/063150, loss: 0.291620, avg_loss: 0.497034 003400/063150, loss: 0.283865, avg_loss: 0.496783 003405/063150, loss: 0.355665, avg_loss: 0.496519 003410/063150, loss: 0.156031, avg_loss: 0.496212 003415/063150, loss: 0.326745, avg_loss: 0.495975 003420/063150, loss: 0.273052, avg_loss: 0.495651 003425/063150, loss: 0.648556, avg_loss: 0.495533 003430/063150, loss: 0.430986, avg_loss: 0.495275 003435/063150, loss: 0.338636, avg_loss: 0.494997 003440/063150, loss: 0.266915, avg_loss: 0.494701 003445/063150, loss: 0.241296, avg_loss: 0.494522 003450/063150, loss: 0.416952, avg_loss: 0.494355 003455/063150, loss: 0.210606, avg_loss: 0.494159 003460/063150, loss: 0.300003, avg_loss: 0.493918 003465/063150, loss: 0.188219, avg_loss: 0.493744 003470/063150, loss: 0.333949, avg_loss: 0.493513 003475/063150, loss: 0.575965, avg_loss: 0.493269 003480/063150, loss: 0.398163, avg_loss: 0.493014 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3480/63150: {'accuracy': 0.8245412844036697} 003485/063150, loss: 0.329946, avg_loss: 0.492715 003490/063150, loss: 0.276797, avg_loss: 0.492574 003495/063150, loss: 0.271636, avg_loss: 0.492447 003500/063150, loss: 0.318183, avg_loss: 0.492189 003505/063150, loss: 0.287863, avg_loss: 0.491960 003510/063150, loss: 0.241224, avg_loss: 0.491692 003515/063150, loss: 0.248229, avg_loss: 0.491466 003520/063150, loss: 0.203357, avg_loss: 0.491085 003525/063150, loss: 0.334120, avg_loss: 0.490832 003530/063150, loss: 0.816127, avg_loss: 0.490780 003535/063150, loss: 0.374384, avg_loss: 0.490629 003540/063150, loss: 0.251941, avg_loss: 0.490391 003545/063150, loss: 0.275866, avg_loss: 0.490227 003550/063150, loss: 0.276895, avg_loss: 0.489930 003555/063150, loss: 0.383942, avg_loss: 0.489838 003560/063150, loss: 0.227589, avg_loss: 0.489613 003565/063150, loss: 0.407125, avg_loss: 0.489610 003570/063150, loss: 0.258336, avg_loss: 0.489305 003575/063150, loss: 0.380267, avg_loss: 0.489088 003580/063150, loss: 0.297157, avg_loss: 0.488990 003585/063150, loss: 0.397631, avg_loss: 0.488827 003590/063150, loss: 0.203452, avg_loss: 0.488615 003595/063150, loss: 0.439270, avg_loss: 0.488440 003600/063150, loss: 0.205019, avg_loss: 0.488162 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3600/63150: {'accuracy': 0.8084862385321101} 003605/063150, loss: 0.189155, avg_loss: 0.487951 003610/063150, loss: 0.229313, avg_loss: 0.487661 003615/063150, loss: 0.370952, avg_loss: 0.487494 003620/063150, loss: 0.205126, avg_loss: 0.487224 003625/063150, loss: 0.437848, avg_loss: 0.487051 003630/063150, loss: 0.383554, avg_loss: 0.486768 003635/063150, loss: 0.427163, avg_loss: 0.486577 003640/063150, loss: 0.425560, avg_loss: 0.486343 003645/063150, loss: 0.226355, avg_loss: 0.486214 003650/063150, loss: 0.568268, avg_loss: 0.486013 003655/063150, loss: 0.340532, avg_loss: 0.485834 003660/063150, loss: 0.338089, avg_loss: 0.485783 003665/063150, loss: 0.175585, avg_loss: 0.485615 003670/063150, loss: 0.401298, avg_loss: 0.485494 003675/063150, loss: 0.354530, avg_loss: 0.485397 003680/063150, loss: 0.398257, avg_loss: 0.485189 003685/063150, loss: 0.413964, avg_loss: 0.484986 003690/063150, loss: 0.495796, avg_loss: 0.484827 003695/063150, loss: 0.500072, avg_loss: 0.484619 003700/063150, loss: 0.432035, avg_loss: 0.484414 003705/063150, loss: 0.179378, avg_loss: 0.484087 003710/063150, loss: 0.238655, avg_loss: 0.483893 003715/063150, loss: 0.514123, avg_loss: 0.483672 003720/063150, loss: 0.344935, avg_loss: 0.483506 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3720/63150: {'accuracy': 0.8279816513761468} 003725/063150, loss: 0.218731, avg_loss: 0.483263 003730/063150, loss: 0.280191, avg_loss: 0.483006 003735/063150, loss: 0.286088, avg_loss: 0.482821 003740/063150, loss: 0.322779, avg_loss: 0.482655 003745/063150, loss: 0.337575, avg_loss: 0.482444 003750/063150, loss: 0.224053, avg_loss: 0.482281 003755/063150, loss: 0.178751, avg_loss: 0.481987 003760/063150, loss: 0.198184, avg_loss: 0.481745 003765/063150, loss: 0.360390, avg_loss: 0.481527 003770/063150, loss: 0.153401, avg_loss: 0.481235 003775/063150, loss: 0.308784, avg_loss: 0.480957 003780/063150, loss: 0.147440, avg_loss: 0.480649 003785/063150, loss: 0.244711, avg_loss: 0.480516 003790/063150, loss: 0.159410, avg_loss: 0.480247 003795/063150, loss: 0.300179, avg_loss: 0.480061 003800/063150, loss: 0.349719, avg_loss: 0.479742 003805/063150, loss: 0.333127, avg_loss: 0.479573 003810/063150, loss: 0.512503, avg_loss: 0.479473 003815/063150, loss: 0.316065, avg_loss: 0.479315 003820/063150, loss: 0.394462, avg_loss: 0.479157 003825/063150, loss: 0.229573, avg_loss: 0.478914 003830/063150, loss: 0.257653, avg_loss: 0.478606 003835/063150, loss: 0.181622, avg_loss: 0.478397 003840/063150, loss: 0.276539, avg_loss: 0.478220 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3840/63150: {'accuracy': 0.823394495412844} 003845/063150, loss: 0.205788, avg_loss: 0.478044 003850/063150, loss: 0.269425, avg_loss: 0.477838 003855/063150, loss: 0.313539, avg_loss: 0.477612 003860/063150, loss: 0.481410, avg_loss: 0.477505 003865/063150, loss: 0.326335, avg_loss: 0.477307 003870/063150, loss: 0.277892, avg_loss: 0.477151 003875/063150, loss: 0.485127, avg_loss: 0.476976 003880/063150, loss: 0.130770, avg_loss: 0.476737 003885/063150, loss: 0.357989, avg_loss: 0.476603 003890/063150, loss: 0.414372, avg_loss: 0.476369 003895/063150, loss: 0.511294, avg_loss: 0.476176 003900/063150, loss: 0.281438, avg_loss: 0.475922 003905/063150, loss: 0.522367, avg_loss: 0.475730 003910/063150, loss: 0.193221, avg_loss: 0.475454 003915/063150, loss: 0.312487, avg_loss: 0.475274 003920/063150, loss: 0.246824, avg_loss: 0.475143 003925/063150, loss: 0.424371, avg_loss: 0.474956 003930/063150, loss: 0.191638, avg_loss: 0.474817 003935/063150, loss: 0.328832, avg_loss: 0.474663 003940/063150, loss: 0.167447, avg_loss: 0.474381 003945/063150, loss: 0.192950, avg_loss: 0.474199 003950/063150, loss: 0.143713, avg_loss: 0.474006 003955/063150, loss: 0.312656, avg_loss: 0.473838 003960/063150, loss: 0.180376, avg_loss: 0.473684 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 3960/63150: {'accuracy': 0.8142201834862385} 003965/063150, loss: 0.245379, avg_loss: 0.473425 003970/063150, loss: 0.414509, avg_loss: 0.473334 003975/063150, loss: 0.573777, avg_loss: 0.473283 003980/063150, loss: 0.431763, avg_loss: 0.473142 003985/063150, loss: 0.336310, avg_loss: 0.472953 003990/063150, loss: 0.506686, avg_loss: 0.472871 003995/063150, loss: 0.242396, avg_loss: 0.472633 004000/063150, loss: 0.257115, avg_loss: 0.472442 004005/063150, loss: 0.308776, avg_loss: 0.472229 004010/063150, loss: 0.200311, avg_loss: 0.472067 004015/063150, loss: 0.298820, avg_loss: 0.471853 004020/063150, loss: 0.227573, avg_loss: 0.471579 004025/063150, loss: 0.319509, avg_loss: 0.471440 004030/063150, loss: 0.606248, avg_loss: 0.471350 004035/063150, loss: 0.578137, avg_loss: 0.471285 004040/063150, loss: 0.344481, avg_loss: 0.471059 004045/063150, loss: 0.162110, avg_loss: 0.470830 004050/063150, loss: 0.299887, avg_loss: 0.470668 004055/063150, loss: 0.321251, avg_loss: 0.470464 004060/063150, loss: 0.298908, avg_loss: 0.470328 004065/063150, loss: 0.316801, avg_loss: 0.470166 004070/063150, loss: 0.286965, avg_loss: 0.470026 004075/063150, loss: 0.245791, avg_loss: 0.469788 004080/063150, loss: 0.433150, avg_loss: 0.469643 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 4080/63150: {'accuracy': 0.8188073394495413} 004085/063150, loss: 0.402995, avg_loss: 0.469398 004090/063150, loss: 0.283430, avg_loss: 0.469245 004095/063150, loss: 0.270191, avg_loss: 0.469029 004100/063150, loss: 0.261287, avg_loss: 0.468912 004105/063150, loss: 0.254643, avg_loss: 0.468666 004110/063150, loss: 0.412092, avg_loss: 0.468509 004115/063150, loss: 0.427600, avg_loss: 0.468354 004120/063150, loss: 0.224203, avg_loss: 0.468168 004125/063150, loss: 0.184526, avg_loss: 0.467871 004130/063150, loss: 0.336193, avg_loss: 0.467726 004135/063150, loss: 0.524905, avg_loss: 0.467570 004140/063150, loss: 0.208997, avg_loss: 0.467373 004145/063150, loss: 0.213856, avg_loss: 0.467190 004150/063150, loss: 0.441607, avg_loss: 0.467003 004155/063150, loss: 0.515599, avg_loss: 0.466883 004160/063150, loss: 0.283904, avg_loss: 0.466775 004165/063150, loss: 0.197917, avg_loss: 0.466519 004170/063150, loss: 0.310240, avg_loss: 0.466277 004175/063150, loss: 0.189847, avg_loss: 0.466058 004180/063150, loss: 0.162403, avg_loss: 0.465853 004185/063150, loss: 0.368976, avg_loss: 0.465715 004190/063150, loss: 0.149773, avg_loss: 0.465442 004195/063150, loss: 0.170256, avg_loss: 0.465309 004200/063150, loss: 0.327217, avg_loss: 0.465107 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 1, step 4200/63150: {'accuracy': 0.8279816513761468} 004205/063150, loss: 0.534513, avg_loss: 0.464948 004210/063150, loss: 0.280758, avg_loss: 0.464745 004215/063150, loss: 0.419127, avg_loss: 0.464535 004220/063150, loss: 0.263336, avg_loss: 0.464373 004225/063150, loss: 0.216963, avg_loss: 0.464180 004230/063150, loss: 0.202848, avg_loss: 0.463952 004235/063150, loss: 0.252699, avg_loss: 0.463746 004240/063150, loss: 0.280739, avg_loss: 0.463539 004245/063150, loss: 0.243150, avg_loss: 0.463346 004250/063150, loss: 0.222552, avg_loss: 0.463098 004255/063150, loss: 0.158287, avg_loss: 0.462887 004260/063150, loss: 0.394647, avg_loss: 0.462672 004265/063150, loss: 0.295818, avg_loss: 0.462479 004270/063150, loss: 0.453301, avg_loss: 0.462292 004275/063150, loss: 0.273588, avg_loss: 0.462118 004280/063150, loss: 0.347643, avg_loss: 0.461983 004285/063150, loss: 0.333519, avg_loss: 0.461748 004290/063150, loss: 0.097076, avg_loss: 0.461539 004295/063150, loss: 0.366240, avg_loss: 0.461389 004300/063150, loss: 0.298476, avg_loss: 0.461151 004305/063150, loss: 0.467995, avg_loss: 0.460991 004310/063150, loss: 0.200606, avg_loss: 0.460794 004315/063150, loss: 0.245891, avg_loss: 0.460592 004320/063150, loss: 0.358560, avg_loss: 0.460404 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 4320/63150: {'accuracy': 0.8302752293577982} 004325/063150, loss: 0.204014, avg_loss: 0.460226 004330/063150, loss: 0.197800, avg_loss: 0.460075 004335/063150, loss: 0.227778, avg_loss: 0.459917 004340/063150, loss: 0.349984, avg_loss: 0.459709 004345/063150, loss: 0.303346, avg_loss: 0.459513 004350/063150, loss: 0.409578, avg_loss: 0.459373 004355/063150, loss: 0.295931, avg_loss: 0.459159 004360/063150, loss: 0.304503, avg_loss: 0.458994 004365/063150, loss: 0.150120, avg_loss: 0.458815 004370/063150, loss: 0.402723, avg_loss: 0.458620 004375/063150, loss: 0.281013, avg_loss: 0.458396 004380/063150, loss: 0.358963, avg_loss: 0.458200 004385/063150, loss: 0.284151, avg_loss: 0.458011 004390/063150, loss: 0.591740, avg_loss: 0.457902 004395/063150, loss: 0.235476, avg_loss: 0.457620 004400/063150, loss: 0.387758, avg_loss: 0.457485 004405/063150, loss: 0.532757, avg_loss: 0.457315 004410/063150, loss: 0.239686, avg_loss: 0.457112 004415/063150, loss: 0.276296, avg_loss: 0.456937 004420/063150, loss: 0.229210, avg_loss: 0.456741 004425/063150, loss: 0.433873, avg_loss: 0.456632 004430/063150, loss: 0.210645, avg_loss: 0.456422 004435/063150, loss: 0.238904, avg_loss: 0.456262 004440/063150, loss: 0.187905, avg_loss: 0.456049 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 4440/63150: {'accuracy': 0.8371559633027523} 004445/063150, loss: 0.487801, avg_loss: 0.455872 004450/063150, loss: 0.245235, avg_loss: 0.455642 004455/063150, loss: 0.208553, avg_loss: 0.455426 004460/063150, loss: 0.215836, avg_loss: 0.455196 004465/063150, loss: 0.178831, avg_loss: 0.455039 004470/063150, loss: 0.450658, avg_loss: 0.454869 004475/063150, loss: 0.105808, avg_loss: 0.454571 004480/063150, loss: 0.325373, avg_loss: 0.454372 004485/063150, loss: 0.287978, avg_loss: 0.454114 004490/063150, loss: 0.241827, avg_loss: 0.453863 004495/063150, loss: 0.312952, avg_loss: 0.453664 004500/063150, loss: 0.278380, avg_loss: 0.453492 004505/063150, loss: 0.210514, avg_loss: 0.453240 004510/063150, loss: 0.364019, avg_loss: 0.453080 004515/063150, loss: 0.503646, avg_loss: 0.453042 004520/063150, loss: 0.317447, avg_loss: 0.452801 004525/063150, loss: 0.251832, avg_loss: 0.452619 004530/063150, loss: 0.153919, avg_loss: 0.452398 004535/063150, loss: 0.239300, avg_loss: 0.452199 004540/063150, loss: 0.300717, avg_loss: 0.451941 004545/063150, loss: 0.258976, avg_loss: 0.451722 004550/063150, loss: 0.370137, avg_loss: 0.451493 004555/063150, loss: 0.324233, avg_loss: 0.451366 004560/063150, loss: 0.174304, avg_loss: 0.451143 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 4560/63150: {'accuracy': 0.8027522935779816} 004565/063150, loss: 0.263388, avg_loss: 0.450991 004570/063150, loss: 0.153013, avg_loss: 0.450785 004575/063150, loss: 0.285290, avg_loss: 0.450510 004580/063150, loss: 0.049658, avg_loss: 0.450311 004585/063150, loss: 0.401840, avg_loss: 0.450169 004590/063150, loss: 0.418982, avg_loss: 0.450130 004595/063150, loss: 0.198632, avg_loss: 0.449935 004600/063150, loss: 0.458129, avg_loss: 0.449748 004605/063150, loss: 0.417927, avg_loss: 0.449541 004610/063150, loss: 0.203784, avg_loss: 0.449399 004615/063150, loss: 0.186797, avg_loss: 0.449177 004620/063150, loss: 0.146794, avg_loss: 0.449031 004625/063150, loss: 0.146629, avg_loss: 0.448768 004630/063150, loss: 0.333879, avg_loss: 0.448581 004635/063150, loss: 0.261697, avg_loss: 0.448384 004640/063150, loss: 0.289962, avg_loss: 0.448185 004645/063150, loss: 0.289506, avg_loss: 0.447938 004650/063150, loss: 0.339763, avg_loss: 0.447763 004655/063150, loss: 0.183156, avg_loss: 0.447539 004660/063150, loss: 0.454979, avg_loss: 0.447429 004665/063150, loss: 0.425277, avg_loss: 0.447261 004670/063150, loss: 0.166446, avg_loss: 0.447027 004675/063150, loss: 0.427591, avg_loss: 0.446802 004680/063150, loss: 0.432299, avg_loss: 0.446649 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 4680/63150: {'accuracy': 0.8394495412844036} 004685/063150, loss: 0.238948, avg_loss: 0.446484 004690/063150, loss: 0.627134, avg_loss: 0.446360 004695/063150, loss: 0.271760, avg_loss: 0.446131 004700/063150, loss: 0.209785, avg_loss: 0.445915 004705/063150, loss: 0.203467, avg_loss: 0.445696 004710/063150, loss: 0.474783, avg_loss: 0.445573 004715/063150, loss: 0.420667, avg_loss: 0.445427 004720/063150, loss: 0.195672, avg_loss: 0.445222 004725/063150, loss: 0.321105, avg_loss: 0.445016 004730/063150, loss: 0.539818, avg_loss: 0.444896 004735/063150, loss: 0.223064, avg_loss: 0.444749 004740/063150, loss: 0.395136, avg_loss: 0.444562 004745/063150, loss: 0.198465, avg_loss: 0.444423 004750/063150, loss: 0.278540, avg_loss: 0.444251 004755/063150, loss: 0.349364, avg_loss: 0.444091 004760/063150, loss: 0.229996, avg_loss: 0.443912 004765/063150, loss: 0.360927, avg_loss: 0.443755 004770/063150, loss: 0.201597, avg_loss: 0.443511 004775/063150, loss: 0.275852, avg_loss: 0.443293 004780/063150, loss: 0.365218, avg_loss: 0.443081 004785/063150, loss: 0.312531, avg_loss: 0.442895 004790/063150, loss: 0.205163, avg_loss: 0.442677 004795/063150, loss: 0.198468, avg_loss: 0.442459 004800/063150, loss: 0.316452, avg_loss: 0.442294 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 4800/63150: {'accuracy': 0.8428899082568807} 004805/063150, loss: 0.199581, avg_loss: 0.442073 004810/063150, loss: 0.278382, avg_loss: 0.441916 004815/063150, loss: 0.286555, avg_loss: 0.441730 004820/063150, loss: 0.227525, avg_loss: 0.441465 004825/063150, loss: 0.103403, avg_loss: 0.441270 004830/063150, loss: 0.153055, avg_loss: 0.441054 004835/063150, loss: 0.270689, avg_loss: 0.440880 004840/063150, loss: 0.230690, avg_loss: 0.440697 004845/063150, loss: 0.290590, avg_loss: 0.440567 004850/063150, loss: 0.168618, avg_loss: 0.440329 004855/063150, loss: 0.341406, avg_loss: 0.440188 004860/063150, loss: 0.285294, avg_loss: 0.440123 004865/063150, loss: 0.309046, avg_loss: 0.439895 004870/063150, loss: 0.381336, avg_loss: 0.439703 004875/063150, loss: 0.290316, avg_loss: 0.439567 004880/063150, loss: 0.149874, avg_loss: 0.439405 004885/063150, loss: 0.328731, avg_loss: 0.439294 004890/063150, loss: 0.154876, avg_loss: 0.439079 004895/063150, loss: 0.409986, avg_loss: 0.438900 004900/063150, loss: 0.389197, avg_loss: 0.438775 004905/063150, loss: 0.285581, avg_loss: 0.438648 004910/063150, loss: 0.250653, avg_loss: 0.438452 004915/063150, loss: 0.210221, avg_loss: 0.438304 004920/063150, loss: 0.244220, avg_loss: 0.438145 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 4920/63150: {'accuracy': 0.841743119266055} 004925/063150, loss: 0.216394, avg_loss: 0.437980 004930/063150, loss: 0.448297, avg_loss: 0.437806 004935/063150, loss: 0.372978, avg_loss: 0.437691 004940/063150, loss: 0.189112, avg_loss: 0.437556 004945/063150, loss: 0.400141, avg_loss: 0.437408 004950/063150, loss: 0.246144, avg_loss: 0.437198 004955/063150, loss: 0.415682, avg_loss: 0.437082 004960/063150, loss: 0.267845, avg_loss: 0.436933 004965/063150, loss: 0.191768, avg_loss: 0.436742 004970/063150, loss: 0.292821, avg_loss: 0.436566 004975/063150, loss: 0.302429, avg_loss: 0.436411 004980/063150, loss: 0.297605, avg_loss: 0.436181 004985/063150, loss: 0.461785, avg_loss: 0.435991 004990/063150, loss: 0.198792, avg_loss: 0.435716 004995/063150, loss: 0.503127, avg_loss: 0.435565 005000/063150, loss: 0.429058, avg_loss: 0.435420 005005/063150, loss: 0.300414, avg_loss: 0.435222 005010/063150, loss: 0.288963, avg_loss: 0.435010 005015/063150, loss: 0.238046, avg_loss: 0.434812 005020/063150, loss: 0.263563, avg_loss: 0.434594 005025/063150, loss: 0.256921, avg_loss: 0.434476 005030/063150, loss: 0.147477, avg_loss: 0.434275 005035/063150, loss: 0.394888, avg_loss: 0.434118 005040/063150, loss: 0.294512, avg_loss: 0.433977 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5040/63150: {'accuracy': 0.8474770642201835} 005045/063150, loss: 0.286427, avg_loss: 0.433767 005050/063150, loss: 0.365111, avg_loss: 0.433589 005055/063150, loss: 0.264320, avg_loss: 0.433527 005060/063150, loss: 0.165543, avg_loss: 0.433324 005065/063150, loss: 0.223556, avg_loss: 0.433158 005070/063150, loss: 0.285883, avg_loss: 0.432927 005075/063150, loss: 0.114102, avg_loss: 0.432806 005080/063150, loss: 0.281807, avg_loss: 0.432717 005085/063150, loss: 0.189452, avg_loss: 0.432565 005090/063150, loss: 0.424987, avg_loss: 0.432412 005095/063150, loss: 0.285115, avg_loss: 0.432226 005100/063150, loss: 0.424671, avg_loss: 0.432093 005105/063150, loss: 0.252718, avg_loss: 0.431929 005110/063150, loss: 0.233115, avg_loss: 0.431757 005115/063150, loss: 0.284833, avg_loss: 0.431530 005120/063150, loss: 0.302364, avg_loss: 0.431387 005125/063150, loss: 0.226655, avg_loss: 0.431239 005130/063150, loss: 0.587139, avg_loss: 0.431109 005135/063150, loss: 0.137232, avg_loss: 0.430907 005140/063150, loss: 0.383301, avg_loss: 0.430851 005145/063150, loss: 0.310483, avg_loss: 0.430627 005150/063150, loss: 0.170012, avg_loss: 0.430463 005155/063150, loss: 0.165516, avg_loss: 0.430218 005160/063150, loss: 0.222842, avg_loss: 0.430030 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5160/63150: {'accuracy': 0.8463302752293578} 005165/063150, loss: 0.238556, avg_loss: 0.429955 005170/063150, loss: 0.465534, avg_loss: 0.429805 005175/063150, loss: 0.270709, avg_loss: 0.429676 005180/063150, loss: 0.161318, avg_loss: 0.429460 005185/063150, loss: 0.235425, avg_loss: 0.429273 005190/063150, loss: 0.214050, avg_loss: 0.429125 005195/063150, loss: 0.371211, avg_loss: 0.429032 005200/063150, loss: 0.305828, avg_loss: 0.428797 005205/063150, loss: 0.262175, avg_loss: 0.428669 005210/063150, loss: 0.165639, avg_loss: 0.428471 005215/063150, loss: 0.287391, avg_loss: 0.428338 005220/063150, loss: 0.165492, avg_loss: 0.428091 005225/063150, loss: 0.211443, avg_loss: 0.427916 005230/063150, loss: 0.269998, avg_loss: 0.427763 005235/063150, loss: 0.255843, avg_loss: 0.427645 005240/063150, loss: 0.229471, avg_loss: 0.427557 005245/063150, loss: 0.088537, avg_loss: 0.427369 005250/063150, loss: 0.264156, avg_loss: 0.427197 005255/063150, loss: 0.191877, avg_loss: 0.426997 005260/063150, loss: 0.126016, avg_loss: 0.426827 005265/063150, loss: 0.335275, avg_loss: 0.426693 005270/063150, loss: 0.229536, avg_loss: 0.426524 005275/063150, loss: 0.235785, avg_loss: 0.426385 005280/063150, loss: 0.276160, avg_loss: 0.426165 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5280/63150: {'accuracy': 0.8325688073394495} 005285/063150, loss: 0.280916, avg_loss: 0.425973 005290/063150, loss: 0.270638, avg_loss: 0.425829 005295/063150, loss: 0.174957, avg_loss: 0.425669 005300/063150, loss: 0.116287, avg_loss: 0.425525 005305/063150, loss: 0.313111, avg_loss: 0.425400 005310/063150, loss: 0.334110, avg_loss: 0.425252 005315/063150, loss: 0.349562, avg_loss: 0.425126 005320/063150, loss: 0.283849, avg_loss: 0.425032 005325/063150, loss: 0.204222, avg_loss: 0.424851 005330/063150, loss: 0.261867, avg_loss: 0.424644 005335/063150, loss: 0.343752, avg_loss: 0.424509 005340/063150, loss: 0.355949, avg_loss: 0.424404 005345/063150, loss: 0.262267, avg_loss: 0.424234 005350/063150, loss: 0.245239, avg_loss: 0.424023 005355/063150, loss: 0.124010, avg_loss: 0.423851 005360/063150, loss: 0.318031, avg_loss: 0.423722 005365/063150, loss: 0.179093, avg_loss: 0.423529 005370/063150, loss: 0.369743, avg_loss: 0.423343 005375/063150, loss: 0.411769, avg_loss: 0.423239 005380/063150, loss: 0.172745, avg_loss: 0.423072 005385/063150, loss: 0.395810, avg_loss: 0.422913 005390/063150, loss: 0.281775, avg_loss: 0.422846 005395/063150, loss: 0.159977, avg_loss: 0.422638 005400/063150, loss: 0.245816, avg_loss: 0.422545 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5400/63150: {'accuracy': 0.8474770642201835} 005405/063150, loss: 0.222701, avg_loss: 0.422430 005410/063150, loss: 0.411298, avg_loss: 0.422258 005415/063150, loss: 0.189946, avg_loss: 0.422122 005420/063150, loss: 0.125218, avg_loss: 0.421945 005425/063150, loss: 0.296084, avg_loss: 0.421884 005430/063150, loss: 0.181411, avg_loss: 0.421677 005435/063150, loss: 0.342984, avg_loss: 0.421520 005440/063150, loss: 0.138109, avg_loss: 0.421338 005445/063150, loss: 0.298496, avg_loss: 0.421234 005450/063150, loss: 0.126139, avg_loss: 0.421085 005455/063150, loss: 0.335512, avg_loss: 0.420977 005460/063150, loss: 0.315063, avg_loss: 0.420867 005465/063150, loss: 0.271297, avg_loss: 0.420714 005470/063150, loss: 0.173331, avg_loss: 0.420556 005475/063150, loss: 0.132915, avg_loss: 0.420399 005480/063150, loss: 0.444826, avg_loss: 0.420278 005485/063150, loss: 0.247393, avg_loss: 0.420095 005490/063150, loss: 0.144030, avg_loss: 0.419938 005495/063150, loss: 0.251728, avg_loss: 0.419799 005500/063150, loss: 0.282356, avg_loss: 0.419636 005505/063150, loss: 0.526442, avg_loss: 0.419553 005510/063150, loss: 0.246073, avg_loss: 0.419412 005515/063150, loss: 0.308111, avg_loss: 0.419253 005520/063150, loss: 0.310621, avg_loss: 0.419104 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5520/63150: {'accuracy': 0.8486238532110092} 005525/063150, loss: 0.310869, avg_loss: 0.418925 005530/063150, loss: 0.256039, avg_loss: 0.418742 005535/063150, loss: 0.245214, avg_loss: 0.418653 005540/063150, loss: 0.375845, avg_loss: 0.418556 005545/063150, loss: 0.386721, avg_loss: 0.418405 005550/063150, loss: 0.076456, avg_loss: 0.418188 005555/063150, loss: 0.301503, avg_loss: 0.418126 005560/063150, loss: 0.111197, avg_loss: 0.417964 005565/063150, loss: 0.360656, avg_loss: 0.417825 005570/063150, loss: 0.236154, avg_loss: 0.417703 005575/063150, loss: 0.489955, avg_loss: 0.417533 005580/063150, loss: 0.419160, avg_loss: 0.417395 005585/063150, loss: 0.252846, avg_loss: 0.417191 005590/063150, loss: 0.378713, avg_loss: 0.417089 005595/063150, loss: 0.242426, avg_loss: 0.416918 005600/063150, loss: 0.101006, avg_loss: 0.416771 005605/063150, loss: 0.310748, avg_loss: 0.416665 005610/063150, loss: 0.314502, avg_loss: 0.416532 005615/063150, loss: 0.134294, avg_loss: 0.416373 005620/063150, loss: 0.111358, avg_loss: 0.416124 005625/063150, loss: 0.200313, avg_loss: 0.415975 005630/063150, loss: 0.152119, avg_loss: 0.415853 005635/063150, loss: 0.359868, avg_loss: 0.415691 005640/063150, loss: 0.263532, avg_loss: 0.415543 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5640/63150: {'accuracy': 0.8509174311926605} 005645/063150, loss: 0.255680, avg_loss: 0.415401 005650/063150, loss: 0.219280, avg_loss: 0.415199 005655/063150, loss: 0.137261, avg_loss: 0.415025 005660/063150, loss: 0.106693, avg_loss: 0.414909 005665/063150, loss: 0.247748, avg_loss: 0.414795 005670/063150, loss: 0.222624, avg_loss: 0.414654 005675/063150, loss: 0.142155, avg_loss: 0.414477 005680/063150, loss: 0.356671, avg_loss: 0.414298 005685/063150, loss: 0.185805, avg_loss: 0.414131 005690/063150, loss: 0.119556, avg_loss: 0.413944 005695/063150, loss: 0.093402, avg_loss: 0.413751 005700/063150, loss: 0.103428, avg_loss: 0.413550 005705/063150, loss: 0.191362, avg_loss: 0.413359 005710/063150, loss: 0.354794, avg_loss: 0.413163 005715/063150, loss: 0.263408, avg_loss: 0.413012 005720/063150, loss: 0.077534, avg_loss: 0.412811 005725/063150, loss: 0.080398, avg_loss: 0.412594 005730/063150, loss: 0.265768, avg_loss: 0.412435 005735/063150, loss: 0.271794, avg_loss: 0.412280 005740/063150, loss: 0.082713, avg_loss: 0.412085 005745/063150, loss: 0.280030, avg_loss: 0.411907 005750/063150, loss: 0.088473, avg_loss: 0.411682 005755/063150, loss: 0.081587, avg_loss: 0.411499 005760/063150, loss: 0.171882, avg_loss: 0.411279 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5760/63150: {'accuracy': 0.8474770642201835} 005765/063150, loss: 0.416375, avg_loss: 0.411165 005770/063150, loss: 0.269925, avg_loss: 0.410981 005775/063150, loss: 0.289832, avg_loss: 0.410822 005780/063150, loss: 0.107895, avg_loss: 0.410649 005785/063150, loss: 0.414067, avg_loss: 0.410539 005790/063150, loss: 0.227315, avg_loss: 0.410407 005795/063150, loss: 0.405493, avg_loss: 0.410256 005800/063150, loss: 0.191451, avg_loss: 0.410080 005805/063150, loss: 0.242881, avg_loss: 0.409952 005810/063150, loss: 0.411584, avg_loss: 0.409858 005815/063150, loss: 0.193257, avg_loss: 0.409707 005820/063150, loss: 0.174544, avg_loss: 0.409572 005825/063150, loss: 0.309917, avg_loss: 0.409427 005830/063150, loss: 0.180061, avg_loss: 0.409250 005835/063150, loss: 0.419392, avg_loss: 0.409187 005840/063150, loss: 0.117432, avg_loss: 0.408997 005845/063150, loss: 0.359205, avg_loss: 0.408869 005850/063150, loss: 0.146162, avg_loss: 0.408702 005855/063150, loss: 0.412644, avg_loss: 0.408624 005860/063150, loss: 0.294692, avg_loss: 0.408494 005865/063150, loss: 0.162512, avg_loss: 0.408370 005870/063150, loss: 0.212495, avg_loss: 0.408199 005875/063150, loss: 0.463353, avg_loss: 0.408061 005880/063150, loss: 0.250324, avg_loss: 0.407958 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 5880/63150: {'accuracy': 0.8451834862385321} 005885/063150, loss: 0.373464, avg_loss: 0.407824 005890/063150, loss: 0.064047, avg_loss: 0.407624 005895/063150, loss: 0.139490, avg_loss: 0.407460 005900/063150, loss: 0.152840, avg_loss: 0.407295 005905/063150, loss: 0.198378, avg_loss: 0.407160 005910/063150, loss: 0.096751, avg_loss: 0.407004 005915/063150, loss: 0.259202, avg_loss: 0.406893 005920/063150, loss: 0.214933, avg_loss: 0.406752 005925/063150, loss: 0.333430, avg_loss: 0.406673 005930/063150, loss: 0.246953, avg_loss: 0.406552 005935/063150, loss: 0.163623, avg_loss: 0.406404 005940/063150, loss: 0.180992, avg_loss: 0.406240 005945/063150, loss: 0.252869, avg_loss: 0.406076 005950/063150, loss: 0.334405, avg_loss: 0.405930 005955/063150, loss: 0.406325, avg_loss: 0.405770 005960/063150, loss: 0.252790, avg_loss: 0.405649 005965/063150, loss: 0.133011, avg_loss: 0.405508 005970/063150, loss: 0.165406, avg_loss: 0.405336 005975/063150, loss: 0.296072, avg_loss: 0.405239 005980/063150, loss: 0.252766, avg_loss: 0.405104 005985/063150, loss: 0.248758, avg_loss: 0.404941 005990/063150, loss: 0.183379, avg_loss: 0.404778 005995/063150, loss: 0.332075, avg_loss: 0.404645 006000/063150, loss: 0.190178, avg_loss: 0.404505 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 6000/63150: {'accuracy': 0.8577981651376146} 006005/063150, loss: 0.164176, avg_loss: 0.404329 006010/063150, loss: 0.250037, avg_loss: 0.404126 006015/063150, loss: 0.147910, avg_loss: 0.404039 006020/063150, loss: 0.293327, avg_loss: 0.403912 006025/063150, loss: 0.296977, avg_loss: 0.403782 006030/063150, loss: 0.122955, avg_loss: 0.403677 006035/063150, loss: 0.173654, avg_loss: 0.403527 006040/063150, loss: 0.306564, avg_loss: 0.403507 006045/063150, loss: 0.277148, avg_loss: 0.403381 006050/063150, loss: 0.324062, avg_loss: 0.403264 006055/063150, loss: 0.247942, avg_loss: 0.403152 006060/063150, loss: 0.338937, avg_loss: 0.403021 006065/063150, loss: 0.197391, avg_loss: 0.402836 006070/063150, loss: 0.294998, avg_loss: 0.402756 006075/063150, loss: 0.267503, avg_loss: 0.402596 006080/063150, loss: 0.218786, avg_loss: 0.402422 006085/063150, loss: 0.365590, avg_loss: 0.402392 006090/063150, loss: 0.310873, avg_loss: 0.402275 006095/063150, loss: 0.220314, avg_loss: 0.402125 006100/063150, loss: 0.257281, avg_loss: 0.401961 006105/063150, loss: 0.306239, avg_loss: 0.401818 006110/063150, loss: 0.172037, avg_loss: 0.401694 006115/063150, loss: 0.128923, avg_loss: 0.401562 006120/063150, loss: 0.309872, avg_loss: 0.401441 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 6120/63150: {'accuracy': 0.8440366972477065} 006125/063150, loss: 0.161966, avg_loss: 0.401201 006130/063150, loss: 0.205157, avg_loss: 0.401070 006135/063150, loss: 0.314302, avg_loss: 0.400951 006140/063150, loss: 0.435671, avg_loss: 0.400825 006145/063150, loss: 0.269398, avg_loss: 0.400691 006150/063150, loss: 0.577739, avg_loss: 0.400623 006155/063150, loss: 0.200470, avg_loss: 0.400501 006160/063150, loss: 0.365542, avg_loss: 0.400437 006165/063150, loss: 0.475577, avg_loss: 0.400395 006170/063150, loss: 0.176630, avg_loss: 0.400208 006175/063150, loss: 0.274264, avg_loss: 0.400035 006180/063150, loss: 0.244795, avg_loss: 0.399907 006185/063150, loss: 0.361576, avg_loss: 0.399846 006190/063150, loss: 0.191777, avg_loss: 0.399653 006195/063150, loss: 0.688448, avg_loss: 0.399682 006200/063150, loss: 0.361912, avg_loss: 0.399552 006205/063150, loss: 0.314092, avg_loss: 0.399485 006210/063150, loss: 0.307010, avg_loss: 0.399336 006215/063150, loss: 0.127505, avg_loss: 0.399155 006220/063150, loss: 0.185172, avg_loss: 0.399012 006225/063150, loss: 0.328700, avg_loss: 0.398899 006230/063150, loss: 0.240604, avg_loss: 0.398797 006235/063150, loss: 0.333758, avg_loss: 0.398680 006240/063150, loss: 0.228076, avg_loss: 0.398525 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 2, step 6240/63150: {'accuracy': 0.8520642201834863} 006245/063150, loss: 0.162701, avg_loss: 0.398416 006250/063150, loss: 0.210361, avg_loss: 0.398263 006255/063150, loss: 0.208135, avg_loss: 0.398196 006260/063150, loss: 0.085145, avg_loss: 0.398034 006265/063150, loss: 0.188135, avg_loss: 0.397893 006270/063150, loss: 0.495432, avg_loss: 0.397817 006275/063150, loss: 0.326975, avg_loss: 0.397740 006280/063150, loss: 0.179111, avg_loss: 0.397617 006285/063150, loss: 0.246767, avg_loss: 0.397447 006290/063150, loss: 0.171531, avg_loss: 0.397311 006295/063150, loss: 0.418926, avg_loss: 0.397200 006300/063150, loss: 0.260764, avg_loss: 0.397097 006305/063150, loss: 0.243057, avg_loss: 0.396923 006310/063150, loss: 0.201061, avg_loss: 0.396799 006315/063150, loss: 0.118235, avg_loss: 0.396668 006320/063150, loss: 0.056191, avg_loss: 0.396527 006325/063150, loss: 0.221699, avg_loss: 0.396330 006330/063150, loss: 0.318177, avg_loss: 0.396179 006335/063150, loss: 0.375150, avg_loss: 0.396131 006340/063150, loss: 0.267562, avg_loss: 0.396028 006345/063150, loss: 0.237121, avg_loss: 0.395841 006350/063150, loss: 0.216224, avg_loss: 0.395679 006355/063150, loss: 0.214655, avg_loss: 0.395543 006360/063150, loss: 0.077958, avg_loss: 0.395376 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 6360/63150: {'accuracy': 0.8600917431192661} 006365/063150, loss: 0.326005, avg_loss: 0.395259 006370/063150, loss: 0.341760, avg_loss: 0.395103 006375/063150, loss: 0.220842, avg_loss: 0.394972 006380/063150, loss: 0.145620, avg_loss: 0.394820 006385/063150, loss: 0.249074, avg_loss: 0.394666 006390/063150, loss: 0.275174, avg_loss: 0.394557 006395/063150, loss: 0.315834, avg_loss: 0.394397 006400/063150, loss: 0.279352, avg_loss: 0.394320 006405/063150, loss: 0.128621, avg_loss: 0.394134 006410/063150, loss: 0.124633, avg_loss: 0.394017 006415/063150, loss: 0.106450, avg_loss: 0.393873 006420/063150, loss: 0.171203, avg_loss: 0.393712 006425/063150, loss: 0.282090, avg_loss: 0.393565 006430/063150, loss: 0.530319, avg_loss: 0.393476 006435/063150, loss: 0.118593, avg_loss: 0.393310 006440/063150, loss: 0.232819, avg_loss: 0.393136 006445/063150, loss: 0.038217, avg_loss: 0.393001 006450/063150, loss: 0.043214, avg_loss: 0.392823 006455/063150, loss: 0.226650, avg_loss: 0.392666 006460/063150, loss: 0.179457, avg_loss: 0.392524 006465/063150, loss: 0.193297, avg_loss: 0.392434 006470/063150, loss: 0.154268, avg_loss: 0.392331 006475/063150, loss: 0.241645, avg_loss: 0.392187 006480/063150, loss: 0.245890, avg_loss: 0.392046 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 6480/63150: {'accuracy': 0.8600917431192661} 006485/063150, loss: 0.201324, avg_loss: 0.391934 006490/063150, loss: 0.074267, avg_loss: 0.391731 006495/063150, loss: 0.175214, avg_loss: 0.391588 006500/063150, loss: 0.275347, avg_loss: 0.391462 006505/063150, loss: 0.226976, avg_loss: 0.391336 006510/063150, loss: 0.198437, avg_loss: 0.391155 006515/063150, loss: 0.171466, avg_loss: 0.391018 006520/063150, loss: 0.086039, avg_loss: 0.390852 006525/063150, loss: 0.162816, avg_loss: 0.390692 006530/063150, loss: 0.104518, avg_loss: 0.390587 006535/063150, loss: 0.145672, avg_loss: 0.390452 006540/063150, loss: 0.284854, avg_loss: 0.390359 006545/063150, loss: 0.146214, avg_loss: 0.390216 006550/063150, loss: 0.152664, avg_loss: 0.390071 006555/063150, loss: 0.324599, avg_loss: 0.389960 006560/063150, loss: 0.080889, avg_loss: 0.389772 006565/063150, loss: 0.106667, avg_loss: 0.389588 006570/063150, loss: 0.109226, avg_loss: 0.389452 006575/063150, loss: 0.138286, avg_loss: 0.389298 006580/063150, loss: 0.213736, avg_loss: 0.389130 006585/063150, loss: 0.161279, avg_loss: 0.389022 006590/063150, loss: 0.142323, avg_loss: 0.388865 006595/063150, loss: 0.178648, avg_loss: 0.388785 006600/063150, loss: 0.120253, avg_loss: 0.388600 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 6600/63150: {'accuracy': 0.8474770642201835} 006605/063150, loss: 0.169081, avg_loss: 0.388442 006610/063150, loss: 0.187862, avg_loss: 0.388272 006615/063150, loss: 0.353255, avg_loss: 0.388141 006620/063150, loss: 0.169815, avg_loss: 0.387964 006625/063150, loss: 0.212186, avg_loss: 0.387770 006630/063150, loss: 0.193822, avg_loss: 0.387653 006635/063150, loss: 0.064196, avg_loss: 0.387461 006640/063150, loss: 0.372069, avg_loss: 0.387341 006645/063150, loss: 0.165802, avg_loss: 0.387227 006650/063150, loss: 0.066028, avg_loss: 0.387041 006655/063150, loss: 0.161452, avg_loss: 0.386947 006660/063150, loss: 0.046122, avg_loss: 0.386777 006665/063150, loss: 0.153000, avg_loss: 0.386616 006670/063150, loss: 0.084513, avg_loss: 0.386477 006675/063150, loss: 0.133759, avg_loss: 0.386326 006680/063150, loss: 0.136979, avg_loss: 0.386149 006685/063150, loss: 0.066092, avg_loss: 0.385967 006690/063150, loss: 0.225653, avg_loss: 0.385854 006695/063150, loss: 0.220390, avg_loss: 0.385727 006700/063150, loss: 0.300616, avg_loss: 0.385581 006705/063150, loss: 0.129006, avg_loss: 0.385406 006710/063150, loss: 0.161372, avg_loss: 0.385229 006715/063150, loss: 0.258934, avg_loss: 0.385087 006720/063150, loss: 0.244111, avg_loss: 0.384945 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 6720/63150: {'accuracy': 0.856651376146789} 006725/063150, loss: 0.036864, avg_loss: 0.384814 006730/063150, loss: 0.443971, avg_loss: 0.384680 006735/063150, loss: 0.057744, avg_loss: 0.384503 006740/063150, loss: 0.313349, avg_loss: 0.384390 006745/063150, loss: 0.265652, avg_loss: 0.384227 006750/063150, loss: 0.276788, avg_loss: 0.384119 006755/063150, loss: 0.108872, avg_loss: 0.383934 006760/063150, loss: 0.057640, avg_loss: 0.383787 006765/063150, loss: 0.105560, avg_loss: 0.383644 006770/063150, loss: 0.093812, avg_loss: 0.383464 006775/063150, loss: 0.384844, avg_loss: 0.383349 006780/063150, loss: 0.149802, avg_loss: 0.383238 006785/063150, loss: 0.075268, avg_loss: 0.383104 006790/063150, loss: 0.133740, avg_loss: 0.382931 006795/063150, loss: 0.231310, avg_loss: 0.382738 006800/063150, loss: 0.131188, avg_loss: 0.382557 006805/063150, loss: 0.049656, avg_loss: 0.382455 006810/063150, loss: 0.219370, avg_loss: 0.382292 006815/063150, loss: 0.140352, avg_loss: 0.382165 006820/063150, loss: 0.278672, avg_loss: 0.382028 006825/063150, loss: 0.243699, avg_loss: 0.381963 006830/063150, loss: 0.121104, avg_loss: 0.381795 006835/063150, loss: 0.248670, avg_loss: 0.381691 006840/063150, loss: 0.170353, avg_loss: 0.381565 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 6840/63150: {'accuracy': 0.856651376146789} 006845/063150, loss: 0.065253, avg_loss: 0.381404 006850/063150, loss: 0.199071, avg_loss: 0.381298 006855/063150, loss: 0.311507, avg_loss: 0.381169 006860/063150, loss: 0.148344, avg_loss: 0.381056 006865/063150, loss: 0.229173, avg_loss: 0.380947 006870/063150, loss: 0.164160, avg_loss: 0.380796 006875/063150, loss: 0.204610, avg_loss: 0.380681 006880/063150, loss: 0.256000, avg_loss: 0.380574 006885/063150, loss: 0.200000, avg_loss: 0.380402 006890/063150, loss: 0.464455, avg_loss: 0.380340 006895/063150, loss: 0.164461, avg_loss: 0.380247 006900/063150, loss: 0.221622, avg_loss: 0.380123 006905/063150, loss: 0.136482, avg_loss: 0.379972 006910/063150, loss: 0.122820, avg_loss: 0.379833 006915/063150, loss: 0.266474, avg_loss: 0.379692 006920/063150, loss: 0.173864, avg_loss: 0.379587 006925/063150, loss: 0.168391, avg_loss: 0.379472 006930/063150, loss: 0.115412, avg_loss: 0.379343 006935/063150, loss: 0.157241, avg_loss: 0.379223 006940/063150, loss: 0.296901, avg_loss: 0.379160 006945/063150, loss: 0.209306, avg_loss: 0.379049 006950/063150, loss: 0.244068, avg_loss: 0.378916 006955/063150, loss: 0.093027, avg_loss: 0.378778 006960/063150, loss: 0.159567, avg_loss: 0.378629 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 6960/63150: {'accuracy': 0.8589449541284404} 006965/063150, loss: 0.089753, avg_loss: 0.378511 006970/063150, loss: 0.171301, avg_loss: 0.378378 006975/063150, loss: 0.186785, avg_loss: 0.378223 006980/063150, loss: 0.052185, avg_loss: 0.378052 006985/063150, loss: 0.260831, avg_loss: 0.377963 006990/063150, loss: 0.182956, avg_loss: 0.377833 006995/063150, loss: 0.101096, avg_loss: 0.377710 007000/063150, loss: 0.424678, avg_loss: 0.377606 007005/063150, loss: 0.094043, avg_loss: 0.377463 007010/063150, loss: 0.195814, avg_loss: 0.377289 007015/063150, loss: 0.220344, avg_loss: 0.377167 007020/063150, loss: 0.309708, avg_loss: 0.377044 007025/063150, loss: 0.289091, avg_loss: 0.376925 007030/063150, loss: 0.161398, avg_loss: 0.376872 007035/063150, loss: 0.358189, avg_loss: 0.376792 007040/063150, loss: 0.349344, avg_loss: 0.376711 007045/063150, loss: 0.151659, avg_loss: 0.376567 007050/063150, loss: 0.096523, avg_loss: 0.376454 007055/063150, loss: 0.386761, avg_loss: 0.376361 007060/063150, loss: 0.141258, avg_loss: 0.376228 007065/063150, loss: 0.065083, avg_loss: 0.376047 007070/063150, loss: 0.106668, avg_loss: 0.375880 007075/063150, loss: 0.325593, avg_loss: 0.375737 007080/063150, loss: 0.196084, avg_loss: 0.375609 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7080/63150: {'accuracy': 0.8497706422018348} 007085/063150, loss: 0.214628, avg_loss: 0.375500 007090/063150, loss: 0.084896, avg_loss: 0.375335 007095/063150, loss: 0.245339, avg_loss: 0.375244 007100/063150, loss: 0.306698, avg_loss: 0.375148 007105/063150, loss: 0.210685, avg_loss: 0.375031 007110/063150, loss: 0.070772, avg_loss: 0.374859 007115/063150, loss: 0.326716, avg_loss: 0.374715 007120/063150, loss: 0.117397, avg_loss: 0.374620 007125/063150, loss: 0.152656, avg_loss: 0.374572 007130/063150, loss: 0.260402, avg_loss: 0.374427 007135/063150, loss: 0.292273, avg_loss: 0.374326 007140/063150, loss: 0.162608, avg_loss: 0.374213 007145/063150, loss: 0.275778, avg_loss: 0.374116 007150/063150, loss: 0.223852, avg_loss: 0.373976 007155/063150, loss: 0.100900, avg_loss: 0.373863 007160/063150, loss: 0.049277, avg_loss: 0.373735 007165/063150, loss: 0.141038, avg_loss: 0.373606 007170/063150, loss: 0.448460, avg_loss: 0.373503 007175/063150, loss: 0.087170, avg_loss: 0.373345 007180/063150, loss: 0.164786, avg_loss: 0.373226 007185/063150, loss: 0.139608, avg_loss: 0.373087 007190/063150, loss: 0.188226, avg_loss: 0.372999 007195/063150, loss: 0.159784, avg_loss: 0.372856 007200/063150, loss: 0.400001, avg_loss: 0.372734 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7200/63150: {'accuracy': 0.8623853211009175} 007205/063150, loss: 0.205842, avg_loss: 0.372661 007210/063150, loss: 0.371919, avg_loss: 0.372530 007215/063150, loss: 0.170688, avg_loss: 0.372412 007220/063150, loss: 0.118036, avg_loss: 0.372321 007225/063150, loss: 0.167954, avg_loss: 0.372173 007230/063150, loss: 0.239164, avg_loss: 0.372046 007235/063150, loss: 0.053277, avg_loss: 0.371897 007240/063150, loss: 0.304795, avg_loss: 0.371779 007245/063150, loss: 0.115691, avg_loss: 0.371625 007250/063150, loss: 0.077418, avg_loss: 0.371502 007255/063150, loss: 0.289505, avg_loss: 0.371377 007260/063150, loss: 0.593990, avg_loss: 0.371345 007265/063150, loss: 0.067507, avg_loss: 0.371207 007270/063150, loss: 0.400890, avg_loss: 0.371084 007275/063150, loss: 0.374084, avg_loss: 0.371018 007280/063150, loss: 0.174338, avg_loss: 0.370893 007285/063150, loss: 0.221011, avg_loss: 0.370754 007290/063150, loss: 0.219682, avg_loss: 0.370742 007295/063150, loss: 0.246324, avg_loss: 0.370629 007300/063150, loss: 0.142836, avg_loss: 0.370506 007305/063150, loss: 0.145212, avg_loss: 0.370399 007310/063150, loss: 0.207463, avg_loss: 0.370305 007315/063150, loss: 0.081330, avg_loss: 0.370185 007320/063150, loss: 0.143124, avg_loss: 0.370080 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7320/63150: {'accuracy': 0.8681192660550459} 007325/063150, loss: 0.159430, avg_loss: 0.369978 007330/063150, loss: 0.366371, avg_loss: 0.369850 007335/063150, loss: 0.266461, avg_loss: 0.369731 007340/063150, loss: 0.159506, avg_loss: 0.369570 007345/063150, loss: 0.208134, avg_loss: 0.369405 007350/063150, loss: 0.217419, avg_loss: 0.369265 007355/063150, loss: 0.166781, avg_loss: 0.369124 007360/063150, loss: 0.214333, avg_loss: 0.368970 007365/063150, loss: 0.150210, avg_loss: 0.368803 007370/063150, loss: 0.063424, avg_loss: 0.368624 007375/063150, loss: 0.135044, avg_loss: 0.368499 007380/063150, loss: 0.279078, avg_loss: 0.368433 007385/063150, loss: 0.304123, avg_loss: 0.368338 007390/063150, loss: 0.363484, avg_loss: 0.368269 007395/063150, loss: 0.149212, avg_loss: 0.368144 007400/063150, loss: 0.347053, avg_loss: 0.368109 007405/063150, loss: 0.185472, avg_loss: 0.368013 007410/063150, loss: 0.138774, avg_loss: 0.367915 007415/063150, loss: 0.265823, avg_loss: 0.367792 007420/063150, loss: 0.139445, avg_loss: 0.367688 007425/063150, loss: 0.222034, avg_loss: 0.367562 007430/063150, loss: 0.162439, avg_loss: 0.367416 007435/063150, loss: 0.156976, avg_loss: 0.367278 007440/063150, loss: 0.070721, avg_loss: 0.367158 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7440/63150: {'accuracy': 0.8589449541284404} 007445/063150, loss: 0.291048, avg_loss: 0.367024 007450/063150, loss: 0.282264, avg_loss: 0.366928 007455/063150, loss: 0.101688, avg_loss: 0.366750 007460/063150, loss: 0.325055, avg_loss: 0.366644 007465/063150, loss: 0.094004, avg_loss: 0.366516 007470/063150, loss: 0.064246, avg_loss: 0.366395 007475/063150, loss: 0.180423, avg_loss: 0.366292 007480/063150, loss: 0.252848, avg_loss: 0.366215 007485/063150, loss: 0.302171, avg_loss: 0.366137 007490/063150, loss: 0.181653, avg_loss: 0.366021 007495/063150, loss: 0.250473, avg_loss: 0.365896 007500/063150, loss: 0.180313, avg_loss: 0.365773 007505/063150, loss: 0.153185, avg_loss: 0.365665 007510/063150, loss: 0.061817, avg_loss: 0.365510 007515/063150, loss: 0.066394, avg_loss: 0.365351 007520/063150, loss: 0.127214, avg_loss: 0.365183 007525/063150, loss: 0.247178, avg_loss: 0.365057 007530/063150, loss: 0.190868, avg_loss: 0.364947 007535/063150, loss: 0.098464, avg_loss: 0.364943 007540/063150, loss: 0.101409, avg_loss: 0.364868 007545/063150, loss: 0.171416, avg_loss: 0.364827 007550/063150, loss: 0.122503, avg_loss: 0.364686 007555/063150, loss: 0.093141, avg_loss: 0.364563 007560/063150, loss: 0.233811, avg_loss: 0.364472 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7560/63150: {'accuracy': 0.8486238532110092} 007565/063150, loss: 0.387827, avg_loss: 0.364357 007570/063150, loss: 0.340059, avg_loss: 0.364275 007575/063150, loss: 0.172777, avg_loss: 0.364146 007580/063150, loss: 0.249109, avg_loss: 0.364051 007585/063150, loss: 0.066603, avg_loss: 0.363914 007590/063150, loss: 0.091821, avg_loss: 0.363800 007595/063150, loss: 0.472011, avg_loss: 0.363687 007600/063150, loss: 0.096104, avg_loss: 0.363552 007605/063150, loss: 0.304848, avg_loss: 0.363427 007610/063150, loss: 0.032090, avg_loss: 0.363336 007615/063150, loss: 0.299640, avg_loss: 0.363234 007620/063150, loss: 0.192684, avg_loss: 0.363119 007625/063150, loss: 0.090347, avg_loss: 0.363025 007630/063150, loss: 0.168044, avg_loss: 0.362927 007635/063150, loss: 0.183112, avg_loss: 0.362855 007640/063150, loss: 0.176601, avg_loss: 0.362768 007645/063150, loss: 0.134122, avg_loss: 0.362639 007650/063150, loss: 0.295006, avg_loss: 0.362518 007655/063150, loss: 0.342236, avg_loss: 0.362391 007660/063150, loss: 0.068680, avg_loss: 0.362236 007665/063150, loss: 0.176887, avg_loss: 0.362105 007670/063150, loss: 0.211862, avg_loss: 0.361995 007675/063150, loss: 0.236408, avg_loss: 0.361873 007680/063150, loss: 0.267936, avg_loss: 0.361792 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7680/63150: {'accuracy': 0.8681192660550459} 007685/063150, loss: 0.379450, avg_loss: 0.361709 007690/063150, loss: 0.208368, avg_loss: 0.361575 007695/063150, loss: 0.226497, avg_loss: 0.361504 007700/063150, loss: 0.145290, avg_loss: 0.361415 007705/063150, loss: 0.329823, avg_loss: 0.361326 007710/063150, loss: 0.297184, avg_loss: 0.361214 007715/063150, loss: 0.115789, avg_loss: 0.361070 007720/063150, loss: 0.238246, avg_loss: 0.360940 007725/063150, loss: 0.182013, avg_loss: 0.360804 007730/063150, loss: 0.131330, avg_loss: 0.360726 007735/063150, loss: 0.206145, avg_loss: 0.360629 007740/063150, loss: 0.204966, avg_loss: 0.360515 007745/063150, loss: 0.329338, avg_loss: 0.360433 007750/063150, loss: 0.135681, avg_loss: 0.360353 007755/063150, loss: 0.178485, avg_loss: 0.360234 007760/063150, loss: 0.253994, avg_loss: 0.360109 007765/063150, loss: 0.127027, avg_loss: 0.359984 007770/063150, loss: 0.073020, avg_loss: 0.359853 007775/063150, loss: 0.153367, avg_loss: 0.359712 007780/063150, loss: 0.141749, avg_loss: 0.359581 007785/063150, loss: 0.306362, avg_loss: 0.359478 007790/063150, loss: 0.159246, avg_loss: 0.359375 007795/063150, loss: 0.050078, avg_loss: 0.359285 007800/063150, loss: 0.101789, avg_loss: 0.359178 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7800/63150: {'accuracy': 0.8635321100917431} 007805/063150, loss: 0.147650, avg_loss: 0.359076 007810/063150, loss: 0.123085, avg_loss: 0.358959 007815/063150, loss: 0.184683, avg_loss: 0.358826 007820/063150, loss: 0.328215, avg_loss: 0.358728 007825/063150, loss: 0.397313, avg_loss: 0.358629 007830/063150, loss: 0.236270, avg_loss: 0.358483 007835/063150, loss: 0.133265, avg_loss: 0.358416 007840/063150, loss: 0.257372, avg_loss: 0.358342 007845/063150, loss: 0.222322, avg_loss: 0.358252 007850/063150, loss: 0.120965, avg_loss: 0.358161 007855/063150, loss: 0.279963, avg_loss: 0.358045 007860/063150, loss: 0.241000, avg_loss: 0.357964 007865/063150, loss: 0.079851, avg_loss: 0.357837 007870/063150, loss: 0.086943, avg_loss: 0.357680 007875/063150, loss: 0.216156, avg_loss: 0.357582 007880/063150, loss: 0.215117, avg_loss: 0.357500 007885/063150, loss: 0.121829, avg_loss: 0.357354 007890/063150, loss: 0.255474, avg_loss: 0.357230 007895/063150, loss: 0.058267, avg_loss: 0.357102 007900/063150, loss: 0.258265, avg_loss: 0.356981 007905/063150, loss: 0.452999, avg_loss: 0.356941 007910/063150, loss: 0.087743, avg_loss: 0.356853 007915/063150, loss: 0.200449, avg_loss: 0.356731 007920/063150, loss: 0.285426, avg_loss: 0.356625 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 7920/63150: {'accuracy': 0.8463302752293578} 007925/063150, loss: 0.171869, avg_loss: 0.356542 007930/063150, loss: 0.108217, avg_loss: 0.356442 007935/063150, loss: 0.183361, avg_loss: 0.356301 007940/063150, loss: 0.265317, avg_loss: 0.356199 007945/063150, loss: 0.290357, avg_loss: 0.356094 007950/063150, loss: 0.365331, avg_loss: 0.356041 007955/063150, loss: 0.257229, avg_loss: 0.355963 007960/063150, loss: 0.144841, avg_loss: 0.355828 007965/063150, loss: 0.315796, avg_loss: 0.355744 007970/063150, loss: 0.263026, avg_loss: 0.355706 007975/063150, loss: 0.215960, avg_loss: 0.355612 007980/063150, loss: 0.192837, avg_loss: 0.355490 007985/063150, loss: 0.241060, avg_loss: 0.355400 007990/063150, loss: 0.288449, avg_loss: 0.355329 007995/063150, loss: 0.121776, avg_loss: 0.355215 008000/063150, loss: 0.276262, avg_loss: 0.355171 008005/063150, loss: 0.250153, avg_loss: 0.355081 008010/063150, loss: 0.215816, avg_loss: 0.354997 008015/063150, loss: 0.131414, avg_loss: 0.354868 008020/063150, loss: 0.038290, avg_loss: 0.354781 008025/063150, loss: 0.213567, avg_loss: 0.354672 008030/063150, loss: 0.156903, avg_loss: 0.354577 008035/063150, loss: 0.171680, avg_loss: 0.354470 008040/063150, loss: 0.119041, avg_loss: 0.354356 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 8040/63150: {'accuracy': 0.856651376146789} 008045/063150, loss: 0.200204, avg_loss: 0.354263 008050/063150, loss: 0.223657, avg_loss: 0.354146 008055/063150, loss: 0.074261, avg_loss: 0.353990 008060/063150, loss: 0.207721, avg_loss: 0.353882 008065/063150, loss: 0.094057, avg_loss: 0.353756 008070/063150, loss: 0.218744, avg_loss: 0.353649 008075/063150, loss: 0.450161, avg_loss: 0.353593 008080/063150, loss: 0.168045, avg_loss: 0.353503 008085/063150, loss: 0.160139, avg_loss: 0.353385 008090/063150, loss: 0.216971, avg_loss: 0.353274 008095/063150, loss: 0.185560, avg_loss: 0.353181 008100/063150, loss: 0.137113, avg_loss: 0.353088 008105/063150, loss: 0.361699, avg_loss: 0.353014 008110/063150, loss: 0.320956, avg_loss: 0.352922 008115/063150, loss: 0.184971, avg_loss: 0.352812 008120/063150, loss: 0.158169, avg_loss: 0.352692 008125/063150, loss: 0.102878, avg_loss: 0.352590 008130/063150, loss: 0.232930, avg_loss: 0.352512 008135/063150, loss: 0.099422, avg_loss: 0.352398 008140/063150, loss: 0.245228, avg_loss: 0.352328 008145/063150, loss: 0.186783, avg_loss: 0.352220 008150/063150, loss: 0.140948, avg_loss: 0.352148 008155/063150, loss: 0.074818, avg_loss: 0.352032 008160/063150, loss: 0.158935, avg_loss: 0.351906 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 8160/63150: {'accuracy': 0.8623853211009175} 008165/063150, loss: 0.275411, avg_loss: 0.351809 008170/063150, loss: 0.254610, avg_loss: 0.351745 008175/063150, loss: 0.151243, avg_loss: 0.351631 008180/063150, loss: 0.325123, avg_loss: 0.351526 008185/063150, loss: 0.155033, avg_loss: 0.351390 008190/063150, loss: 0.236445, avg_loss: 0.351255 008195/063150, loss: 0.240897, avg_loss: 0.351177 008200/063150, loss: 0.151847, avg_loss: 0.351050 008205/063150, loss: 0.046601, avg_loss: 0.350900 008210/063150, loss: 0.400382, avg_loss: 0.350804 008215/063150, loss: 0.203522, avg_loss: 0.350694 008220/063150, loss: 0.119064, avg_loss: 0.350599 008225/063150, loss: 0.146290, avg_loss: 0.350478 008230/063150, loss: 0.119271, avg_loss: 0.350347 008235/063150, loss: 0.184588, avg_loss: 0.350238 008240/063150, loss: 0.120894, avg_loss: 0.350164 008245/063150, loss: 0.367861, avg_loss: 0.350074 008250/063150, loss: 0.212892, avg_loss: 0.349942 008255/063150, loss: 0.300112, avg_loss: 0.349903 008260/063150, loss: 0.178464, avg_loss: 0.349785 008265/063150, loss: 0.083778, avg_loss: 0.349713 008270/063150, loss: 0.279997, avg_loss: 0.349610 008275/063150, loss: 0.151504, avg_loss: 0.349524 008280/063150, loss: 0.347327, avg_loss: 0.349461 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 8280/63150: {'accuracy': 0.8360091743119266} 008285/063150, loss: 0.035563, avg_loss: 0.349342 008290/063150, loss: 0.283749, avg_loss: 0.349258 008295/063150, loss: 0.221612, avg_loss: 0.349166 008300/063150, loss: 0.115435, avg_loss: 0.349053 008305/063150, loss: 0.087390, avg_loss: 0.348955 008310/063150, loss: 0.299136, avg_loss: 0.348848 008315/063150, loss: 0.187080, avg_loss: 0.348738 008320/063150, loss: 0.327208, avg_loss: 0.348686 008325/063150, loss: 0.110752, avg_loss: 0.348574 008330/063150, loss: 0.125973, avg_loss: 0.348480 008335/063150, loss: 0.316209, avg_loss: 0.348443 008340/063150, loss: 0.128039, avg_loss: 0.348354 008345/063150, loss: 0.185242, avg_loss: 0.348262 008350/063150, loss: 0.242111, avg_loss: 0.348189 008355/063150, loss: 0.132835, avg_loss: 0.348116 008360/063150, loss: 0.245911, avg_loss: 0.348020 008365/063150, loss: 0.167025, avg_loss: 0.347888 008370/063150, loss: 0.152295, avg_loss: 0.347808 008375/063150, loss: 0.176811, avg_loss: 0.347728 008380/063150, loss: 0.142865, avg_loss: 0.347633 008385/063150, loss: 0.155957, avg_loss: 0.347538 008390/063150, loss: 0.133290, avg_loss: 0.347445 008395/063150, loss: 0.183931, avg_loss: 0.347336 008400/063150, loss: 0.114982, avg_loss: 0.347229 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 3, step 8400/63150: {'accuracy': 0.8577981651376146} 008405/063150, loss: 0.147639, avg_loss: 0.347163 008410/063150, loss: 0.086868, avg_loss: 0.347044 008415/063150, loss: 0.283607, avg_loss: 0.346969 008420/063150, loss: 0.040499, avg_loss: 0.346836 008425/063150, loss: 0.173972, avg_loss: 0.346727 008430/063150, loss: 0.290508, avg_loss: 0.346612 008435/063150, loss: 0.206648, avg_loss: 0.346473 008440/063150, loss: 0.042936, avg_loss: 0.346341 008445/063150, loss: 0.017667, avg_loss: 0.346210 008450/063150, loss: 0.353954, avg_loss: 0.346095 008455/063150, loss: 0.214215, avg_loss: 0.345992 008460/063150, loss: 0.294760, avg_loss: 0.345875 008465/063150, loss: 0.126303, avg_loss: 0.345781 008470/063150, loss: 0.113326, avg_loss: 0.345660 008475/063150, loss: 0.210505, avg_loss: 0.345569 008480/063150, loss: 0.168250, avg_loss: 0.345483 008485/063150, loss: 0.239621, avg_loss: 0.345375 008490/063150, loss: 0.213532, avg_loss: 0.345257 008495/063150, loss: 0.106317, avg_loss: 0.345151 008500/063150, loss: 0.060797, avg_loss: 0.345046 008505/063150, loss: 0.087356, avg_loss: 0.344930 008510/063150, loss: 0.160134, avg_loss: 0.344825 008515/063150, loss: 0.115659, avg_loss: 0.344714 008520/063150, loss: 0.108328, avg_loss: 0.344600 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 8520/63150: {'accuracy': 0.8577981651376146} 008525/063150, loss: 0.050164, avg_loss: 0.344474 008530/063150, loss: 0.187589, avg_loss: 0.344358 008535/063150, loss: 0.250972, avg_loss: 0.344280 008540/063150, loss: 0.084973, avg_loss: 0.344166 008545/063150, loss: 0.051566, avg_loss: 0.344045 008550/063150, loss: 0.089198, avg_loss: 0.343942 008555/063150, loss: 0.131200, avg_loss: 0.343840 008560/063150, loss: 0.234564, avg_loss: 0.343739 008565/063150, loss: 0.188884, avg_loss: 0.343613 008570/063150, loss: 0.275349, avg_loss: 0.343504 008575/063150, loss: 0.050353, avg_loss: 0.343384 008580/063150, loss: 0.110323, avg_loss: 0.343284 008585/063150, loss: 0.129442, avg_loss: 0.343166 008590/063150, loss: 0.146756, avg_loss: 0.343053 008595/063150, loss: 0.186370, avg_loss: 0.342992 008600/063150, loss: 0.246563, avg_loss: 0.342882 008605/063150, loss: 0.062764, avg_loss: 0.342748 008610/063150, loss: 0.140477, avg_loss: 0.342607 008615/063150, loss: 0.111829, avg_loss: 0.342470 008620/063150, loss: 0.467207, avg_loss: 0.342416 008625/063150, loss: 0.071900, avg_loss: 0.342311 008630/063150, loss: 0.013013, avg_loss: 0.342184 008635/063150, loss: 0.076351, avg_loss: 0.342077 008640/063150, loss: 0.142043, avg_loss: 0.342019 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 8640/63150: {'accuracy': 0.8635321100917431} 008645/063150, loss: 0.172252, avg_loss: 0.341914 008650/063150, loss: 0.129360, avg_loss: 0.341811 008655/063150, loss: 0.192541, avg_loss: 0.341758 008660/063150, loss: 0.098014, avg_loss: 0.341627 008665/063150, loss: 0.251760, avg_loss: 0.341511 008670/063150, loss: 0.070898, avg_loss: 0.341368 008675/063150, loss: 0.107639, avg_loss: 0.341274 008680/063150, loss: 0.283636, avg_loss: 0.341217 008685/063150, loss: 0.185459, avg_loss: 0.341122 008690/063150, loss: 0.280005, avg_loss: 0.341037 008695/063150, loss: 0.153768, avg_loss: 0.340936 008700/063150, loss: 0.093415, avg_loss: 0.340851 008705/063150, loss: 0.155181, avg_loss: 0.340751 008710/063150, loss: 0.302868, avg_loss: 0.340637 008715/063150, loss: 0.105698, avg_loss: 0.340513 008720/063150, loss: 0.065154, avg_loss: 0.340395 008725/063150, loss: 0.115314, avg_loss: 0.340282 008730/063150, loss: 0.182537, avg_loss: 0.340183 008735/063150, loss: 0.157342, avg_loss: 0.340132 008740/063150, loss: 0.366330, avg_loss: 0.340044 008745/063150, loss: 0.275384, avg_loss: 0.339933 008750/063150, loss: 0.207979, avg_loss: 0.339871 008755/063150, loss: 0.096898, avg_loss: 0.339764 008760/063150, loss: 0.088852, avg_loss: 0.339648 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 8760/63150: {'accuracy': 0.8646788990825688} 008765/063150, loss: 0.039967, avg_loss: 0.339504 008770/063150, loss: 0.033317, avg_loss: 0.339377 008775/063150, loss: 0.151095, avg_loss: 0.339251 008780/063150, loss: 0.043551, avg_loss: 0.339143 008785/063150, loss: 0.065352, avg_loss: 0.339048 008790/063150, loss: 0.393764, avg_loss: 0.338969 008795/063150, loss: 0.088041, avg_loss: 0.338866 008800/063150, loss: 0.243336, avg_loss: 0.338827 008805/063150, loss: 0.313326, avg_loss: 0.338732 008810/063150, loss: 0.199689, avg_loss: 0.338640 008815/063150, loss: 0.063370, avg_loss: 0.338503 008820/063150, loss: 0.406386, avg_loss: 0.338435 008825/063150, loss: 0.054040, avg_loss: 0.338328 008830/063150, loss: 0.132369, avg_loss: 0.338201 008835/063150, loss: 0.104322, avg_loss: 0.338061 008840/063150, loss: 0.183163, avg_loss: 0.337956 008845/063150, loss: 0.200131, avg_loss: 0.337822 008850/063150, loss: 0.288721, avg_loss: 0.337746 008855/063150, loss: 0.098022, avg_loss: 0.337621 008860/063150, loss: 0.091157, avg_loss: 0.337541 008865/063150, loss: 0.196395, avg_loss: 0.337457 008870/063150, loss: 0.277938, avg_loss: 0.337379 008875/063150, loss: 0.175176, avg_loss: 0.337301 008880/063150, loss: 0.063588, avg_loss: 0.337179 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 8880/63150: {'accuracy': 0.841743119266055} 008885/063150, loss: 0.231603, avg_loss: 0.337113 008890/063150, loss: 0.131049, avg_loss: 0.337016 008895/063150, loss: 0.226012, avg_loss: 0.336904 008900/063150, loss: 0.282586, avg_loss: 0.336808 008905/063150, loss: 0.044403, avg_loss: 0.336709 008910/063150, loss: 0.086191, avg_loss: 0.336598 008915/063150, loss: 0.114874, avg_loss: 0.336480 008920/063150, loss: 0.143575, avg_loss: 0.336356 008925/063150, loss: 0.186634, avg_loss: 0.336223 008930/063150, loss: 0.210297, avg_loss: 0.336126 008935/063150, loss: 0.136710, avg_loss: 0.335991 008940/063150, loss: 0.086504, avg_loss: 0.335873 008945/063150, loss: 0.116755, avg_loss: 0.335793 008950/063150, loss: 0.096619, avg_loss: 0.335689 008955/063150, loss: 0.318345, avg_loss: 0.335614 008960/063150, loss: 0.179386, avg_loss: 0.335521 008965/063150, loss: 0.184041, avg_loss: 0.335417 008970/063150, loss: 0.111617, avg_loss: 0.335339 008975/063150, loss: 0.229850, avg_loss: 0.335223 008980/063150, loss: 0.045274, avg_loss: 0.335106 008985/063150, loss: 0.053271, avg_loss: 0.334977 008990/063150, loss: 0.097683, avg_loss: 0.334923 008995/063150, loss: 0.057590, avg_loss: 0.334814 009000/063150, loss: 0.051548, avg_loss: 0.334668 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9000/63150: {'accuracy': 0.8509174311926605} 009005/063150, loss: 0.072405, avg_loss: 0.334557 009010/063150, loss: 0.256491, avg_loss: 0.334438 009015/063150, loss: 0.127623, avg_loss: 0.334343 009020/063150, loss: 0.112166, avg_loss: 0.334256 009025/063150, loss: 0.388839, avg_loss: 0.334250 009030/063150, loss: 0.152723, avg_loss: 0.334124 009035/063150, loss: 0.086404, avg_loss: 0.333987 009040/063150, loss: 0.094675, avg_loss: 0.333876 009045/063150, loss: 0.291428, avg_loss: 0.333776 009050/063150, loss: 0.127347, avg_loss: 0.333691 009055/063150, loss: 0.146506, avg_loss: 0.333582 009060/063150, loss: 0.288586, avg_loss: 0.333477 009065/063150, loss: 0.049420, avg_loss: 0.333370 009070/063150, loss: 0.137911, avg_loss: 0.333326 009075/063150, loss: 0.235950, avg_loss: 0.333232 009080/063150, loss: 0.218558, avg_loss: 0.333156 009085/063150, loss: 0.051511, avg_loss: 0.333040 009090/063150, loss: 0.150964, avg_loss: 0.332983 009095/063150, loss: 0.244306, avg_loss: 0.332926 009100/063150, loss: 0.182292, avg_loss: 0.332813 009105/063150, loss: 0.113472, avg_loss: 0.332685 009110/063150, loss: 0.116689, avg_loss: 0.332550 009115/063150, loss: 0.086916, avg_loss: 0.332421 009120/063150, loss: 0.110995, avg_loss: 0.332337 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9120/63150: {'accuracy': 0.8646788990825688} 009125/063150, loss: 0.115181, avg_loss: 0.332256 009130/063150, loss: 0.081418, avg_loss: 0.332164 009135/063150, loss: 0.142157, avg_loss: 0.332047 009140/063150, loss: 0.122091, avg_loss: 0.331966 009145/063150, loss: 0.127238, avg_loss: 0.331915 009150/063150, loss: 0.218985, avg_loss: 0.331838 009155/063150, loss: 0.210979, avg_loss: 0.331741 009160/063150, loss: 0.114556, avg_loss: 0.331630 009165/063150, loss: 0.332905, avg_loss: 0.331561 009170/063150, loss: 0.162467, avg_loss: 0.331473 009175/063150, loss: 0.244894, avg_loss: 0.331355 009180/063150, loss: 0.301605, avg_loss: 0.331267 009185/063150, loss: 0.064814, avg_loss: 0.331177 009190/063150, loss: 0.126476, avg_loss: 0.331073 009195/063150, loss: 0.114781, avg_loss: 0.331011 009200/063150, loss: 0.195647, avg_loss: 0.330949 009205/063150, loss: 0.069879, avg_loss: 0.330830 009210/063150, loss: 0.127997, avg_loss: 0.330740 009215/063150, loss: 0.140868, avg_loss: 0.330648 009220/063150, loss: 0.127070, avg_loss: 0.330551 009225/063150, loss: 0.134876, avg_loss: 0.330465 009230/063150, loss: 0.133600, avg_loss: 0.330359 009235/063150, loss: 0.086346, avg_loss: 0.330253 009240/063150, loss: 0.154050, avg_loss: 0.330127 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9240/63150: {'accuracy': 0.856651376146789} 009245/063150, loss: 0.330200, avg_loss: 0.330061 009250/063150, loss: 0.161128, avg_loss: 0.329989 009255/063150, loss: 0.025313, avg_loss: 0.329887 009260/063150, loss: 0.046100, avg_loss: 0.329787 009265/063150, loss: 0.093881, avg_loss: 0.329675 009270/063150, loss: 0.194773, avg_loss: 0.329576 009275/063150, loss: 0.155308, avg_loss: 0.329494 009280/063150, loss: 0.200168, avg_loss: 0.329395 009285/063150, loss: 0.241702, avg_loss: 0.329300 009290/063150, loss: 0.397542, avg_loss: 0.329242 009295/063150, loss: 0.070779, avg_loss: 0.329115 009300/063150, loss: 0.329174, avg_loss: 0.329066 009305/063150, loss: 0.240871, avg_loss: 0.328965 009310/063150, loss: 0.155293, avg_loss: 0.328869 009315/063150, loss: 0.103249, avg_loss: 0.328774 009320/063150, loss: 0.129871, avg_loss: 0.328678 009325/063150, loss: 0.146412, avg_loss: 0.328575 009330/063150, loss: 0.282041, avg_loss: 0.328496 009335/063150, loss: 0.076443, avg_loss: 0.328371 009340/063150, loss: 0.060455, avg_loss: 0.328275 009345/063150, loss: 0.345695, avg_loss: 0.328202 009350/063150, loss: 0.019107, avg_loss: 0.328093 009355/063150, loss: 0.409689, avg_loss: 0.328034 009360/063150, loss: 0.179435, avg_loss: 0.327944 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9360/63150: {'accuracy': 0.8497706422018348} 009365/063150, loss: 0.131612, avg_loss: 0.327855 009370/063150, loss: 0.409107, avg_loss: 0.327829 009375/063150, loss: 0.175976, avg_loss: 0.327767 009380/063150, loss: 0.035823, avg_loss: 0.327645 009385/063150, loss: 0.078898, avg_loss: 0.327523 009390/063150, loss: 0.330065, avg_loss: 0.327442 009395/063150, loss: 0.100009, avg_loss: 0.327328 009400/063150, loss: 0.131736, avg_loss: 0.327240 009405/063150, loss: 0.340834, avg_loss: 0.327158 009410/063150, loss: 0.119877, avg_loss: 0.327054 009415/063150, loss: 0.188490, avg_loss: 0.326971 009420/063150, loss: 0.249844, avg_loss: 0.326881 009425/063150, loss: 0.054020, avg_loss: 0.326750 009430/063150, loss: 0.056281, avg_loss: 0.326632 009435/063150, loss: 0.115801, avg_loss: 0.326518 009440/063150, loss: 0.201743, avg_loss: 0.326434 009445/063150, loss: 0.161759, avg_loss: 0.326388 009450/063150, loss: 0.403007, avg_loss: 0.326298 009455/063150, loss: 0.317176, avg_loss: 0.326218 009460/063150, loss: 0.130350, avg_loss: 0.326098 009465/063150, loss: 0.132910, avg_loss: 0.326026 009470/063150, loss: 0.209864, avg_loss: 0.325948 009475/063150, loss: 0.256295, avg_loss: 0.325856 009480/063150, loss: 0.110435, avg_loss: 0.325784 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9480/63150: {'accuracy': 0.8600917431192661} 009485/063150, loss: 0.264439, avg_loss: 0.325694 009490/063150, loss: 0.107025, avg_loss: 0.325623 009495/063150, loss: 0.209733, avg_loss: 0.325540 009500/063150, loss: 0.258116, avg_loss: 0.325466 009505/063150, loss: 0.100382, avg_loss: 0.325367 009510/063150, loss: 0.317764, avg_loss: 0.325284 009515/063150, loss: 0.066474, avg_loss: 0.325208 009520/063150, loss: 0.073232, avg_loss: 0.325119 009525/063150, loss: 0.168614, avg_loss: 0.325040 009530/063150, loss: 0.113010, avg_loss: 0.324960 009535/063150, loss: 0.186931, avg_loss: 0.324860 009540/063150, loss: 0.340502, avg_loss: 0.324810 009545/063150, loss: 0.128846, avg_loss: 0.324693 009550/063150, loss: 0.085882, avg_loss: 0.324606 009555/063150, loss: 0.057050, avg_loss: 0.324522 009560/063150, loss: 0.309533, avg_loss: 0.324449 009565/063150, loss: 0.069215, avg_loss: 0.324317 009570/063150, loss: 0.137346, avg_loss: 0.324231 009575/063150, loss: 0.157991, avg_loss: 0.324128 009580/063150, loss: 0.116291, avg_loss: 0.324084 009585/063150, loss: 0.286094, avg_loss: 0.324007 009590/063150, loss: 0.145891, avg_loss: 0.323914 009595/063150, loss: 0.156629, avg_loss: 0.323814 009600/063150, loss: 0.108405, avg_loss: 0.323716 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9600/63150: {'accuracy': 0.8577981651376146} 009605/063150, loss: 0.254737, avg_loss: 0.323630 009610/063150, loss: 0.051464, avg_loss: 0.323535 009615/063150, loss: 0.159589, avg_loss: 0.323416 009620/063150, loss: 0.019581, avg_loss: 0.323297 009625/063150, loss: 0.036223, avg_loss: 0.323190 009630/063150, loss: 0.403626, avg_loss: 0.323095 009635/063150, loss: 0.022795, avg_loss: 0.322986 009640/063150, loss: 0.192581, avg_loss: 0.322888 009645/063150, loss: 0.121482, avg_loss: 0.322799 009650/063150, loss: 0.231140, avg_loss: 0.322690 009655/063150, loss: 0.250417, avg_loss: 0.322617 009660/063150, loss: 0.145530, avg_loss: 0.322524 009665/063150, loss: 0.270132, avg_loss: 0.322447 009670/063150, loss: 0.300896, avg_loss: 0.322375 009675/063150, loss: 0.091067, avg_loss: 0.322294 009680/063150, loss: 0.230012, avg_loss: 0.322266 009685/063150, loss: 0.180229, avg_loss: 0.322192 009690/063150, loss: 0.091564, avg_loss: 0.322111 009695/063150, loss: 0.142479, avg_loss: 0.322034 009700/063150, loss: 0.071125, avg_loss: 0.321986 009705/063150, loss: 0.255455, avg_loss: 0.321945 009710/063150, loss: 0.200662, avg_loss: 0.321898 009715/063150, loss: 0.228818, avg_loss: 0.321867 009720/063150, loss: 0.093207, avg_loss: 0.321789 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9720/63150: {'accuracy': 0.8635321100917431} 009725/063150, loss: 0.236168, avg_loss: 0.321718 009730/063150, loss: 0.290908, avg_loss: 0.321659 009735/063150, loss: 0.068633, avg_loss: 0.321553 009740/063150, loss: 0.196139, avg_loss: 0.321484 009745/063150, loss: 0.180268, avg_loss: 0.321377 009750/063150, loss: 0.263648, avg_loss: 0.321287 009755/063150, loss: 0.124460, avg_loss: 0.321172 009760/063150, loss: 0.177976, avg_loss: 0.321106 009765/063150, loss: 0.086089, avg_loss: 0.321022 009770/063150, loss: 0.318503, avg_loss: 0.320934 009775/063150, loss: 0.158861, avg_loss: 0.320868 009780/063150, loss: 0.091184, avg_loss: 0.320775 009785/063150, loss: 0.153121, avg_loss: 0.320711 009790/063150, loss: 0.142132, avg_loss: 0.320627 009795/063150, loss: 0.104426, avg_loss: 0.320552 009800/063150, loss: 0.205529, avg_loss: 0.320519 009805/063150, loss: 0.369492, avg_loss: 0.320441 009810/063150, loss: 0.119451, avg_loss: 0.320372 009815/063150, loss: 0.187308, avg_loss: 0.320299 009820/063150, loss: 0.203859, avg_loss: 0.320223 009825/063150, loss: 0.090713, avg_loss: 0.320126 009830/063150, loss: 0.208456, avg_loss: 0.320024 009835/063150, loss: 0.188099, avg_loss: 0.319928 009840/063150, loss: 0.093535, avg_loss: 0.319827 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9840/63150: {'accuracy': 0.8635321100917431} 009845/063150, loss: 0.148858, avg_loss: 0.319732 009850/063150, loss: 0.144453, avg_loss: 0.319622 009855/063150, loss: 0.457362, avg_loss: 0.319537 009860/063150, loss: 0.191123, avg_loss: 0.319456 009865/063150, loss: 0.174435, avg_loss: 0.319388 009870/063150, loss: 0.089505, avg_loss: 0.319283 009875/063150, loss: 0.060738, avg_loss: 0.319209 009880/063150, loss: 0.170642, avg_loss: 0.319123 009885/063150, loss: 0.122117, avg_loss: 0.319043 009890/063150, loss: 0.216683, avg_loss: 0.318962 009895/063150, loss: 0.195684, avg_loss: 0.318884 009900/063150, loss: 0.088859, avg_loss: 0.318782 009905/063150, loss: 0.125137, avg_loss: 0.318671 009910/063150, loss: 0.308539, avg_loss: 0.318625 009915/063150, loss: 0.077769, avg_loss: 0.318536 009920/063150, loss: 0.040005, avg_loss: 0.318465 009925/063150, loss: 0.223298, avg_loss: 0.318394 009930/063150, loss: 0.152579, avg_loss: 0.318328 009935/063150, loss: 0.218028, avg_loss: 0.318286 009940/063150, loss: 0.105295, avg_loss: 0.318181 009945/063150, loss: 0.232413, avg_loss: 0.318109 009950/063150, loss: 0.205662, avg_loss: 0.318001 009955/063150, loss: 0.137507, avg_loss: 0.317907 009960/063150, loss: 0.122517, avg_loss: 0.317822 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 9960/63150: {'accuracy': 0.8612385321100917} 009965/063150, loss: 0.192335, avg_loss: 0.317720 009970/063150, loss: 0.090811, avg_loss: 0.317603 009975/063150, loss: 0.228959, avg_loss: 0.317527 009980/063150, loss: 0.056074, avg_loss: 0.317449 009985/063150, loss: 0.045065, avg_loss: 0.317350 009990/063150, loss: 0.094455, avg_loss: 0.317276 009995/063150, loss: 0.301941, avg_loss: 0.317223 010000/063150, loss: 0.189853, avg_loss: 0.317141 010005/063150, loss: 0.178251, avg_loss: 0.317062 010010/063150, loss: 0.093454, avg_loss: 0.316966 010015/063150, loss: 0.127900, avg_loss: 0.316878 010020/063150, loss: 0.175450, avg_loss: 0.316799 010025/063150, loss: 0.043340, avg_loss: 0.316689 010030/063150, loss: 0.264480, avg_loss: 0.316617 010035/063150, loss: 0.348374, avg_loss: 0.316550 010040/063150, loss: 0.096781, avg_loss: 0.316461 010045/063150, loss: 0.116490, avg_loss: 0.316362 010050/063150, loss: 0.302402, avg_loss: 0.316299 010055/063150, loss: 0.070589, avg_loss: 0.316239 010060/063150, loss: 0.075808, avg_loss: 0.316154 010065/063150, loss: 0.093432, avg_loss: 0.316084 010070/063150, loss: 0.253873, avg_loss: 0.315986 010075/063150, loss: 0.135548, avg_loss: 0.315920 010080/063150, loss: 0.072331, avg_loss: 0.315851 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 10080/63150: {'accuracy': 0.8520642201834863} 010085/063150, loss: 0.090528, avg_loss: 0.315767 010090/063150, loss: 0.032806, avg_loss: 0.315690 010095/063150, loss: 0.268787, avg_loss: 0.315619 010100/063150, loss: 0.145613, avg_loss: 0.315566 010105/063150, loss: 0.241086, avg_loss: 0.315461 010110/063150, loss: 0.257949, avg_loss: 0.315409 010115/063150, loss: 0.109835, avg_loss: 0.315305 010120/063150, loss: 0.169312, avg_loss: 0.315238 010125/063150, loss: 0.277694, avg_loss: 0.315159 010130/063150, loss: 0.206227, avg_loss: 0.315092 010135/063150, loss: 0.081554, avg_loss: 0.315028 010140/063150, loss: 0.146481, avg_loss: 0.314942 010145/063150, loss: 0.282927, avg_loss: 0.314873 010150/063150, loss: 0.203866, avg_loss: 0.314798 010155/063150, loss: 0.077397, avg_loss: 0.314710 010160/063150, loss: 0.236177, avg_loss: 0.314683 010165/063150, loss: 0.431229, avg_loss: 0.314615 010170/063150, loss: 0.065212, avg_loss: 0.314548 010175/063150, loss: 0.094072, avg_loss: 0.314497 010180/063150, loss: 0.149413, avg_loss: 0.314412 010185/063150, loss: 0.106497, avg_loss: 0.314334 010190/063150, loss: 0.029558, avg_loss: 0.314216 010195/063150, loss: 0.073058, avg_loss: 0.314132 010200/063150, loss: 0.194715, avg_loss: 0.314035 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 10200/63150: {'accuracy': 0.8600917431192661} 010205/063150, loss: 0.071776, avg_loss: 0.313966 010210/063150, loss: 0.281974, avg_loss: 0.313912 010215/063150, loss: 0.290942, avg_loss: 0.313837 010220/063150, loss: 0.162581, avg_loss: 0.313782 010225/063150, loss: 0.141001, avg_loss: 0.313676 010230/063150, loss: 0.231695, avg_loss: 0.313605 010235/063150, loss: 0.205768, avg_loss: 0.313548 010240/063150, loss: 0.134883, avg_loss: 0.313459 010245/063150, loss: 0.060985, avg_loss: 0.313381 010250/063150, loss: 0.092142, avg_loss: 0.313316 010255/063150, loss: 0.252459, avg_loss: 0.313259 010260/063150, loss: 0.045346, avg_loss: 0.313156 010265/063150, loss: 0.078167, avg_loss: 0.313053 010270/063150, loss: 0.272497, avg_loss: 0.312978 010275/063150, loss: 0.179398, avg_loss: 0.312891 010280/063150, loss: 0.071036, avg_loss: 0.312842 010285/063150, loss: 0.108504, avg_loss: 0.312752 010290/063150, loss: 0.069757, avg_loss: 0.312676 010295/063150, loss: 0.167666, avg_loss: 0.312604 010300/063150, loss: 0.141145, avg_loss: 0.312548 010305/063150, loss: 0.195818, avg_loss: 0.312477 010310/063150, loss: 0.122474, avg_loss: 0.312400 010315/063150, loss: 0.070946, avg_loss: 0.312290 010320/063150, loss: 0.227125, avg_loss: 0.312206 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 10320/63150: {'accuracy': 0.8635321100917431} 010325/063150, loss: 0.091803, avg_loss: 0.312140 010330/063150, loss: 0.057969, avg_loss: 0.312076 010335/063150, loss: 0.215628, avg_loss: 0.312017 010340/063150, loss: 0.147804, avg_loss: 0.311936 010345/063150, loss: 0.085695, avg_loss: 0.311829 010350/063150, loss: 0.170673, avg_loss: 0.311785 010355/063150, loss: 0.056970, avg_loss: 0.311697 010360/063150, loss: 0.156192, avg_loss: 0.311615 010365/063150, loss: 0.077619, avg_loss: 0.311527 010370/063150, loss: 0.240395, avg_loss: 0.311469 010375/063150, loss: 0.038833, avg_loss: 0.311392 010380/063150, loss: 0.045784, avg_loss: 0.311316 010385/063150, loss: 0.192790, avg_loss: 0.311232 010390/063150, loss: 0.241581, avg_loss: 0.311162 010395/063150, loss: 0.180038, avg_loss: 0.311084 010400/063150, loss: 0.111988, avg_loss: 0.311021 010405/063150, loss: 0.072416, avg_loss: 0.310947 010410/063150, loss: 0.034544, avg_loss: 0.310875 010415/063150, loss: 0.167089, avg_loss: 0.310778 010420/063150, loss: 0.080621, avg_loss: 0.310700 010425/063150, loss: 0.168110, avg_loss: 0.310607 010430/063150, loss: 0.125608, avg_loss: 0.310519 010435/063150, loss: 0.176862, avg_loss: 0.310456 010440/063150, loss: 0.169321, avg_loss: 0.310394 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 4, step 10440/63150: {'accuracy': 0.8463302752293578} 010445/063150, loss: 0.307146, avg_loss: 0.310384 010450/063150, loss: 0.174915, avg_loss: 0.310321 010455/063150, loss: 0.106211, avg_loss: 0.310265 010460/063150, loss: 0.125601, avg_loss: 0.310202 010465/063150, loss: 0.098977, avg_loss: 0.310099 010470/063150, loss: 0.055487, avg_loss: 0.310007 010475/063150, loss: 0.120468, avg_loss: 0.309905 010480/063150, loss: 0.061571, avg_loss: 0.309816 010485/063150, loss: 0.106210, avg_loss: 0.309754 010490/063150, loss: 0.380060, avg_loss: 0.309686 010495/063150, loss: 0.108737, avg_loss: 0.309608 010500/063150, loss: 0.047445, avg_loss: 0.309550 010505/063150, loss: 0.083730, avg_loss: 0.309448 010510/063150, loss: 0.166087, avg_loss: 0.309363 010515/063150, loss: 0.146377, avg_loss: 0.309281 010520/063150, loss: 0.128919, avg_loss: 0.309214 010525/063150, loss: 0.166360, avg_loss: 0.309147 010530/063150, loss: 0.118950, avg_loss: 0.309042 010535/063150, loss: 0.178382, avg_loss: 0.308967 010540/063150, loss: 0.143099, avg_loss: 0.308861 010545/063150, loss: 0.148934, avg_loss: 0.308756 010550/063150, loss: 0.030103, avg_loss: 0.308631 010555/063150, loss: 0.124762, avg_loss: 0.308541 010560/063150, loss: 0.010532, avg_loss: 0.308428 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 10560/63150: {'accuracy': 0.8497706422018348} 010565/063150, loss: 0.078442, avg_loss: 0.308340 010570/063150, loss: 0.120963, avg_loss: 0.308291 010575/063150, loss: 0.018122, avg_loss: 0.308182 010580/063150, loss: 0.144638, avg_loss: 0.308092 010585/063150, loss: 0.053745, avg_loss: 0.308007 010590/063150, loss: 0.035331, avg_loss: 0.307905 010595/063150, loss: 0.245777, avg_loss: 0.307830 010600/063150, loss: 0.102280, avg_loss: 0.307758 010605/063150, loss: 0.060599, avg_loss: 0.307657 010610/063150, loss: 0.125457, avg_loss: 0.307572 010615/063150, loss: 0.099095, avg_loss: 0.307493 010620/063150, loss: 0.038345, avg_loss: 0.307397 010625/063150, loss: 0.067861, avg_loss: 0.307301 010630/063150, loss: 0.070490, avg_loss: 0.307201 010635/063150, loss: 0.239503, avg_loss: 0.307135 010640/063150, loss: 0.120326, avg_loss: 0.307051 010645/063150, loss: 0.246785, avg_loss: 0.306971 010650/063150, loss: 0.038675, avg_loss: 0.306892 010655/063150, loss: 0.068527, avg_loss: 0.306790 010660/063150, loss: 0.254869, avg_loss: 0.306723 010665/063150, loss: 0.203750, avg_loss: 0.306620 010670/063150, loss: 0.018846, avg_loss: 0.306519 010675/063150, loss: 0.131322, avg_loss: 0.306427 010680/063150, loss: 0.117686, avg_loss: 0.306348 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 10680/63150: {'accuracy': 0.8474770642201835} 010685/063150, loss: 0.069627, avg_loss: 0.306263 010690/063150, loss: 0.212331, avg_loss: 0.306176 010695/063150, loss: 0.038811, avg_loss: 0.306085 010700/063150, loss: 0.116324, avg_loss: 0.305989 010705/063150, loss: 0.100974, avg_loss: 0.305895 010710/063150, loss: 0.070316, avg_loss: 0.305811 010715/063150, loss: 0.232882, avg_loss: 0.305779 010720/063150, loss: 0.140272, avg_loss: 0.305699 010725/063150, loss: 0.124733, avg_loss: 0.305623 010730/063150, loss: 0.207910, avg_loss: 0.305551 010735/063150, loss: 0.041805, avg_loss: 0.305436 010740/063150, loss: 0.170655, avg_loss: 0.305348 010745/063150, loss: 0.140702, avg_loss: 0.305259 010750/063150, loss: 0.093360, avg_loss: 0.305162 010755/063150, loss: 0.152758, avg_loss: 0.305071 010760/063150, loss: 0.039450, avg_loss: 0.304973 010765/063150, loss: 0.012937, avg_loss: 0.304870 010770/063150, loss: 0.087834, avg_loss: 0.304771 010775/063150, loss: 0.195773, avg_loss: 0.304687 010780/063150, loss: 0.070066, avg_loss: 0.304611 010785/063150, loss: 0.043237, avg_loss: 0.304542 010790/063150, loss: 0.160668, avg_loss: 0.304458 010795/063150, loss: 0.156932, avg_loss: 0.304374 010800/063150, loss: 0.041045, avg_loss: 0.304307 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 10800/63150: {'accuracy': 0.8555045871559633} 010805/063150, loss: 0.050182, avg_loss: 0.304247 010810/063150, loss: 0.138973, avg_loss: 0.304186 010815/063150, loss: 0.191587, avg_loss: 0.304111 010820/063150, loss: 0.189376, avg_loss: 0.304039 010825/063150, loss: 0.055514, avg_loss: 0.303962 010830/063150, loss: 0.045113, avg_loss: 0.303869 010835/063150, loss: 0.059702, avg_loss: 0.303764 010840/063150, loss: 0.103249, avg_loss: 0.303651 010845/063150, loss: 0.029940, avg_loss: 0.303568 010850/063150, loss: 0.358446, avg_loss: 0.303513 010855/063150, loss: 0.072046, avg_loss: 0.303428 010860/063150, loss: 0.024368, avg_loss: 0.303352 010865/063150, loss: 0.070819, avg_loss: 0.303265 010870/063150, loss: 0.040140, avg_loss: 0.303187 010875/063150, loss: 0.106851, avg_loss: 0.303090 010880/063150, loss: 0.059044, avg_loss: 0.303011 010885/063150, loss: 0.144637, avg_loss: 0.302922 010890/063150, loss: 0.142176, avg_loss: 0.302833 010895/063150, loss: 0.272884, avg_loss: 0.302771 010900/063150, loss: 0.101391, avg_loss: 0.302730 010905/063150, loss: 0.035028, avg_loss: 0.302627 010910/063150, loss: 0.109430, avg_loss: 0.302554 010915/063150, loss: 0.307841, avg_loss: 0.302511 010920/063150, loss: 0.062385, avg_loss: 0.302424 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 10920/63150: {'accuracy': 0.8577981651376146} 010925/063150, loss: 0.258084, avg_loss: 0.302359 010930/063150, loss: 0.130835, avg_loss: 0.302289 010935/063150, loss: 0.046937, avg_loss: 0.302228 010940/063150, loss: 0.074056, avg_loss: 0.302142 010945/063150, loss: 0.079976, avg_loss: 0.302065 010950/063150, loss: 0.213374, avg_loss: 0.301996 010955/063150, loss: 0.134519, avg_loss: 0.301914 010960/063150, loss: 0.048815, avg_loss: 0.301816 010965/063150, loss: 0.084379, avg_loss: 0.301738 010970/063150, loss: 0.077946, avg_loss: 0.301674 010975/063150, loss: 0.217925, avg_loss: 0.301624 010980/063150, loss: 0.104104, avg_loss: 0.301537 010985/063150, loss: 0.102159, avg_loss: 0.301463 010990/063150, loss: 0.023048, avg_loss: 0.301380 010995/063150, loss: 0.087407, avg_loss: 0.301307 011000/063150, loss: 0.072717, avg_loss: 0.301241 011005/063150, loss: 0.154431, avg_loss: 0.301172 011010/063150, loss: 0.075919, avg_loss: 0.301110 011015/063150, loss: 0.047905, avg_loss: 0.301024 011020/063150, loss: 0.150243, avg_loss: 0.300958 011025/063150, loss: 0.175037, avg_loss: 0.300911 011030/063150, loss: 0.065675, avg_loss: 0.300827 011035/063150, loss: 0.075467, avg_loss: 0.300758 011040/063150, loss: 0.383090, avg_loss: 0.300684 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11040/63150: {'accuracy': 0.8635321100917431} 011045/063150, loss: 0.064023, avg_loss: 0.300608 011050/063150, loss: 0.054205, avg_loss: 0.300526 011055/063150, loss: 0.184745, avg_loss: 0.300486 011060/063150, loss: 0.139269, avg_loss: 0.300402 011065/063150, loss: 0.100906, avg_loss: 0.300316 011070/063150, loss: 0.044016, avg_loss: 0.300223 011075/063150, loss: 0.150750, avg_loss: 0.300161 011080/063150, loss: 0.070051, avg_loss: 0.300081 011085/063150, loss: 0.372006, avg_loss: 0.300011 011090/063150, loss: 0.164667, avg_loss: 0.299933 011095/063150, loss: 0.060243, avg_loss: 0.299841 011100/063150, loss: 0.258733, avg_loss: 0.299795 011105/063150, loss: 0.044230, avg_loss: 0.299732 011110/063150, loss: 0.115914, avg_loss: 0.299654 011115/063150, loss: 0.069076, avg_loss: 0.299555 011120/063150, loss: 0.146748, avg_loss: 0.299479 011125/063150, loss: 0.377297, avg_loss: 0.299429 011130/063150, loss: 0.026127, avg_loss: 0.299348 011135/063150, loss: 0.165699, avg_loss: 0.299297 011140/063150, loss: 0.322973, avg_loss: 0.299238 011145/063150, loss: 0.240229, avg_loss: 0.299185 011150/063150, loss: 0.138315, avg_loss: 0.299081 011155/063150, loss: 0.085248, avg_loss: 0.299017 011160/063150, loss: 0.300645, avg_loss: 0.298951 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11160/63150: {'accuracy': 0.8635321100917431} 011165/063150, loss: 0.180261, avg_loss: 0.298876 011170/063150, loss: 0.037220, avg_loss: 0.298770 011175/063150, loss: 0.111140, avg_loss: 0.298685 011180/063150, loss: 0.163392, avg_loss: 0.298612 011185/063150, loss: 0.162369, avg_loss: 0.298522 011190/063150, loss: 0.286023, avg_loss: 0.298468 011195/063150, loss: 0.031226, avg_loss: 0.298378 011200/063150, loss: 0.050550, avg_loss: 0.298291 011205/063150, loss: 0.062056, avg_loss: 0.298229 011210/063150, loss: 0.255696, avg_loss: 0.298149 011215/063150, loss: 0.011775, avg_loss: 0.298050 011220/063150, loss: 0.172581, avg_loss: 0.297959 011225/063150, loss: 0.124258, avg_loss: 0.297887 011230/063150, loss: 0.146640, avg_loss: 0.297816 011235/063150, loss: 0.091208, avg_loss: 0.297743 011240/063150, loss: 0.154959, avg_loss: 0.297661 011245/063150, loss: 0.210192, avg_loss: 0.297603 011250/063150, loss: 0.091186, avg_loss: 0.297531 011255/063150, loss: 0.062088, avg_loss: 0.297441 011260/063150, loss: 0.024434, avg_loss: 0.297362 011265/063150, loss: 0.065718, avg_loss: 0.297265 011270/063150, loss: 0.094349, avg_loss: 0.297168 011275/063150, loss: 0.088033, avg_loss: 0.297088 011280/063150, loss: 0.156095, avg_loss: 0.296992 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11280/63150: {'accuracy': 0.8577981651376146} 011285/063150, loss: 0.111182, avg_loss: 0.296924 011290/063150, loss: 0.055021, avg_loss: 0.296857 011295/063150, loss: 0.136403, avg_loss: 0.296798 011300/063150, loss: 0.049305, avg_loss: 0.296700 011305/063150, loss: 0.265470, avg_loss: 0.296633 011310/063150, loss: 0.363443, avg_loss: 0.296573 011315/063150, loss: 0.229934, avg_loss: 0.296525 011320/063150, loss: 0.104641, avg_loss: 0.296435 011325/063150, loss: 0.132260, avg_loss: 0.296342 011330/063150, loss: 0.084565, avg_loss: 0.296259 011335/063150, loss: 0.152346, avg_loss: 0.296201 011340/063150, loss: 0.083349, avg_loss: 0.296136 011345/063150, loss: 0.205813, avg_loss: 0.296079 011350/063150, loss: 0.057890, avg_loss: 0.295998 011355/063150, loss: 0.139605, avg_loss: 0.295903 011360/063150, loss: 0.213980, avg_loss: 0.295835 011365/063150, loss: 0.215851, avg_loss: 0.295757 011370/063150, loss: 0.028619, avg_loss: 0.295660 011375/063150, loss: 0.133684, avg_loss: 0.295582 011380/063150, loss: 0.131321, avg_loss: 0.295491 011385/063150, loss: 0.226065, avg_loss: 0.295455 011390/063150, loss: 0.080510, avg_loss: 0.295374 011395/063150, loss: 0.098390, avg_loss: 0.295291 011400/063150, loss: 0.044907, avg_loss: 0.295197 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11400/63150: {'accuracy': 0.8394495412844036} 011405/063150, loss: 0.058795, avg_loss: 0.295125 011410/063150, loss: 0.044944, avg_loss: 0.295026 011415/063150, loss: 0.065715, avg_loss: 0.294930 011420/063150, loss: 0.200236, avg_loss: 0.294863 011425/063150, loss: 0.121049, avg_loss: 0.294801 011430/063150, loss: 0.146016, avg_loss: 0.294722 011435/063150, loss: 0.030630, avg_loss: 0.294655 011440/063150, loss: 0.038620, avg_loss: 0.294554 011445/063150, loss: 0.036397, avg_loss: 0.294467 011450/063150, loss: 0.078964, avg_loss: 0.294375 011455/063150, loss: 0.173694, avg_loss: 0.294289 011460/063150, loss: 0.167719, avg_loss: 0.294198 011465/063150, loss: 0.218025, avg_loss: 0.294115 011470/063150, loss: 0.064786, avg_loss: 0.294022 011475/063150, loss: 0.174319, avg_loss: 0.293966 011480/063150, loss: 0.360695, avg_loss: 0.293900 011485/063150, loss: 0.176150, avg_loss: 0.293822 011490/063150, loss: 0.103830, avg_loss: 0.293749 011495/063150, loss: 0.074414, avg_loss: 0.293685 011500/063150, loss: 0.047651, avg_loss: 0.293580 011505/063150, loss: 0.064202, avg_loss: 0.293524 011510/063150, loss: 0.076031, avg_loss: 0.293455 011515/063150, loss: 0.205897, avg_loss: 0.293372 011520/063150, loss: 0.054321, avg_loss: 0.293290 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11520/63150: {'accuracy': 0.8405963302752294} 011525/063150, loss: 0.087383, avg_loss: 0.293240 011530/063150, loss: 0.235280, avg_loss: 0.293222 011535/063150, loss: 0.204239, avg_loss: 0.293153 011540/063150, loss: 0.150483, avg_loss: 0.293101 011545/063150, loss: 0.068807, avg_loss: 0.293034 011550/063150, loss: 0.234452, avg_loss: 0.292963 011555/063150, loss: 0.033685, avg_loss: 0.292882 011560/063150, loss: 0.298000, avg_loss: 0.292804 011565/063150, loss: 0.259819, avg_loss: 0.292732 011570/063150, loss: 0.068553, avg_loss: 0.292664 011575/063150, loss: 0.147556, avg_loss: 0.292595 011580/063150, loss: 0.108045, avg_loss: 0.292555 011585/063150, loss: 0.173416, avg_loss: 0.292507 011590/063150, loss: 0.096568, avg_loss: 0.292415 011595/063150, loss: 0.397567, avg_loss: 0.292399 011600/063150, loss: 0.129087, avg_loss: 0.292319 011605/063150, loss: 0.093456, avg_loss: 0.292240 011610/063150, loss: 0.158300, avg_loss: 0.292181 011615/063150, loss: 0.100482, avg_loss: 0.292100 011620/063150, loss: 0.194647, avg_loss: 0.292030 011625/063150, loss: 0.199176, avg_loss: 0.291964 011630/063150, loss: 0.022553, avg_loss: 0.291895 011635/063150, loss: 0.120800, avg_loss: 0.291820 011640/063150, loss: 0.165404, avg_loss: 0.291768 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11640/63150: {'accuracy': 0.838302752293578} 011645/063150, loss: 0.062540, avg_loss: 0.291691 011650/063150, loss: 0.248271, avg_loss: 0.291626 011655/063150, loss: 0.133732, avg_loss: 0.291545 011660/063150, loss: 0.074972, avg_loss: 0.291487 011665/063150, loss: 0.217266, avg_loss: 0.291420 011670/063150, loss: 0.042485, avg_loss: 0.291348 011675/063150, loss: 0.045135, avg_loss: 0.291263 011680/063150, loss: 0.046080, avg_loss: 0.291198 011685/063150, loss: 0.064821, avg_loss: 0.291091 011690/063150, loss: 0.033320, avg_loss: 0.291017 011695/063150, loss: 0.111138, avg_loss: 0.290942 011700/063150, loss: 0.197394, avg_loss: 0.290858 011705/063150, loss: 0.256846, avg_loss: 0.290788 011710/063150, loss: 0.063811, avg_loss: 0.290721 011715/063150, loss: 0.067020, avg_loss: 0.290629 011720/063150, loss: 0.357743, avg_loss: 0.290568 011725/063150, loss: 0.231975, avg_loss: 0.290528 011730/063150, loss: 0.041162, avg_loss: 0.290438 011735/063150, loss: 0.023695, avg_loss: 0.290366 011740/063150, loss: 0.084688, avg_loss: 0.290321 011745/063150, loss: 0.473796, avg_loss: 0.290274 011750/063150, loss: 0.114563, avg_loss: 0.290184 011755/063150, loss: 0.187628, avg_loss: 0.290132 011760/063150, loss: 0.333238, avg_loss: 0.290077 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11760/63150: {'accuracy': 0.8532110091743119} 011765/063150, loss: 0.255383, avg_loss: 0.290005 011770/063150, loss: 0.210848, avg_loss: 0.289965 011775/063150, loss: 0.138689, avg_loss: 0.289895 011780/063150, loss: 0.023046, avg_loss: 0.289823 011785/063150, loss: 0.092461, avg_loss: 0.289749 011790/063150, loss: 0.161388, avg_loss: 0.289696 011795/063150, loss: 0.099379, avg_loss: 0.289604 011800/063150, loss: 0.223064, avg_loss: 0.289555 011805/063150, loss: 0.267299, avg_loss: 0.289505 011810/063150, loss: 0.140722, avg_loss: 0.289426 011815/063150, loss: 0.022494, avg_loss: 0.289328 011820/063150, loss: 0.037473, avg_loss: 0.289279 011825/063150, loss: 0.196594, avg_loss: 0.289243 011830/063150, loss: 0.055268, avg_loss: 0.289167 011835/063150, loss: 0.086641, avg_loss: 0.289107 011840/063150, loss: 0.126851, avg_loss: 0.289030 011845/063150, loss: 0.225183, avg_loss: 0.288967 011850/063150, loss: 0.091154, avg_loss: 0.288902 011855/063150, loss: 0.087773, avg_loss: 0.288823 011860/063150, loss: 0.224470, avg_loss: 0.288785 011865/063150, loss: 0.313331, avg_loss: 0.288740 011870/063150, loss: 0.060008, avg_loss: 0.288671 011875/063150, loss: 0.137095, avg_loss: 0.288643 011880/063150, loss: 0.179460, avg_loss: 0.288602 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 11880/63150: {'accuracy': 0.8497706422018348} 011885/063150, loss: 0.128724, avg_loss: 0.288542 011890/063150, loss: 0.190379, avg_loss: 0.288480 011895/063150, loss: 0.119125, avg_loss: 0.288404 011900/063150, loss: 0.054045, avg_loss: 0.288328 011905/063150, loss: 0.158816, avg_loss: 0.288256 011910/063150, loss: 0.148271, avg_loss: 0.288181 011915/063150, loss: 0.103456, avg_loss: 0.288093 011920/063150, loss: 0.050163, avg_loss: 0.288010 011925/063150, loss: 0.153077, avg_loss: 0.287933 011930/063150, loss: 0.209436, avg_loss: 0.287858 011935/063150, loss: 0.035930, avg_loss: 0.287785 011940/063150, loss: 0.097476, avg_loss: 0.287694 011945/063150, loss: 0.022517, avg_loss: 0.287638 011950/063150, loss: 0.187662, avg_loss: 0.287555 011955/063150, loss: 0.050620, avg_loss: 0.287485 011960/063150, loss: 0.044364, avg_loss: 0.287425 011965/063150, loss: 0.169347, avg_loss: 0.287346 011970/063150, loss: 0.094964, avg_loss: 0.287281 011975/063150, loss: 0.082400, avg_loss: 0.287202 011980/063150, loss: 0.083242, avg_loss: 0.287139 011985/063150, loss: 0.061428, avg_loss: 0.287053 011990/063150, loss: 0.377398, avg_loss: 0.287008 011995/063150, loss: 0.114585, avg_loss: 0.286940 012000/063150, loss: 0.155495, avg_loss: 0.286871 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 12000/63150: {'accuracy': 0.8532110091743119} 012005/063150, loss: 0.078167, avg_loss: 0.286791 012010/063150, loss: 0.203096, avg_loss: 0.286753 012015/063150, loss: 0.183604, avg_loss: 0.286713 012020/063150, loss: 0.240165, avg_loss: 0.286683 012025/063150, loss: 0.145528, avg_loss: 0.286635 012030/063150, loss: 0.192909, avg_loss: 0.286586 012035/063150, loss: 0.094191, avg_loss: 0.286521 012040/063150, loss: 0.110136, avg_loss: 0.286466 012045/063150, loss: 0.066443, avg_loss: 0.286398 012050/063150, loss: 0.298872, avg_loss: 0.286342 012055/063150, loss: 0.148360, avg_loss: 0.286268 012060/063150, loss: 0.071920, avg_loss: 0.286208 012065/063150, loss: 0.049196, avg_loss: 0.286142 012070/063150, loss: 0.023247, avg_loss: 0.286057 012075/063150, loss: 0.195157, avg_loss: 0.285993 012080/063150, loss: 0.060874, avg_loss: 0.285932 012085/063150, loss: 0.054214, avg_loss: 0.285873 012090/063150, loss: 0.136288, avg_loss: 0.285814 012095/063150, loss: 0.162098, avg_loss: 0.285760 012100/063150, loss: 0.050607, avg_loss: 0.285707 012105/063150, loss: 0.051216, avg_loss: 0.285658 012110/063150, loss: 0.174317, avg_loss: 0.285592 012115/063150, loss: 0.308638, avg_loss: 0.285543 012120/063150, loss: 0.122276, avg_loss: 0.285468 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 12120/63150: {'accuracy': 0.8440366972477065} 012125/063150, loss: 0.187330, avg_loss: 0.285401 012130/063150, loss: 0.312589, avg_loss: 0.285346 012135/063150, loss: 0.103478, avg_loss: 0.285301 012140/063150, loss: 0.013447, avg_loss: 0.285216 012145/063150, loss: 0.138980, avg_loss: 0.285140 012150/063150, loss: 0.041558, avg_loss: 0.285058 012155/063150, loss: 0.225342, avg_loss: 0.285002 012160/063150, loss: 0.129627, avg_loss: 0.284916 012165/063150, loss: 0.030539, avg_loss: 0.284856 012170/063150, loss: 0.014549, avg_loss: 0.284765 012175/063150, loss: 0.388364, avg_loss: 0.284729 012180/063150, loss: 0.199386, avg_loss: 0.284668 012185/063150, loss: 0.270246, avg_loss: 0.284626 012190/063150, loss: 0.239315, avg_loss: 0.284565 012195/063150, loss: 0.174959, avg_loss: 0.284557 012200/063150, loss: 0.155529, avg_loss: 0.284499 012205/063150, loss: 0.221781, avg_loss: 0.284444 012210/063150, loss: 0.046283, avg_loss: 0.284390 012215/063150, loss: 0.099540, avg_loss: 0.284325 012220/063150, loss: 0.016643, avg_loss: 0.284268 012225/063150, loss: 0.040506, avg_loss: 0.284214 012230/063150, loss: 0.018380, avg_loss: 0.284139 012235/063150, loss: 0.071935, avg_loss: 0.284054 012240/063150, loss: 0.067944, avg_loss: 0.283995 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 12240/63150: {'accuracy': 0.8497706422018348} 012245/063150, loss: 0.019915, avg_loss: 0.283949 012250/063150, loss: 0.309860, avg_loss: 0.283886 012255/063150, loss: 0.213824, avg_loss: 0.283836 012260/063150, loss: 0.081780, avg_loss: 0.283773 012265/063150, loss: 0.170630, avg_loss: 0.283700 012270/063150, loss: 0.024879, avg_loss: 0.283634 012275/063150, loss: 0.276173, avg_loss: 0.283585 012280/063150, loss: 0.141190, avg_loss: 0.283528 012285/063150, loss: 0.172848, avg_loss: 0.283464 012290/063150, loss: 0.141190, avg_loss: 0.283390 012295/063150, loss: 0.207016, avg_loss: 0.283330 012300/063150, loss: 0.097402, avg_loss: 0.283271 012305/063150, loss: 0.136080, avg_loss: 0.283214 012310/063150, loss: 0.199594, avg_loss: 0.283155 012315/063150, loss: 0.288813, avg_loss: 0.283093 012320/063150, loss: 0.135031, avg_loss: 0.283027 012325/063150, loss: 0.084555, avg_loss: 0.282954 012330/063150, loss: 0.117300, avg_loss: 0.282885 012335/063150, loss: 0.312335, avg_loss: 0.282843 012340/063150, loss: 0.167584, avg_loss: 0.282768 012345/063150, loss: 0.078126, avg_loss: 0.282698 012350/063150, loss: 0.087348, avg_loss: 0.282625 012355/063150, loss: 0.209596, avg_loss: 0.282554 012360/063150, loss: 0.101469, avg_loss: 0.282492 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 12360/63150: {'accuracy': 0.8405963302752294} 012365/063150, loss: 0.159936, avg_loss: 0.282443 012370/063150, loss: 0.022752, avg_loss: 0.282420 012375/063150, loss: 0.228275, avg_loss: 0.282351 012380/063150, loss: 0.123931, avg_loss: 0.282297 012385/063150, loss: 0.026388, avg_loss: 0.282220 012390/063150, loss: 0.088277, avg_loss: 0.282133 012395/063150, loss: 0.128585, avg_loss: 0.282073 012400/063150, loss: 0.141453, avg_loss: 0.281997 012405/063150, loss: 0.101637, avg_loss: 0.281938 012410/063150, loss: 0.055244, avg_loss: 0.281858 012415/063150, loss: 0.033181, avg_loss: 0.281779 012420/063150, loss: 0.048105, avg_loss: 0.281723 012425/063150, loss: 0.059283, avg_loss: 0.281687 012430/063150, loss: 0.013903, avg_loss: 0.281627 012435/063150, loss: 0.211168, avg_loss: 0.281573 012440/063150, loss: 0.084332, avg_loss: 0.281500 012445/063150, loss: 0.289767, avg_loss: 0.281452 012450/063150, loss: 0.188534, avg_loss: 0.281404 012455/063150, loss: 0.251635, avg_loss: 0.281345 012460/063150, loss: 0.161930, avg_loss: 0.281305 012465/063150, loss: 0.335953, avg_loss: 0.281260 012470/063150, loss: 0.208025, avg_loss: 0.281224 012475/063150, loss: 0.053871, avg_loss: 0.281155 012480/063150, loss: 0.125879, avg_loss: 0.281101 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 12480/63150: {'accuracy': 0.8463302752293578} 012485/063150, loss: 0.050669, avg_loss: 0.281058 012490/063150, loss: 0.255816, avg_loss: 0.281012 012495/063150, loss: 0.018033, avg_loss: 0.280947 012500/063150, loss: 0.237609, avg_loss: 0.280892 012505/063150, loss: 0.047795, avg_loss: 0.280828 012510/063150, loss: 0.197351, avg_loss: 0.280756 012515/063150, loss: 0.105237, avg_loss: 0.280693 012520/063150, loss: 0.144539, avg_loss: 0.280629 012525/063150, loss: 0.073976, avg_loss: 0.280562 012530/063150, loss: 0.467036, avg_loss: 0.280512 012535/063150, loss: 0.138596, avg_loss: 0.280434 012540/063150, loss: 0.130581, avg_loss: 0.280370 012545/063150, loss: 0.164143, avg_loss: 0.280315 012550/063150, loss: 0.230616, avg_loss: 0.280273 012555/063150, loss: 0.246401, avg_loss: 0.280253 012560/063150, loss: 0.105198, avg_loss: 0.280194 012565/063150, loss: 0.270347, avg_loss: 0.280141 012570/063150, loss: 0.256924, avg_loss: 0.280090 012575/063150, loss: 0.047514, avg_loss: 0.280016 012580/063150, loss: 0.056171, avg_loss: 0.279965 012585/063150, loss: 0.143267, avg_loss: 0.279906 012590/063150, loss: 0.043212, avg_loss: 0.279839 012595/063150, loss: 0.019226, avg_loss: 0.279764 012600/063150, loss: 0.270574, avg_loss: 0.279695 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 5, step 12600/63150: {'accuracy': 0.8589449541284404} 012605/063150, loss: 0.017577, avg_loss: 0.279607 012610/063150, loss: 0.155197, avg_loss: 0.279541 012615/063150, loss: 0.046276, avg_loss: 0.279450 012620/063150, loss: 0.070490, avg_loss: 0.279386 012625/063150, loss: 0.265416, avg_loss: 0.279346 012630/063150, loss: 0.024924, avg_loss: 0.279274 012635/063150, loss: 0.093318, avg_loss: 0.279199 012640/063150, loss: 0.088615, avg_loss: 0.279127 012645/063150, loss: 0.181436, avg_loss: 0.279061 012650/063150, loss: 0.059107, avg_loss: 0.278987 012655/063150, loss: 0.160050, avg_loss: 0.278910 012660/063150, loss: 0.097640, avg_loss: 0.278837 012665/063150, loss: 0.118547, avg_loss: 0.278769 012670/063150, loss: 0.048246, avg_loss: 0.278693 012675/063150, loss: 0.041198, avg_loss: 0.278613 012680/063150, loss: 0.119466, avg_loss: 0.278527 012685/063150, loss: 0.173420, avg_loss: 0.278456 012690/063150, loss: 0.088104, avg_loss: 0.278417 012695/063150, loss: 0.065836, avg_loss: 0.278326 012700/063150, loss: 0.084069, avg_loss: 0.278263 012705/063150, loss: 0.096931, avg_loss: 0.278184 012710/063150, loss: 0.110463, avg_loss: 0.278117 012715/063150, loss: 0.021911, avg_loss: 0.278062 012720/063150, loss: 0.060979, avg_loss: 0.278011 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 12720/63150: {'accuracy': 0.8543577981651376} 012725/063150, loss: 0.039972, avg_loss: 0.277944 012730/063150, loss: 0.089546, avg_loss: 0.277874 012735/063150, loss: 0.104067, avg_loss: 0.277801 012740/063150, loss: 0.123020, avg_loss: 0.277766 012745/063150, loss: 0.454353, avg_loss: 0.277713 012750/063150, loss: 0.128797, avg_loss: 0.277638 012755/063150, loss: 0.083241, avg_loss: 0.277574 012760/063150, loss: 0.147388, avg_loss: 0.277509 012765/063150, loss: 0.126672, avg_loss: 0.277428 012770/063150, loss: 0.089539, avg_loss: 0.277366 012775/063150, loss: 0.061422, avg_loss: 0.277287 012780/063150, loss: 0.047593, avg_loss: 0.277216 012785/063150, loss: 0.111838, avg_loss: 0.277134 012790/063150, loss: 0.183344, avg_loss: 0.277058 012795/063150, loss: 0.107863, avg_loss: 0.277014 012800/063150, loss: 0.093615, avg_loss: 0.276946 012805/063150, loss: 0.089627, avg_loss: 0.276874 012810/063150, loss: 0.012900, avg_loss: 0.276793 012815/063150, loss: 0.194428, avg_loss: 0.276725 012820/063150, loss: 0.180725, avg_loss: 0.276672 012825/063150, loss: 0.124272, avg_loss: 0.276606 012830/063150, loss: 0.028520, avg_loss: 0.276532 012835/063150, loss: 0.120147, avg_loss: 0.276459 012840/063150, loss: 0.047961, avg_loss: 0.276381 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 12840/63150: {'accuracy': 0.8360091743119266} 012845/063150, loss: 0.298571, avg_loss: 0.276322 012850/063150, loss: 0.115047, avg_loss: 0.276253 012855/063150, loss: 0.118174, avg_loss: 0.276199 012860/063150, loss: 0.107821, avg_loss: 0.276123 012865/063150, loss: 0.034822, avg_loss: 0.276055 012870/063150, loss: 0.042447, avg_loss: 0.275995 012875/063150, loss: 0.201824, avg_loss: 0.275930 012880/063150, loss: 0.109776, avg_loss: 0.275851 012885/063150, loss: 0.105178, avg_loss: 0.275799 012890/063150, loss: 0.035115, avg_loss: 0.275728 012895/063150, loss: 0.122633, avg_loss: 0.275669 012900/063150, loss: 0.056667, avg_loss: 0.275593 012905/063150, loss: 0.117544, avg_loss: 0.275527 012910/063150, loss: 0.061899, avg_loss: 0.275480 012915/063150, loss: 0.106639, avg_loss: 0.275413 012920/063150, loss: 0.121677, avg_loss: 0.275346 012925/063150, loss: 0.019810, avg_loss: 0.275274 012930/063150, loss: 0.202127, avg_loss: 0.275209 012935/063150, loss: 0.173433, avg_loss: 0.275154 012940/063150, loss: 0.221774, avg_loss: 0.275096 012945/063150, loss: 0.280484, avg_loss: 0.275045 012950/063150, loss: 0.130884, avg_loss: 0.274974 012955/063150, loss: 0.271203, avg_loss: 0.274916 012960/063150, loss: 0.063081, avg_loss: 0.274861 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 12960/63150: {'accuracy': 0.8371559633027523} 012965/063150, loss: 0.062341, avg_loss: 0.274783 012970/063150, loss: 0.109526, avg_loss: 0.274700 012975/063150, loss: 0.032035, avg_loss: 0.274647 012980/063150, loss: 0.151826, avg_loss: 0.274578 012985/063150, loss: 0.232828, avg_loss: 0.274515 012990/063150, loss: 0.082368, avg_loss: 0.274460 012995/063150, loss: 0.114868, avg_loss: 0.274422 013000/063150, loss: 0.152240, avg_loss: 0.274367 013005/063150, loss: 0.114849, avg_loss: 0.274305 013010/063150, loss: 0.269775, avg_loss: 0.274245 013015/063150, loss: 0.056767, avg_loss: 0.274164 013020/063150, loss: 0.064866, avg_loss: 0.274128 013025/063150, loss: 0.069140, avg_loss: 0.274061 013030/063150, loss: 0.033224, avg_loss: 0.273980 013035/063150, loss: 0.114303, avg_loss: 0.273930 013040/063150, loss: 0.124206, avg_loss: 0.273858 013045/063150, loss: 0.098769, avg_loss: 0.273780 013050/063150, loss: 0.160771, avg_loss: 0.273715 013055/063150, loss: 0.111488, avg_loss: 0.273662 013060/063150, loss: 0.128473, avg_loss: 0.273587 013065/063150, loss: 0.073546, avg_loss: 0.273526 013070/063150, loss: 0.112468, avg_loss: 0.273459 013075/063150, loss: 0.185894, avg_loss: 0.273410 013080/063150, loss: 0.295514, avg_loss: 0.273359 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13080/63150: {'accuracy': 0.8486238532110092} 013085/063150, loss: 0.022749, avg_loss: 0.273272 013090/063150, loss: 0.093160, avg_loss: 0.273207 013095/063150, loss: 0.085173, avg_loss: 0.273130 013100/063150, loss: 0.063070, avg_loss: 0.273056 013105/063150, loss: 0.037746, avg_loss: 0.272982 013110/063150, loss: 0.028997, avg_loss: 0.272902 013115/063150, loss: 0.108188, avg_loss: 0.272864 013120/063150, loss: 0.073325, avg_loss: 0.272808 013125/063150, loss: 0.061102, avg_loss: 0.272766 013130/063150, loss: 0.113964, avg_loss: 0.272722 013135/063150, loss: 0.121411, avg_loss: 0.272648 013140/063150, loss: 0.122367, avg_loss: 0.272577 013145/063150, loss: 0.118462, avg_loss: 0.272531 013150/063150, loss: 0.048148, avg_loss: 0.272448 013155/063150, loss: 0.057264, avg_loss: 0.272376 013160/063150, loss: 0.056291, avg_loss: 0.272317 013165/063150, loss: 0.092015, avg_loss: 0.272259 013170/063150, loss: 0.162923, avg_loss: 0.272207 013175/063150, loss: 0.088421, avg_loss: 0.272124 013180/063150, loss: 0.100824, avg_loss: 0.272072 013185/063150, loss: 0.198496, avg_loss: 0.272020 013190/063150, loss: 0.045203, avg_loss: 0.271968 013195/063150, loss: 0.069659, avg_loss: 0.271895 013200/063150, loss: 0.087967, avg_loss: 0.271830 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13200/63150: {'accuracy': 0.8543577981651376} 013205/063150, loss: 0.062170, avg_loss: 0.271765 013210/063150, loss: 0.093041, avg_loss: 0.271720 013215/063150, loss: 0.043924, avg_loss: 0.271663 013220/063150, loss: 0.125447, avg_loss: 0.271591 013225/063150, loss: 0.258061, avg_loss: 0.271541 013230/063150, loss: 0.169371, avg_loss: 0.271493 013235/063150, loss: 0.141665, avg_loss: 0.271423 013240/063150, loss: 0.110977, avg_loss: 0.271340 013245/063150, loss: 0.113201, avg_loss: 0.271263 013250/063150, loss: 0.068434, avg_loss: 0.271223 013255/063150, loss: 0.249632, avg_loss: 0.271161 013260/063150, loss: 0.080187, avg_loss: 0.271095 013265/063150, loss: 0.046250, avg_loss: 0.271020 013270/063150, loss: 0.035164, avg_loss: 0.270960 013275/063150, loss: 0.295392, avg_loss: 0.270915 013280/063150, loss: 0.023077, avg_loss: 0.270843 013285/063150, loss: 0.122318, avg_loss: 0.270769 013290/063150, loss: 0.020301, avg_loss: 0.270687 013295/063150, loss: 0.174889, avg_loss: 0.270629 013300/063150, loss: 0.147910, avg_loss: 0.270560 013305/063150, loss: 0.019807, avg_loss: 0.270477 013310/063150, loss: 0.008060, avg_loss: 0.270389 013315/063150, loss: 0.039331, avg_loss: 0.270319 013320/063150, loss: 0.038662, avg_loss: 0.270301 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13320/63150: {'accuracy': 0.8497706422018348} 013325/063150, loss: 0.029035, avg_loss: 0.270221 013330/063150, loss: 0.300784, avg_loss: 0.270162 013335/063150, loss: 0.031550, avg_loss: 0.270093 013340/063150, loss: 0.125954, avg_loss: 0.270047 013345/063150, loss: 0.192454, avg_loss: 0.269993 013350/063150, loss: 0.107369, avg_loss: 0.269934 013355/063150, loss: 0.081192, avg_loss: 0.269892 013360/063150, loss: 0.371979, avg_loss: 0.269858 013365/063150, loss: 0.079792, avg_loss: 0.269803 013370/063150, loss: 0.173756, avg_loss: 0.269746 013375/063150, loss: 0.375714, avg_loss: 0.269696 013380/063150, loss: 0.168938, avg_loss: 0.269636 013385/063150, loss: 0.178837, avg_loss: 0.269594 013390/063150, loss: 0.076373, avg_loss: 0.269524 013395/063150, loss: 0.138537, avg_loss: 0.269461 013400/063150, loss: 0.080482, avg_loss: 0.269395 013405/063150, loss: 0.109945, avg_loss: 0.269358 013410/063150, loss: 0.070638, avg_loss: 0.269297 013415/063150, loss: 0.129705, avg_loss: 0.269240 013420/063150, loss: 0.257224, avg_loss: 0.269183 013425/063150, loss: 0.026388, avg_loss: 0.269124 013430/063150, loss: 0.129405, avg_loss: 0.269061 013435/063150, loss: 0.060233, avg_loss: 0.269002 013440/063150, loss: 0.191335, avg_loss: 0.268973 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13440/63150: {'accuracy': 0.8451834862385321} 013445/063150, loss: 0.094875, avg_loss: 0.268929 013450/063150, loss: 0.043482, avg_loss: 0.268865 013455/063150, loss: 0.096150, avg_loss: 0.268818 013460/063150, loss: 0.031702, avg_loss: 0.268734 013465/063150, loss: 0.208088, avg_loss: 0.268681 013470/063150, loss: 0.070791, avg_loss: 0.268615 013475/063150, loss: 0.072805, avg_loss: 0.268554 013480/063150, loss: 0.298978, avg_loss: 0.268508 013485/063150, loss: 0.232945, avg_loss: 0.268460 013490/063150, loss: 0.155616, avg_loss: 0.268403 013495/063150, loss: 0.154669, avg_loss: 0.268354 013500/063150, loss: 0.076360, avg_loss: 0.268292 013505/063150, loss: 0.042192, avg_loss: 0.268244 013510/063150, loss: 0.112927, avg_loss: 0.268188 013515/063150, loss: 0.019044, avg_loss: 0.268134 013520/063150, loss: 0.195267, avg_loss: 0.268111 013525/063150, loss: 0.116349, avg_loss: 0.268052 013530/063150, loss: 0.076219, avg_loss: 0.267990 013535/063150, loss: 0.056818, avg_loss: 0.267924 013540/063150, loss: 0.071377, avg_loss: 0.267865 013545/063150, loss: 0.163268, avg_loss: 0.267803 013550/063150, loss: 0.037107, avg_loss: 0.267723 013555/063150, loss: 0.078334, avg_loss: 0.267651 013560/063150, loss: 0.092340, avg_loss: 0.267588 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13560/63150: {'accuracy': 0.8463302752293578} 013565/063150, loss: 0.094319, avg_loss: 0.267545 013570/063150, loss: 0.120061, avg_loss: 0.267476 013575/063150, loss: 0.054833, avg_loss: 0.267426 013580/063150, loss: 0.271442, avg_loss: 0.267368 013585/063150, loss: 0.058013, avg_loss: 0.267313 013590/063150, loss: 0.118900, avg_loss: 0.267276 013595/063150, loss: 0.142767, avg_loss: 0.267212 013600/063150, loss: 0.066206, avg_loss: 0.267183 013605/063150, loss: 0.135607, avg_loss: 0.267127 013610/063150, loss: 0.030110, avg_loss: 0.267060 013615/063150, loss: 0.084123, avg_loss: 0.266999 013620/063150, loss: 0.135131, avg_loss: 0.266948 013625/063150, loss: 0.089114, avg_loss: 0.266882 013630/063150, loss: 0.270881, avg_loss: 0.266860 013635/063150, loss: 0.055905, avg_loss: 0.266795 013640/063150, loss: 0.087505, avg_loss: 0.266715 013645/063150, loss: 0.064347, avg_loss: 0.266675 013650/063150, loss: 0.013942, avg_loss: 0.266625 013655/063150, loss: 0.116575, avg_loss: 0.266570 013660/063150, loss: 0.184201, avg_loss: 0.266531 013665/063150, loss: 0.202388, avg_loss: 0.266484 013670/063150, loss: 0.061368, avg_loss: 0.266427 013675/063150, loss: 0.116518, avg_loss: 0.266383 013680/063150, loss: 0.052227, avg_loss: 0.266326 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13680/63150: {'accuracy': 0.8474770642201835} 013685/063150, loss: 0.077557, avg_loss: 0.266278 013690/063150, loss: 0.182886, avg_loss: 0.266220 013695/063150, loss: 0.247576, avg_loss: 0.266172 013700/063150, loss: 0.036649, avg_loss: 0.266120 013705/063150, loss: 0.138115, avg_loss: 0.266070 013710/063150, loss: 0.107699, avg_loss: 0.266012 013715/063150, loss: 0.031566, avg_loss: 0.265960 013720/063150, loss: 0.054828, avg_loss: 0.265891 013725/063150, loss: 0.071758, avg_loss: 0.265828 013730/063150, loss: 0.060742, avg_loss: 0.265781 013735/063150, loss: 0.168752, avg_loss: 0.265723 013740/063150, loss: 0.028218, avg_loss: 0.265656 013745/063150, loss: 0.107161, avg_loss: 0.265585 013750/063150, loss: 0.069972, avg_loss: 0.265522 013755/063150, loss: 0.024637, avg_loss: 0.265466 013760/063150, loss: 0.056660, avg_loss: 0.265396 013765/063150, loss: 0.022972, avg_loss: 0.265322 013770/063150, loss: 0.203705, avg_loss: 0.265296 013775/063150, loss: 0.280965, avg_loss: 0.265247 013780/063150, loss: 0.101777, avg_loss: 0.265190 013785/063150, loss: 0.149071, avg_loss: 0.265143 013790/063150, loss: 0.053666, avg_loss: 0.265105 013795/063150, loss: 0.066703, avg_loss: 0.265040 013800/063150, loss: 0.223569, avg_loss: 0.265025 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13800/63150: {'accuracy': 0.8268348623853211} 013805/063150, loss: 0.079790, avg_loss: 0.264975 013810/063150, loss: 0.128095, avg_loss: 0.264909 013815/063150, loss: 0.055525, avg_loss: 0.264866 013820/063150, loss: 0.307931, avg_loss: 0.264832 013825/063150, loss: 0.076600, avg_loss: 0.264770 013830/063150, loss: 0.091545, avg_loss: 0.264711 013835/063150, loss: 0.030899, avg_loss: 0.264676 013840/063150, loss: 0.121103, avg_loss: 0.264630 013845/063150, loss: 0.084404, avg_loss: 0.264551 013850/063150, loss: 0.097718, avg_loss: 0.264489 013855/063150, loss: 0.187055, avg_loss: 0.264446 013860/063150, loss: 0.138895, avg_loss: 0.264382 013865/063150, loss: 0.105118, avg_loss: 0.264313 013870/063150, loss: 0.172121, avg_loss: 0.264275 013875/063150, loss: 0.087043, avg_loss: 0.264222 013880/063150, loss: 0.165336, avg_loss: 0.264185 013885/063150, loss: 0.045960, avg_loss: 0.264110 013890/063150, loss: 0.270816, avg_loss: 0.264054 013895/063150, loss: 0.072052, avg_loss: 0.263987 013900/063150, loss: 0.175842, avg_loss: 0.263973 013905/063150, loss: 0.050581, avg_loss: 0.263902 013910/063150, loss: 0.084705, avg_loss: 0.263834 013915/063150, loss: 0.152145, avg_loss: 0.263778 013920/063150, loss: 0.051316, avg_loss: 0.263720 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 13920/63150: {'accuracy': 0.8497706422018348} 013925/063150, loss: 0.103797, avg_loss: 0.263661 013930/063150, loss: 0.089867, avg_loss: 0.263589 013935/063150, loss: 0.163807, avg_loss: 0.263540 013940/063150, loss: 0.078997, avg_loss: 0.263480 013945/063150, loss: 0.152408, avg_loss: 0.263419 013950/063150, loss: 0.123994, avg_loss: 0.263358 013955/063150, loss: 0.013176, avg_loss: 0.263298 013960/063150, loss: 0.095018, avg_loss: 0.263246 013965/063150, loss: 0.134163, avg_loss: 0.263174 013970/063150, loss: 0.178191, avg_loss: 0.263127 013975/063150, loss: 0.208208, avg_loss: 0.263086 013980/063150, loss: 0.360650, avg_loss: 0.263034 013985/063150, loss: 0.108444, avg_loss: 0.262983 013990/063150, loss: 0.286343, avg_loss: 0.262923 013995/063150, loss: 0.140673, avg_loss: 0.262886 014000/063150, loss: 0.256444, avg_loss: 0.262837 014005/063150, loss: 0.128041, avg_loss: 0.262775 014010/063150, loss: 0.088349, avg_loss: 0.262733 014015/063150, loss: 0.212553, avg_loss: 0.262692 014020/063150, loss: 0.087990, avg_loss: 0.262636 014025/063150, loss: 0.038597, avg_loss: 0.262564 014030/063150, loss: 0.316874, avg_loss: 0.262512 014035/063150, loss: 0.044449, avg_loss: 0.262449 014040/063150, loss: 0.326197, avg_loss: 0.262405 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 14040/63150: {'accuracy': 0.8589449541284404} 014045/063150, loss: 0.177093, avg_loss: 0.262340 014050/063150, loss: 0.036103, avg_loss: 0.262290 014055/063150, loss: 0.060110, avg_loss: 0.262223 014060/063150, loss: 0.056052, avg_loss: 0.262196 014065/063150, loss: 0.046807, avg_loss: 0.262145 014070/063150, loss: 0.028137, avg_loss: 0.262088 014075/063150, loss: 0.231622, avg_loss: 0.262031 014080/063150, loss: 0.051018, avg_loss: 0.261989 014085/063150, loss: 0.016655, avg_loss: 0.261940 014090/063150, loss: 0.140307, avg_loss: 0.261899 014095/063150, loss: 0.029685, avg_loss: 0.261857 014100/063150, loss: 0.094307, avg_loss: 0.261827 014105/063150, loss: 0.138286, avg_loss: 0.261769 014110/063150, loss: 0.051098, avg_loss: 0.261706 014115/063150, loss: 0.172384, avg_loss: 0.261658 014120/063150, loss: 0.149141, avg_loss: 0.261597 014125/063150, loss: 0.092800, avg_loss: 0.261531 014130/063150, loss: 0.067818, avg_loss: 0.261476 014135/063150, loss: 0.094962, avg_loss: 0.261433 014140/063150, loss: 0.045183, avg_loss: 0.261381 014145/063150, loss: 0.085965, avg_loss: 0.261328 014150/063150, loss: 0.166606, avg_loss: 0.261287 014155/063150, loss: 0.145029, avg_loss: 0.261240 014160/063150, loss: 0.060549, avg_loss: 0.261186 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 14160/63150: {'accuracy': 0.8474770642201835} 014165/063150, loss: 0.020250, avg_loss: 0.261122 014170/063150, loss: 0.399914, avg_loss: 0.261074 014175/063150, loss: 0.065335, avg_loss: 0.261008 014180/063150, loss: 0.235548, avg_loss: 0.260967 014185/063150, loss: 0.017201, avg_loss: 0.260909 014190/063150, loss: 0.036110, avg_loss: 0.260864 014195/063150, loss: 0.088286, avg_loss: 0.260820 014200/063150, loss: 0.074259, avg_loss: 0.260758 014205/063150, loss: 0.059955, avg_loss: 0.260701 014210/063150, loss: 0.082869, avg_loss: 0.260642 014215/063150, loss: 0.098786, avg_loss: 0.260595 014220/063150, loss: 0.175388, avg_loss: 0.260569 014225/063150, loss: 0.085989, avg_loss: 0.260520 014230/063150, loss: 0.084125, avg_loss: 0.260462 014235/063150, loss: 0.183629, avg_loss: 0.260432 014240/063150, loss: 0.088892, avg_loss: 0.260388 014245/063150, loss: 0.083819, avg_loss: 0.260324 014250/063150, loss: 0.346467, avg_loss: 0.260294 014255/063150, loss: 0.062989, avg_loss: 0.260247 014260/063150, loss: 0.212593, avg_loss: 0.260204 014265/063150, loss: 0.076411, avg_loss: 0.260172 014270/063150, loss: 0.115249, avg_loss: 0.260107 014275/063150, loss: 0.063950, avg_loss: 0.260053 014280/063150, loss: 0.031637, avg_loss: 0.259986 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 14280/63150: {'accuracy': 0.8577981651376146} 014285/063150, loss: 0.054005, avg_loss: 0.259940 014290/063150, loss: 0.089260, avg_loss: 0.259893 014295/063150, loss: 0.066401, avg_loss: 0.259847 014300/063150, loss: 0.137174, avg_loss: 0.259792 014305/063150, loss: 0.101514, avg_loss: 0.259719 014310/063150, loss: 0.063591, avg_loss: 0.259659 014315/063150, loss: 0.020809, avg_loss: 0.259599 014320/063150, loss: 0.178270, avg_loss: 0.259537 014325/063150, loss: 0.039895, avg_loss: 0.259490 014330/063150, loss: 0.154372, avg_loss: 0.259434 014335/063150, loss: 0.200468, avg_loss: 0.259392 014340/063150, loss: 0.037809, avg_loss: 0.259327 014345/063150, loss: 0.067647, avg_loss: 0.259264 014350/063150, loss: 0.145115, avg_loss: 0.259215 014355/063150, loss: 0.026962, avg_loss: 0.259157 014360/063150, loss: 0.121841, avg_loss: 0.259105 014365/063150, loss: 0.022555, avg_loss: 0.259032 014370/063150, loss: 0.047826, avg_loss: 0.258974 014375/063150, loss: 0.010901, avg_loss: 0.258900 014380/063150, loss: 0.096120, avg_loss: 0.258845 014385/063150, loss: 0.292007, avg_loss: 0.258798 014390/063150, loss: 0.206674, avg_loss: 0.258736 014395/063150, loss: 0.073791, avg_loss: 0.258686 014400/063150, loss: 0.034189, avg_loss: 0.258624 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 14400/63150: {'accuracy': 0.8612385321100917} 014405/063150, loss: 0.015206, avg_loss: 0.258570 014410/063150, loss: 0.281437, avg_loss: 0.258538 014415/063150, loss: 0.074194, avg_loss: 0.258499 014420/063150, loss: 0.024112, avg_loss: 0.258438 014425/063150, loss: 0.145962, avg_loss: 0.258382 014430/063150, loss: 0.251093, avg_loss: 0.258355 014435/063150, loss: 0.134923, avg_loss: 0.258307 014440/063150, loss: 0.198847, avg_loss: 0.258301 014445/063150, loss: 0.054991, avg_loss: 0.258246 014450/063150, loss: 0.311306, avg_loss: 0.258196 014455/063150, loss: 0.164875, avg_loss: 0.258138 014460/063150, loss: 0.065179, avg_loss: 0.258094 014465/063150, loss: 0.084651, avg_loss: 0.258034 014470/063150, loss: 0.168442, avg_loss: 0.257981 014475/063150, loss: 0.034467, avg_loss: 0.257909 014480/063150, loss: 0.147439, avg_loss: 0.257889 014485/063150, loss: 0.043330, avg_loss: 0.257826 014490/063150, loss: 0.119306, avg_loss: 0.257777 014495/063150, loss: 0.118149, avg_loss: 0.257739 014500/063150, loss: 0.148272, avg_loss: 0.257688 014505/063150, loss: 0.217492, avg_loss: 0.257653 014510/063150, loss: 0.050174, avg_loss: 0.257602 014515/063150, loss: 0.059788, avg_loss: 0.257539 014520/063150, loss: 0.049787, avg_loss: 0.257507 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 14520/63150: {'accuracy': 0.8623853211009175} 014525/063150, loss: 0.042498, avg_loss: 0.257449 014530/063150, loss: 0.340567, avg_loss: 0.257409 014535/063150, loss: 0.057205, avg_loss: 0.257355 014540/063150, loss: 0.119171, avg_loss: 0.257304 014545/063150, loss: 0.085505, avg_loss: 0.257271 014550/063150, loss: 0.027120, avg_loss: 0.257223 014555/063150, loss: 0.061823, avg_loss: 0.257154 014560/063150, loss: 0.042163, avg_loss: 0.257104 014565/063150, loss: 0.119290, avg_loss: 0.257054 014570/063150, loss: 0.240499, avg_loss: 0.256998 014575/063150, loss: 0.086652, avg_loss: 0.256939 014580/063150, loss: 0.116210, avg_loss: 0.256870 014585/063150, loss: 0.121100, avg_loss: 0.256828 014590/063150, loss: 0.099862, avg_loss: 0.256778 014595/063150, loss: 0.138586, avg_loss: 0.256736 014600/063150, loss: 0.045343, avg_loss: 0.256679 014605/063150, loss: 0.137912, avg_loss: 0.256620 014610/063150, loss: 0.029011, avg_loss: 0.256575 014615/063150, loss: 0.061517, avg_loss: 0.256512 014620/063150, loss: 0.121189, avg_loss: 0.256477 014625/063150, loss: 0.296102, avg_loss: 0.256448 014630/063150, loss: 0.081778, avg_loss: 0.256400 014635/063150, loss: 0.124910, avg_loss: 0.256357 014640/063150, loss: 0.207077, avg_loss: 0.256319 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 6, step 14640/63150: {'accuracy': 0.8600917431192661} 014645/063150, loss: 0.058024, avg_loss: 0.256280 014650/063150, loss: 0.150247, avg_loss: 0.256216 014655/063150, loss: 0.110824, avg_loss: 0.256178 014660/063150, loss: 0.292875, avg_loss: 0.256133 014665/063150, loss: 0.233535, avg_loss: 0.256114 014670/063150, loss: 0.087444, avg_loss: 0.256052 014675/063150, loss: 0.154656, avg_loss: 0.256013 014680/063150, loss: 0.079222, avg_loss: 0.255978 014685/063150, loss: 0.084735, avg_loss: 0.255920 014690/063150, loss: 0.019807, avg_loss: 0.255851 014695/063150, loss: 0.126704, avg_loss: 0.255799 014700/063150, loss: 0.077859, avg_loss: 0.255753 014705/063150, loss: 0.037996, avg_loss: 0.255692 014710/063150, loss: 0.184385, avg_loss: 0.255654 014715/063150, loss: 0.103425, avg_loss: 0.255624 014720/063150, loss: 0.072027, avg_loss: 0.255573 014725/063150, loss: 0.065999, avg_loss: 0.255518 014730/063150, loss: 0.084613, avg_loss: 0.255458 014735/063150, loss: 0.173158, avg_loss: 0.255426 014740/063150, loss: 0.042773, avg_loss: 0.255356 014745/063150, loss: 0.023401, avg_loss: 0.255290 014750/063150, loss: 0.270549, avg_loss: 0.255232 014755/063150, loss: 0.028614, avg_loss: 0.255167 014760/063150, loss: 0.100419, avg_loss: 0.255107 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 14760/63150: {'accuracy': 0.8589449541284404} 014765/063150, loss: 0.047162, avg_loss: 0.255042 014770/063150, loss: 0.048707, avg_loss: 0.254974 014775/063150, loss: 0.037687, avg_loss: 0.254917 014780/063150, loss: 0.064711, avg_loss: 0.254868 014785/063150, loss: 0.031524, avg_loss: 0.254799 014790/063150, loss: 0.098283, avg_loss: 0.254782 014795/063150, loss: 0.025336, avg_loss: 0.254720 014800/063150, loss: 0.153026, avg_loss: 0.254667 014805/063150, loss: 0.027600, avg_loss: 0.254610 014810/063150, loss: 0.023772, avg_loss: 0.254570 014815/063150, loss: 0.049845, avg_loss: 0.254500 014820/063150, loss: 0.028427, avg_loss: 0.254441 014825/063150, loss: 0.020604, avg_loss: 0.254368 014830/063150, loss: 0.033797, avg_loss: 0.254316 014835/063150, loss: 0.254177, avg_loss: 0.254265 014840/063150, loss: 0.279819, avg_loss: 0.254216 014845/063150, loss: 0.209376, avg_loss: 0.254161 014850/063150, loss: 0.090203, avg_loss: 0.254106 014855/063150, loss: 0.221246, avg_loss: 0.254065 014860/063150, loss: 0.069436, avg_loss: 0.254010 014865/063150, loss: 0.033942, avg_loss: 0.253952 014870/063150, loss: 0.131283, avg_loss: 0.253924 014875/063150, loss: 0.097068, avg_loss: 0.253883 014880/063150, loss: 0.051916, avg_loss: 0.253822 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 14880/63150: {'accuracy': 0.8520642201834863} 014885/063150, loss: 0.060921, avg_loss: 0.253770 014890/063150, loss: 0.022754, avg_loss: 0.253715 014895/063150, loss: 0.016856, avg_loss: 0.253650 014900/063150, loss: 0.169625, avg_loss: 0.253610 014905/063150, loss: 0.086245, avg_loss: 0.253552 014910/063150, loss: 0.038238, avg_loss: 0.253482 014915/063150, loss: 0.013177, avg_loss: 0.253405 014920/063150, loss: 0.256956, avg_loss: 0.253349 014925/063150, loss: 0.032279, avg_loss: 0.253299 014930/063150, loss: 0.087344, avg_loss: 0.253249 014935/063150, loss: 0.010722, avg_loss: 0.253183 014940/063150, loss: 0.052754, avg_loss: 0.253118 014945/063150, loss: 0.006596, avg_loss: 0.253055 014950/063150, loss: 0.127233, avg_loss: 0.252994 014955/063150, loss: 0.063616, avg_loss: 0.252931 014960/063150, loss: 0.227392, avg_loss: 0.252893 014965/063150, loss: 0.062876, avg_loss: 0.252826 014970/063150, loss: 0.187272, avg_loss: 0.252771 014975/063150, loss: 0.181508, avg_loss: 0.252717 014980/063150, loss: 0.264755, avg_loss: 0.252685 014985/063150, loss: 0.067823, avg_loss: 0.252659 014990/063150, loss: 0.117201, avg_loss: 0.252611 014995/063150, loss: 0.067068, avg_loss: 0.252555 015000/063150, loss: 0.101558, avg_loss: 0.252500 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15000/63150: {'accuracy': 0.8509174311926605} 015005/063150, loss: 0.057630, avg_loss: 0.252457 015010/063150, loss: 0.037018, avg_loss: 0.252391 015015/063150, loss: 0.283105, avg_loss: 0.252355 015020/063150, loss: 0.119350, avg_loss: 0.252311 015025/063150, loss: 0.099779, avg_loss: 0.252251 015030/063150, loss: 0.039832, avg_loss: 0.252202 015035/063150, loss: 0.086444, avg_loss: 0.252146 015040/063150, loss: 0.149655, avg_loss: 0.252112 015045/063150, loss: 0.079242, avg_loss: 0.252051 015050/063150, loss: 0.082632, avg_loss: 0.251981 015055/063150, loss: 0.166733, avg_loss: 0.251936 015060/063150, loss: 0.041102, avg_loss: 0.251898 015065/063150, loss: 0.432804, avg_loss: 0.251862 015070/063150, loss: 0.082084, avg_loss: 0.251805 015075/063150, loss: 0.020640, avg_loss: 0.251747 015080/063150, loss: 0.072865, avg_loss: 0.251694 015085/063150, loss: 0.018421, avg_loss: 0.251641 015090/063150, loss: 0.220518, avg_loss: 0.251584 015095/063150, loss: 0.052701, avg_loss: 0.251526 015100/063150, loss: 0.059496, avg_loss: 0.251475 015105/063150, loss: 0.108729, avg_loss: 0.251419 015110/063150, loss: 0.098916, avg_loss: 0.251363 015115/063150, loss: 0.046422, avg_loss: 0.251309 015120/063150, loss: 0.173842, avg_loss: 0.251254 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15120/63150: {'accuracy': 0.8612385321100917} 015125/063150, loss: 0.036144, avg_loss: 0.251209 015130/063150, loss: 0.028506, avg_loss: 0.251142 015135/063150, loss: 0.092467, avg_loss: 0.251086 015140/063150, loss: 0.049376, avg_loss: 0.251026 015145/063150, loss: 0.157167, avg_loss: 0.250971 015150/063150, loss: 0.107291, avg_loss: 0.250921 015155/063150, loss: 0.097855, avg_loss: 0.250865 015160/063150, loss: 0.015526, avg_loss: 0.250796 015165/063150, loss: 0.049513, avg_loss: 0.250747 015170/063150, loss: 0.102515, avg_loss: 0.250687 015175/063150, loss: 0.187899, avg_loss: 0.250628 015180/063150, loss: 0.026176, avg_loss: 0.250586 015185/063150, loss: 0.060727, avg_loss: 0.250519 015190/063150, loss: 0.031158, avg_loss: 0.250482 015195/063150, loss: 0.142221, avg_loss: 0.250423 015200/063150, loss: 0.045691, avg_loss: 0.250364 015205/063150, loss: 0.093195, avg_loss: 0.250320 015210/063150, loss: 0.106645, avg_loss: 0.250256 015215/063150, loss: 0.260352, avg_loss: 0.250221 015220/063150, loss: 0.086733, avg_loss: 0.250162 015225/063150, loss: 0.155065, avg_loss: 0.250118 015230/063150, loss: 0.143915, avg_loss: 0.250074 015235/063150, loss: 0.037681, avg_loss: 0.250027 015240/063150, loss: 0.139268, avg_loss: 0.249977 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15240/63150: {'accuracy': 0.8520642201834863} 015245/063150, loss: 0.171303, avg_loss: 0.249936 015250/063150, loss: 0.131963, avg_loss: 0.249886 015255/063150, loss: 0.017564, avg_loss: 0.249836 015260/063150, loss: 0.053436, avg_loss: 0.249788 015265/063150, loss: 0.015324, avg_loss: 0.249744 015270/063150, loss: 0.031634, avg_loss: 0.249688 015275/063150, loss: 0.163604, avg_loss: 0.249647 015280/063150, loss: 0.028024, avg_loss: 0.249587 015285/063150, loss: 0.024133, avg_loss: 0.249528 015290/063150, loss: 0.286434, avg_loss: 0.249497 015295/063150, loss: 0.024551, avg_loss: 0.249435 015300/063150, loss: 0.064727, avg_loss: 0.249377 015305/063150, loss: 0.022773, avg_loss: 0.249317 015310/063150, loss: 0.192043, avg_loss: 0.249287 015315/063150, loss: 0.125754, avg_loss: 0.249238 015320/063150, loss: 0.172836, avg_loss: 0.249187 015325/063150, loss: 0.039195, avg_loss: 0.249127 015330/063150, loss: 0.116977, avg_loss: 0.249090 015335/063150, loss: 0.026914, avg_loss: 0.249050 015340/063150, loss: 0.076980, avg_loss: 0.249006 015345/063150, loss: 0.167223, avg_loss: 0.248953 015350/063150, loss: 0.043318, avg_loss: 0.248910 015355/063150, loss: 0.211880, avg_loss: 0.248859 015360/063150, loss: 0.237495, avg_loss: 0.248805 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15360/63150: {'accuracy': 0.856651376146789} 015365/063150, loss: 0.068923, avg_loss: 0.248754 015370/063150, loss: 0.122501, avg_loss: 0.248703 015375/063150, loss: 0.244610, avg_loss: 0.248658 015380/063150, loss: 0.038137, avg_loss: 0.248599 015385/063150, loss: 0.054783, avg_loss: 0.248533 015390/063150, loss: 0.014205, avg_loss: 0.248504 015395/063150, loss: 0.163701, avg_loss: 0.248451 015400/063150, loss: 0.141392, avg_loss: 0.248409 015405/063150, loss: 0.047014, avg_loss: 0.248352 015410/063150, loss: 0.161129, avg_loss: 0.248311 015415/063150, loss: 0.063588, avg_loss: 0.248256 015420/063150, loss: 0.106174, avg_loss: 0.248216 015425/063150, loss: 0.135584, avg_loss: 0.248159 015430/063150, loss: 0.040877, avg_loss: 0.248122 015435/063150, loss: 0.058661, avg_loss: 0.248074 015440/063150, loss: 0.060923, avg_loss: 0.248022 015445/063150, loss: 0.043286, avg_loss: 0.247969 015450/063150, loss: 0.097451, avg_loss: 0.247926 015455/063150, loss: 0.059941, avg_loss: 0.247883 015460/063150, loss: 0.057785, avg_loss: 0.247840 015465/063150, loss: 0.122885, avg_loss: 0.247789 015470/063150, loss: 0.049087, avg_loss: 0.247754 015475/063150, loss: 0.094039, avg_loss: 0.247696 015480/063150, loss: 0.168765, avg_loss: 0.247647 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15480/63150: {'accuracy': 0.8704128440366973} 015485/063150, loss: 0.015912, avg_loss: 0.247586 015490/063150, loss: 0.133411, avg_loss: 0.247534 015495/063150, loss: 0.075367, avg_loss: 0.247479 015500/063150, loss: 0.054369, avg_loss: 0.247413 015505/063150, loss: 0.015628, avg_loss: 0.247351 015510/063150, loss: 0.014699, avg_loss: 0.247302 015515/063150, loss: 0.149320, avg_loss: 0.247253 015520/063150, loss: 0.003490, avg_loss: 0.247195 015525/063150, loss: 0.036033, avg_loss: 0.247159 015530/063150, loss: 0.227210, avg_loss: 0.247136 015535/063150, loss: 0.112899, avg_loss: 0.247093 015540/063150, loss: 0.061346, avg_loss: 0.247038 015545/063150, loss: 0.141176, avg_loss: 0.247006 015550/063150, loss: 0.082260, avg_loss: 0.246953 015555/063150, loss: 0.077427, avg_loss: 0.246921 015560/063150, loss: 0.032544, avg_loss: 0.246872 015565/063150, loss: 0.028731, avg_loss: 0.246816 015570/063150, loss: 0.151045, avg_loss: 0.246767 015575/063150, loss: 0.156013, avg_loss: 0.246724 015580/063150, loss: 0.176002, avg_loss: 0.246689 015585/063150, loss: 0.195044, avg_loss: 0.246633 015590/063150, loss: 0.145153, avg_loss: 0.246580 015595/063150, loss: 0.060876, avg_loss: 0.246530 015600/063150, loss: 0.019595, avg_loss: 0.246485 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15600/63150: {'accuracy': 0.8520642201834863} 015605/063150, loss: 0.174659, avg_loss: 0.246442 015610/063150, loss: 0.076890, avg_loss: 0.246397 015615/063150, loss: 0.054905, avg_loss: 0.246371 015620/063150, loss: 0.034342, avg_loss: 0.246326 015625/063150, loss: 0.025884, avg_loss: 0.246270 015630/063150, loss: 0.073457, avg_loss: 0.246205 015635/063150, loss: 0.170781, avg_loss: 0.246159 015640/063150, loss: 0.063921, avg_loss: 0.246114 015645/063150, loss: 0.073859, avg_loss: 0.246069 015650/063150, loss: 0.201619, avg_loss: 0.246014 015655/063150, loss: 0.218607, avg_loss: 0.245992 015660/063150, loss: 0.014419, avg_loss: 0.245934 015665/063150, loss: 0.199271, avg_loss: 0.245897 015670/063150, loss: 0.061761, avg_loss: 0.245848 015675/063150, loss: 0.060662, avg_loss: 0.245784 015680/063150, loss: 0.189873, avg_loss: 0.245758 015685/063150, loss: 0.081219, avg_loss: 0.245718 015690/063150, loss: 0.103686, avg_loss: 0.245674 015695/063150, loss: 0.049083, avg_loss: 0.245640 015700/063150, loss: 0.213367, avg_loss: 0.245612 015705/063150, loss: 0.207482, avg_loss: 0.245575 015710/063150, loss: 0.024889, avg_loss: 0.245526 015715/063150, loss: 0.079323, avg_loss: 0.245485 015720/063150, loss: 0.134524, avg_loss: 0.245455 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15720/63150: {'accuracy': 0.8371559633027523} 015725/063150, loss: 0.090505, avg_loss: 0.245400 015730/063150, loss: 0.016397, avg_loss: 0.245347 015735/063150, loss: 0.129737, avg_loss: 0.245297 015740/063150, loss: 0.019403, avg_loss: 0.245242 015745/063150, loss: 0.063688, avg_loss: 0.245209 015750/063150, loss: 0.033200, avg_loss: 0.245145 015755/063150, loss: 0.220870, avg_loss: 0.245094 015760/063150, loss: 0.101973, avg_loss: 0.245033 015765/063150, loss: 0.193970, avg_loss: 0.244987 015770/063150, loss: 0.096565, avg_loss: 0.244955 015775/063150, loss: 0.066594, avg_loss: 0.244907 015780/063150, loss: 0.119268, avg_loss: 0.244866 015785/063150, loss: 0.182717, avg_loss: 0.244846 015790/063150, loss: 0.123303, avg_loss: 0.244793 015795/063150, loss: 0.094873, avg_loss: 0.244766 015800/063150, loss: 0.175980, avg_loss: 0.244722 015805/063150, loss: 0.033686, avg_loss: 0.244671 015810/063150, loss: 0.091493, avg_loss: 0.244618 015815/063150, loss: 0.157901, avg_loss: 0.244599 015820/063150, loss: 0.061673, avg_loss: 0.244553 015825/063150, loss: 0.042599, avg_loss: 0.244499 015830/063150, loss: 0.022082, avg_loss: 0.244455 015835/063150, loss: 0.222017, avg_loss: 0.244407 015840/063150, loss: 0.232396, avg_loss: 0.244364 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15840/63150: {'accuracy': 0.8451834862385321} 015845/063150, loss: 0.072962, avg_loss: 0.244309 015850/063150, loss: 0.007790, avg_loss: 0.244267 015855/063150, loss: 0.021392, avg_loss: 0.244220 015860/063150, loss: 0.162051, avg_loss: 0.244171 015865/063150, loss: 0.013669, avg_loss: 0.244129 015870/063150, loss: 0.082533, avg_loss: 0.244099 015875/063150, loss: 0.048399, avg_loss: 0.244044 015880/063150, loss: 0.054102, avg_loss: 0.243990 015885/063150, loss: 0.045106, avg_loss: 0.243946 015890/063150, loss: 0.018356, avg_loss: 0.243893 015895/063150, loss: 0.064778, avg_loss: 0.243834 015900/063150, loss: 0.106186, avg_loss: 0.243805 015905/063150, loss: 0.013312, avg_loss: 0.243752 015910/063150, loss: 0.308522, avg_loss: 0.243715 015915/063150, loss: 0.206676, avg_loss: 0.243669 015920/063150, loss: 0.057731, avg_loss: 0.243610 015925/063150, loss: 0.133215, avg_loss: 0.243565 015930/063150, loss: 0.125428, avg_loss: 0.243518 015935/063150, loss: 0.101146, avg_loss: 0.243471 015940/063150, loss: 0.048975, avg_loss: 0.243412 015945/063150, loss: 0.095781, avg_loss: 0.243362 015950/063150, loss: 0.147959, avg_loss: 0.243320 015955/063150, loss: 0.018992, avg_loss: 0.243263 015960/063150, loss: 0.147412, avg_loss: 0.243239 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 15960/63150: {'accuracy': 0.8635321100917431} 015965/063150, loss: 0.046434, avg_loss: 0.243195 015970/063150, loss: 0.103837, avg_loss: 0.243160 015975/063150, loss: 0.036223, avg_loss: 0.243098 015980/063150, loss: 0.155872, avg_loss: 0.243048 015985/063150, loss: 0.048480, avg_loss: 0.243014 015990/063150, loss: 0.230808, avg_loss: 0.242982 015995/063150, loss: 0.068854, avg_loss: 0.242936 016000/063150, loss: 0.163879, avg_loss: 0.242897 016005/063150, loss: 0.081024, avg_loss: 0.242860 016010/063150, loss: 0.050348, avg_loss: 0.242808 016015/063150, loss: 0.036733, avg_loss: 0.242745 016020/063150, loss: 0.127995, avg_loss: 0.242693 016025/063150, loss: 0.139483, avg_loss: 0.242646 016030/063150, loss: 0.214190, avg_loss: 0.242614 016035/063150, loss: 0.008660, avg_loss: 0.242587 016040/063150, loss: 0.023276, avg_loss: 0.242540 016045/063150, loss: 0.079154, avg_loss: 0.242500 016050/063150, loss: 0.095760, avg_loss: 0.242466 016055/063150, loss: 0.128351, avg_loss: 0.242422 016060/063150, loss: 0.078901, avg_loss: 0.242374 016065/063150, loss: 0.220945, avg_loss: 0.242330 016070/063150, loss: 0.124696, avg_loss: 0.242288 016075/063150, loss: 0.105566, avg_loss: 0.242240 016080/063150, loss: 0.061437, avg_loss: 0.242192 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16080/63150: {'accuracy': 0.8623853211009175} 016085/063150, loss: 0.077287, avg_loss: 0.242158 016090/063150, loss: 0.023930, avg_loss: 0.242113 016095/063150, loss: 0.107452, avg_loss: 0.242069 016100/063150, loss: 0.162722, avg_loss: 0.242030 016105/063150, loss: 0.179557, avg_loss: 0.241990 016110/063150, loss: 0.133646, avg_loss: 0.241937 016115/063150, loss: 0.150010, avg_loss: 0.241893 016120/063150, loss: 0.198442, avg_loss: 0.241860 016125/063150, loss: 0.107166, avg_loss: 0.241802 016130/063150, loss: 0.024254, avg_loss: 0.241751 016135/063150, loss: 0.076878, avg_loss: 0.241699 016140/063150, loss: 0.045169, avg_loss: 0.241636 016145/063150, loss: 0.100221, avg_loss: 0.241597 016150/063150, loss: 0.030091, avg_loss: 0.241540 016155/063150, loss: 0.501772, avg_loss: 0.241504 016160/063150, loss: 0.041420, avg_loss: 0.241441 016165/063150, loss: 0.414921, avg_loss: 0.241442 016170/063150, loss: 0.067747, avg_loss: 0.241397 016175/063150, loss: 0.165367, avg_loss: 0.241349 016180/063150, loss: 0.051663, avg_loss: 0.241309 016185/063150, loss: 0.076393, avg_loss: 0.241257 016190/063150, loss: 0.234359, avg_loss: 0.241223 016195/063150, loss: 0.114011, avg_loss: 0.241194 016200/063150, loss: 0.034018, avg_loss: 0.241161 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16200/63150: {'accuracy': 0.8520642201834863} 016205/063150, loss: 0.132344, avg_loss: 0.241119 016210/063150, loss: 0.069125, avg_loss: 0.241057 016215/063150, loss: 0.188767, avg_loss: 0.241012 016220/063150, loss: 0.185615, avg_loss: 0.240975 016225/063150, loss: 0.286258, avg_loss: 0.240949 016230/063150, loss: 0.057768, avg_loss: 0.240905 016235/063150, loss: 0.108053, avg_loss: 0.240855 016240/063150, loss: 0.035894, avg_loss: 0.240809 016245/063150, loss: 0.015202, avg_loss: 0.240753 016250/063150, loss: 0.098608, avg_loss: 0.240714 016255/063150, loss: 0.032770, avg_loss: 0.240678 016260/063150, loss: 0.274860, avg_loss: 0.240650 016265/063150, loss: 0.052015, avg_loss: 0.240599 016270/063150, loss: 0.186151, avg_loss: 0.240554 016275/063150, loss: 0.022749, avg_loss: 0.240521 016280/063150, loss: 0.020349, avg_loss: 0.240468 016285/063150, loss: 0.129580, avg_loss: 0.240433 016290/063150, loss: 0.077496, avg_loss: 0.240390 016295/063150, loss: 0.091408, avg_loss: 0.240342 016300/063150, loss: 0.174978, avg_loss: 0.240306 016305/063150, loss: 0.066926, avg_loss: 0.240263 016310/063150, loss: 0.218361, avg_loss: 0.240225 016315/063150, loss: 0.308288, avg_loss: 0.240198 016320/063150, loss: 0.056079, avg_loss: 0.240151 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16320/63150: {'accuracy': 0.8497706422018348} 016325/063150, loss: 0.172810, avg_loss: 0.240114 016330/063150, loss: 0.192324, avg_loss: 0.240070 016335/063150, loss: 0.054192, avg_loss: 0.240039 016340/063150, loss: 0.052309, avg_loss: 0.240005 016345/063150, loss: 0.079147, avg_loss: 0.239961 016350/063150, loss: 0.065793, avg_loss: 0.239917 016355/063150, loss: 0.328105, avg_loss: 0.239876 016360/063150, loss: 0.071816, avg_loss: 0.239824 016365/063150, loss: 0.144563, avg_loss: 0.239776 016370/063150, loss: 0.208988, avg_loss: 0.239746 016375/063150, loss: 0.084231, avg_loss: 0.239720 016380/063150, loss: 0.077513, avg_loss: 0.239671 016385/063150, loss: 0.015028, avg_loss: 0.239622 016390/063150, loss: 0.025971, avg_loss: 0.239568 016395/063150, loss: 0.144280, avg_loss: 0.239542 016400/063150, loss: 0.113108, avg_loss: 0.239502 016405/063150, loss: 0.066161, avg_loss: 0.239463 016410/063150, loss: 0.057078, avg_loss: 0.239421 016415/063150, loss: 0.085710, avg_loss: 0.239390 016420/063150, loss: 0.065105, avg_loss: 0.239345 016425/063150, loss: 0.124231, avg_loss: 0.239296 016430/063150, loss: 0.119014, avg_loss: 0.239256 016435/063150, loss: 0.172879, avg_loss: 0.239214 016440/063150, loss: 0.175219, avg_loss: 0.239174 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16440/63150: {'accuracy': 0.8451834862385321} 016445/063150, loss: 0.162650, avg_loss: 0.239134 016450/063150, loss: 0.055402, avg_loss: 0.239080 016455/063150, loss: 0.058849, avg_loss: 0.239021 016460/063150, loss: 0.103137, avg_loss: 0.238980 016465/063150, loss: 0.167158, avg_loss: 0.238932 016470/063150, loss: 0.088193, avg_loss: 0.238884 016475/063150, loss: 0.038158, avg_loss: 0.238831 016480/063150, loss: 0.075967, avg_loss: 0.238794 016485/063150, loss: 0.150140, avg_loss: 0.238745 016490/063150, loss: 0.029540, avg_loss: 0.238712 016495/063150, loss: 0.100816, avg_loss: 0.238673 016500/063150, loss: 0.060318, avg_loss: 0.238623 016505/063150, loss: 0.008539, avg_loss: 0.238573 016510/063150, loss: 0.041330, avg_loss: 0.238534 016515/063150, loss: 0.139269, avg_loss: 0.238484 016520/063150, loss: 0.056222, avg_loss: 0.238447 016525/063150, loss: 0.083048, avg_loss: 0.238412 016530/063150, loss: 0.031124, avg_loss: 0.238389 016535/063150, loss: 0.071019, avg_loss: 0.238335 016540/063150, loss: 0.063919, avg_loss: 0.238286 016545/063150, loss: 0.111142, avg_loss: 0.238242 016550/063150, loss: 0.157742, avg_loss: 0.238213 016555/063150, loss: 0.040504, avg_loss: 0.238159 016560/063150, loss: 0.042707, avg_loss: 0.238117 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16560/63150: {'accuracy': 0.8612385321100917} 016565/063150, loss: 0.032571, avg_loss: 0.238077 016570/063150, loss: 0.053007, avg_loss: 0.238025 016575/063150, loss: 0.009305, avg_loss: 0.237972 016580/063150, loss: 0.076985, avg_loss: 0.237919 016585/063150, loss: 0.137714, avg_loss: 0.237885 016590/063150, loss: 0.047511, avg_loss: 0.237837 016595/063150, loss: 0.104895, avg_loss: 0.237788 016600/063150, loss: 0.065866, avg_loss: 0.237738 016605/063150, loss: 0.041281, avg_loss: 0.237696 016610/063150, loss: 0.055500, avg_loss: 0.237656 016615/063150, loss: 0.193311, avg_loss: 0.237619 016620/063150, loss: 0.082116, avg_loss: 0.237596 016625/063150, loss: 0.065884, avg_loss: 0.237553 016630/063150, loss: 0.045553, avg_loss: 0.237502 016635/063150, loss: 0.224087, avg_loss: 0.237466 016640/063150, loss: 0.071157, avg_loss: 0.237412 016645/063150, loss: 0.189170, avg_loss: 0.237381 016650/063150, loss: 0.206425, avg_loss: 0.237340 016655/063150, loss: 0.095484, avg_loss: 0.237285 016660/063150, loss: 0.039728, avg_loss: 0.237232 016665/063150, loss: 0.163450, avg_loss: 0.237215 016670/063150, loss: 0.165841, avg_loss: 0.237167 016675/063150, loss: 0.287012, avg_loss: 0.237147 016680/063150, loss: 0.041589, avg_loss: 0.237121 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16680/63150: {'accuracy': 0.8623853211009175} 016685/063150, loss: 0.120070, avg_loss: 0.237086 016690/063150, loss: 0.066053, avg_loss: 0.237047 016695/063150, loss: 0.047316, avg_loss: 0.237009 016700/063150, loss: 0.175039, avg_loss: 0.236974 016705/063150, loss: 0.081830, avg_loss: 0.236925 016710/063150, loss: 0.105867, avg_loss: 0.236884 016715/063150, loss: 0.041150, avg_loss: 0.236829 016720/063150, loss: 0.083114, avg_loss: 0.236793 016725/063150, loss: 0.094519, avg_loss: 0.236736 016730/063150, loss: 0.197794, avg_loss: 0.236687 016735/063150, loss: 0.119675, avg_loss: 0.236663 016740/063150, loss: 0.086907, avg_loss: 0.236620 016745/063150, loss: 0.101801, avg_loss: 0.236599 016750/063150, loss: 0.049607, avg_loss: 0.236558 016755/063150, loss: 0.047553, avg_loss: 0.236517 016760/063150, loss: 0.034978, avg_loss: 0.236456 016765/063150, loss: 0.096803, avg_loss: 0.236415 016770/063150, loss: 0.025004, avg_loss: 0.236369 016775/063150, loss: 0.034605, avg_loss: 0.236308 016780/063150, loss: 0.052020, avg_loss: 0.236259 016785/063150, loss: 0.138093, avg_loss: 0.236237 016790/063150, loss: 0.035095, avg_loss: 0.236192 016795/063150, loss: 0.067674, avg_loss: 0.236138 016800/063150, loss: 0.018855, avg_loss: 0.236093 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 7, step 16800/63150: {'accuracy': 0.8635321100917431} 016805/063150, loss: 0.101116, avg_loss: 0.236049 016810/063150, loss: 0.011860, avg_loss: 0.235997 016815/063150, loss: 0.187719, avg_loss: 0.235959 016820/063150, loss: 0.018296, avg_loss: 0.235914 016825/063150, loss: 0.123737, avg_loss: 0.235868 016830/063150, loss: 0.113800, avg_loss: 0.235823 016835/063150, loss: 0.152372, avg_loss: 0.235793 016840/063150, loss: 0.081752, avg_loss: 0.235744 016845/063150, loss: 0.213118, avg_loss: 0.235697 016850/063150, loss: 0.055062, avg_loss: 0.235662 016855/063150, loss: 0.143505, avg_loss: 0.235613 016860/063150, loss: 0.095485, avg_loss: 0.235561 016865/063150, loss: 0.011055, avg_loss: 0.235497 016870/063150, loss: 0.109862, avg_loss: 0.235449 016875/063150, loss: 0.040288, avg_loss: 0.235409 016880/063150, loss: 0.031741, avg_loss: 0.235349 016885/063150, loss: 0.196666, avg_loss: 0.235325 016890/063150, loss: 0.017752, avg_loss: 0.235273 016895/063150, loss: 0.024362, avg_loss: 0.235235 016900/063150, loss: 0.166875, avg_loss: 0.235179 016905/063150, loss: 0.067633, avg_loss: 0.235126 016910/063150, loss: 0.055818, avg_loss: 0.235073 016915/063150, loss: 0.092095, avg_loss: 0.235043 016920/063150, loss: 0.011565, avg_loss: 0.235000 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 16920/63150: {'accuracy': 0.856651376146789} 016925/063150, loss: 0.188910, avg_loss: 0.234976 016930/063150, loss: 0.061645, avg_loss: 0.234923 016935/063150, loss: 0.108689, avg_loss: 0.234871 016940/063150, loss: 0.068740, avg_loss: 0.234831 016945/063150, loss: 0.047377, avg_loss: 0.234789 016950/063150, loss: 0.024611, avg_loss: 0.234745 016955/063150, loss: 0.062378, avg_loss: 0.234688 016960/063150, loss: 0.008458, avg_loss: 0.234633 016965/063150, loss: 0.218376, avg_loss: 0.234589 016970/063150, loss: 0.104255, avg_loss: 0.234544 016975/063150, loss: 0.094962, avg_loss: 0.234487 016980/063150, loss: 0.168265, avg_loss: 0.234442 016985/063150, loss: 0.087028, avg_loss: 0.234403 016990/063150, loss: 0.037372, avg_loss: 0.234347 016995/063150, loss: 0.363057, avg_loss: 0.234322 017000/063150, loss: 0.016488, avg_loss: 0.234272 017005/063150, loss: 0.141457, avg_loss: 0.234226 017010/063150, loss: 0.020359, avg_loss: 0.234170 017015/063150, loss: 0.129698, avg_loss: 0.234128 017020/063150, loss: 0.146840, avg_loss: 0.234079 017025/063150, loss: 0.054228, avg_loss: 0.234049 017030/063150, loss: 0.127275, avg_loss: 0.234005 017035/063150, loss: 0.219676, avg_loss: 0.233957 017040/063150, loss: 0.032730, avg_loss: 0.233926 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17040/63150: {'accuracy': 0.856651376146789} 017045/063150, loss: 0.046308, avg_loss: 0.233872 017050/063150, loss: 0.005736, avg_loss: 0.233816 017055/063150, loss: 0.103439, avg_loss: 0.233773 017060/063150, loss: 0.208360, avg_loss: 0.233723 017065/063150, loss: 0.079755, avg_loss: 0.233669 017070/063150, loss: 0.093970, avg_loss: 0.233631 017075/063150, loss: 0.038068, avg_loss: 0.233581 017080/063150, loss: 0.205549, avg_loss: 0.233533 017085/063150, loss: 0.194604, avg_loss: 0.233495 017090/063150, loss: 0.052513, avg_loss: 0.233450 017095/063150, loss: 0.014904, avg_loss: 0.233404 017100/063150, loss: 0.058633, avg_loss: 0.233358 017105/063150, loss: 0.053776, avg_loss: 0.233308 017110/063150, loss: 0.113098, avg_loss: 0.233268 017115/063150, loss: 0.309650, avg_loss: 0.233234 017120/063150, loss: 0.038255, avg_loss: 0.233180 017125/063150, loss: 0.153401, avg_loss: 0.233134 017130/063150, loss: 0.007677, avg_loss: 0.233083 017135/063150, loss: 0.297544, avg_loss: 0.233046 017140/063150, loss: 0.044526, avg_loss: 0.232996 017145/063150, loss: 0.032363, avg_loss: 0.232953 017150/063150, loss: 0.048358, avg_loss: 0.232899 017155/063150, loss: 0.198430, avg_loss: 0.232874 017160/063150, loss: 0.055898, avg_loss: 0.232837 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17160/63150: {'accuracy': 0.8302752293577982} 017165/063150, loss: 0.098937, avg_loss: 0.232797 017170/063150, loss: 0.105463, avg_loss: 0.232752 017175/063150, loss: 0.059305, avg_loss: 0.232705 017180/063150, loss: 0.036069, avg_loss: 0.232657 017185/063150, loss: 0.059806, avg_loss: 0.232617 017190/063150, loss: 0.027286, avg_loss: 0.232576 017195/063150, loss: 0.046667, avg_loss: 0.232530 017200/063150, loss: 0.167140, avg_loss: 0.232496 017205/063150, loss: 0.047795, avg_loss: 0.232446 017210/063150, loss: 0.020975, avg_loss: 0.232407 017215/063150, loss: 0.067810, avg_loss: 0.232371 017220/063150, loss: 0.131941, avg_loss: 0.232320 017225/063150, loss: 0.067531, avg_loss: 0.232289 017230/063150, loss: 0.054477, avg_loss: 0.232238 017235/063150, loss: 0.126009, avg_loss: 0.232204 017240/063150, loss: 0.237204, avg_loss: 0.232167 017245/063150, loss: 0.053867, avg_loss: 0.232128 017250/063150, loss: 0.108877, avg_loss: 0.232083 017255/063150, loss: 0.009721, avg_loss: 0.232044 017260/063150, loss: 0.186060, avg_loss: 0.232010 017265/063150, loss: 0.069663, avg_loss: 0.231968 017270/063150, loss: 0.029338, avg_loss: 0.231922 017275/063150, loss: 0.109713, avg_loss: 0.231878 017280/063150, loss: 0.102273, avg_loss: 0.231832 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17280/63150: {'accuracy': 0.8612385321100917} 017285/063150, loss: 0.070566, avg_loss: 0.231784 017290/063150, loss: 0.059461, avg_loss: 0.231732 017295/063150, loss: 0.310723, avg_loss: 0.231708 017300/063150, loss: 0.063797, avg_loss: 0.231661 017305/063150, loss: 0.049737, avg_loss: 0.231621 017310/063150, loss: 0.029027, avg_loss: 0.231580 017315/063150, loss: 0.136663, avg_loss: 0.231537 017320/063150, loss: 0.049554, avg_loss: 0.231492 017325/063150, loss: 0.059589, avg_loss: 0.231442 017330/063150, loss: 0.189943, avg_loss: 0.231399 017335/063150, loss: 0.032731, avg_loss: 0.231348 017340/063150, loss: 0.034896, avg_loss: 0.231301 017345/063150, loss: 0.054217, avg_loss: 0.231246 017350/063150, loss: 0.140563, avg_loss: 0.231192 017355/063150, loss: 0.075075, avg_loss: 0.231144 017360/063150, loss: 0.012152, avg_loss: 0.231104 017365/063150, loss: 0.302546, avg_loss: 0.231067 017370/063150, loss: 0.133162, avg_loss: 0.231025 017375/063150, loss: 0.064575, avg_loss: 0.230971 017380/063150, loss: 0.019819, avg_loss: 0.230927 017385/063150, loss: 0.188444, avg_loss: 0.230892 017390/063150, loss: 0.005671, avg_loss: 0.230849 017395/063150, loss: 0.090578, avg_loss: 0.230796 017400/063150, loss: 0.043809, avg_loss: 0.230738 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17400/63150: {'accuracy': 0.8463302752293578} 017405/063150, loss: 0.036402, avg_loss: 0.230681 017410/063150, loss: 0.039472, avg_loss: 0.230639 017415/063150, loss: 0.016200, avg_loss: 0.230588 017420/063150, loss: 0.011126, avg_loss: 0.230552 017425/063150, loss: 0.126715, avg_loss: 0.230504 017430/063150, loss: 0.021441, avg_loss: 0.230471 017435/063150, loss: 0.008443, avg_loss: 0.230416 017440/063150, loss: 0.052094, avg_loss: 0.230361 017445/063150, loss: 0.071290, avg_loss: 0.230327 017450/063150, loss: 0.091956, avg_loss: 0.230276 017455/063150, loss: 0.042454, avg_loss: 0.230239 017460/063150, loss: 0.088620, avg_loss: 0.230203 017465/063150, loss: 0.097100, avg_loss: 0.230161 017470/063150, loss: 0.011998, avg_loss: 0.230105 017475/063150, loss: 0.080687, avg_loss: 0.230060 017480/063150, loss: 0.060479, avg_loss: 0.230016 017485/063150, loss: 0.047041, avg_loss: 0.229981 017490/063150, loss: 0.030602, avg_loss: 0.229931 017495/063150, loss: 0.129102, avg_loss: 0.229879 017500/063150, loss: 0.153531, avg_loss: 0.229835 017505/063150, loss: 0.017624, avg_loss: 0.229789 017510/063150, loss: 0.008433, avg_loss: 0.229733 017515/063150, loss: 0.045323, avg_loss: 0.229688 017520/063150, loss: 0.094997, avg_loss: 0.229635 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17520/63150: {'accuracy': 0.8589449541284404} 017525/063150, loss: 0.021621, avg_loss: 0.229577 017530/063150, loss: 0.046823, avg_loss: 0.229536 017535/063150, loss: 0.007758, avg_loss: 0.229485 017540/063150, loss: 0.014523, avg_loss: 0.229438 017545/063150, loss: 0.081847, avg_loss: 0.229401 017550/063150, loss: 0.010575, avg_loss: 0.229349 017555/063150, loss: 0.486461, avg_loss: 0.229325 017560/063150, loss: 0.125393, avg_loss: 0.229293 017565/063150, loss: 0.007944, avg_loss: 0.229237 017570/063150, loss: 0.088168, avg_loss: 0.229187 017575/063150, loss: 0.144809, avg_loss: 0.229134 017580/063150, loss: 0.031677, avg_loss: 0.229085 017585/063150, loss: 0.008890, avg_loss: 0.229036 017590/063150, loss: 0.204599, avg_loss: 0.228995 017595/063150, loss: 0.034883, avg_loss: 0.228966 017600/063150, loss: 0.011582, avg_loss: 0.228910 017605/063150, loss: 0.019860, avg_loss: 0.228869 017610/063150, loss: 0.005591, avg_loss: 0.228827 017615/063150, loss: 0.031591, avg_loss: 0.228786 017620/063150, loss: 0.154887, avg_loss: 0.228752 017625/063150, loss: 0.018517, avg_loss: 0.228698 017630/063150, loss: 0.035602, avg_loss: 0.228645 017635/063150, loss: 0.018666, avg_loss: 0.228612 017640/063150, loss: 0.066284, avg_loss: 0.228564 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17640/63150: {'accuracy': 0.8486238532110092} 017645/063150, loss: 0.072173, avg_loss: 0.228513 017650/063150, loss: 0.007612, avg_loss: 0.228471 017655/063150, loss: 0.010855, avg_loss: 0.228434 017660/063150, loss: 0.033809, avg_loss: 0.228393 017665/063150, loss: 0.230516, avg_loss: 0.228355 017670/063150, loss: 0.035392, avg_loss: 0.228323 017675/063150, loss: 0.038668, avg_loss: 0.228275 017680/063150, loss: 0.006914, avg_loss: 0.228229 017685/063150, loss: 0.028876, avg_loss: 0.228181 017690/063150, loss: 0.035444, avg_loss: 0.228137 017695/063150, loss: 0.027767, avg_loss: 0.228103 017700/063150, loss: 0.075690, avg_loss: 0.228059 017705/063150, loss: 0.045387, avg_loss: 0.228012 017710/063150, loss: 0.007984, avg_loss: 0.227965 017715/063150, loss: 0.035655, avg_loss: 0.227934 017720/063150, loss: 0.017365, avg_loss: 0.227892 017725/063150, loss: 0.162145, avg_loss: 0.227848 017730/063150, loss: 0.043882, avg_loss: 0.227814 017735/063150, loss: 0.038666, avg_loss: 0.227781 017740/063150, loss: 0.208579, avg_loss: 0.227744 017745/063150, loss: 0.092012, avg_loss: 0.227696 017750/063150, loss: 0.047530, avg_loss: 0.227662 017755/063150, loss: 0.009785, avg_loss: 0.227615 017760/063150, loss: 0.028872, avg_loss: 0.227573 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17760/63150: {'accuracy': 0.8658256880733946} 017765/063150, loss: 0.120344, avg_loss: 0.227555 017770/063150, loss: 0.095952, avg_loss: 0.227526 017775/063150, loss: 0.220576, avg_loss: 0.227491 017780/063150, loss: 0.082850, avg_loss: 0.227438 017785/063150, loss: 0.014846, avg_loss: 0.227390 017790/063150, loss: 0.043109, avg_loss: 0.227359 017795/063150, loss: 0.065601, avg_loss: 0.227312 017800/063150, loss: 0.048581, avg_loss: 0.227278 017805/063150, loss: 0.035231, avg_loss: 0.227233 017810/063150, loss: 0.079415, avg_loss: 0.227190 017815/063150, loss: 0.193466, avg_loss: 0.227159 017820/063150, loss: 0.083692, avg_loss: 0.227113 017825/063150, loss: 0.039092, avg_loss: 0.227070 017830/063150, loss: 0.054011, avg_loss: 0.227021 017835/063150, loss: 0.180928, avg_loss: 0.226995 017840/063150, loss: 0.045921, avg_loss: 0.226962 017845/063150, loss: 0.025807, avg_loss: 0.226913 017850/063150, loss: 0.132919, avg_loss: 0.226873 017855/063150, loss: 0.142249, avg_loss: 0.226846 017860/063150, loss: 0.061275, avg_loss: 0.226799 017865/063150, loss: 0.058661, avg_loss: 0.226755 017870/063150, loss: 0.094644, avg_loss: 0.226713 017875/063150, loss: 0.026756, avg_loss: 0.226658 017880/063150, loss: 0.095061, avg_loss: 0.226617 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 17880/63150: {'accuracy': 0.8635321100917431} 017885/063150, loss: 0.018860, avg_loss: 0.226578 017890/063150, loss: 0.031925, avg_loss: 0.226530 017895/063150, loss: 0.091192, avg_loss: 0.226489 017900/063150, loss: 0.036902, avg_loss: 0.226442 017905/063150, loss: 0.011141, avg_loss: 0.226394 017910/063150, loss: 0.062903, avg_loss: 0.226352 017915/063150, loss: 0.013839, avg_loss: 0.226317 017920/063150, loss: 0.043660, avg_loss: 0.226276 017925/063150, loss: 0.059538, avg_loss: 0.226231 017930/063150, loss: 0.065650, avg_loss: 0.226200 017935/063150, loss: 0.043581, avg_loss: 0.226153 017940/063150, loss: 0.156602, avg_loss: 0.226136 017945/063150, loss: 0.199880, avg_loss: 0.226112 017950/063150, loss: 0.005969, avg_loss: 0.226072 017955/063150, loss: 0.080872, avg_loss: 0.226043 017960/063150, loss: 0.023863, avg_loss: 0.226000 017965/063150, loss: 0.160618, avg_loss: 0.225959 017970/063150, loss: 0.054974, avg_loss: 0.225922 017975/063150, loss: 0.054022, avg_loss: 0.225883 017980/063150, loss: 0.053272, avg_loss: 0.225830 017985/063150, loss: 0.054394, avg_loss: 0.225781 017990/063150, loss: 0.059562, avg_loss: 0.225734 017995/063150, loss: 0.219956, avg_loss: 0.225699 018000/063150, loss: 0.016590, avg_loss: 0.225659 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18000/63150: {'accuracy': 0.8532110091743119} 018005/063150, loss: 0.094877, avg_loss: 0.225623 018010/063150, loss: 0.303773, avg_loss: 0.225609 018015/063150, loss: 0.025608, avg_loss: 0.225554 018020/063150, loss: 0.096185, avg_loss: 0.225516 018025/063150, loss: 0.112359, avg_loss: 0.225472 018030/063150, loss: 0.065774, avg_loss: 0.225431 018035/063150, loss: 0.012045, avg_loss: 0.225398 018040/063150, loss: 0.014176, avg_loss: 0.225351 018045/063150, loss: 0.127408, avg_loss: 0.225311 018050/063150, loss: 0.176268, avg_loss: 0.225264 018055/063150, loss: 0.031399, avg_loss: 0.225236 018060/063150, loss: 0.058895, avg_loss: 0.225218 018065/063150, loss: 0.016002, avg_loss: 0.225165 018070/063150, loss: 0.134147, avg_loss: 0.225126 018075/063150, loss: 0.077181, avg_loss: 0.225091 018080/063150, loss: 0.031730, avg_loss: 0.225055 018085/063150, loss: 0.040803, avg_loss: 0.225009 018090/063150, loss: 0.054315, avg_loss: 0.224966 018095/063150, loss: 0.255076, avg_loss: 0.224930 018100/063150, loss: 0.006734, avg_loss: 0.224881 018105/063150, loss: 0.067851, avg_loss: 0.224855 018110/063150, loss: 0.039820, avg_loss: 0.224828 018115/063150, loss: 0.064140, avg_loss: 0.224776 018120/063150, loss: 0.090054, avg_loss: 0.224735 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18120/63150: {'accuracy': 0.8612385321100917} 018125/063150, loss: 0.068486, avg_loss: 0.224705 018130/063150, loss: 0.012807, avg_loss: 0.224665 018135/063150, loss: 0.061182, avg_loss: 0.224617 018140/063150, loss: 0.036541, avg_loss: 0.224580 018145/063150, loss: 0.085190, avg_loss: 0.224542 018150/063150, loss: 0.145590, avg_loss: 0.224501 018155/063150, loss: 0.006795, avg_loss: 0.224463 018160/063150, loss: 0.066192, avg_loss: 0.224426 018165/063150, loss: 0.042268, avg_loss: 0.224377 018170/063150, loss: 0.025564, avg_loss: 0.224339 018175/063150, loss: 0.013970, avg_loss: 0.224312 018180/063150, loss: 0.011923, avg_loss: 0.224264 018185/063150, loss: 0.307300, avg_loss: 0.224248 018190/063150, loss: 0.141313, avg_loss: 0.224202 018195/063150, loss: 0.255624, avg_loss: 0.224181 018200/063150, loss: 0.021737, avg_loss: 0.224145 018205/063150, loss: 0.090034, avg_loss: 0.224106 018210/063150, loss: 0.066458, avg_loss: 0.224071 018215/063150, loss: 0.210535, avg_loss: 0.224062 018220/063150, loss: 0.100298, avg_loss: 0.224019 018225/063150, loss: 0.099111, avg_loss: 0.223969 018230/063150, loss: 0.095595, avg_loss: 0.223924 018235/063150, loss: 0.115430, avg_loss: 0.223886 018240/063150, loss: 0.014914, avg_loss: 0.223839 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18240/63150: {'accuracy': 0.8555045871559633} 018245/063150, loss: 0.176087, avg_loss: 0.223801 018250/063150, loss: 0.042016, avg_loss: 0.223759 018255/063150, loss: 0.169340, avg_loss: 0.223730 018260/063150, loss: 0.024305, avg_loss: 0.223688 018265/063150, loss: 0.114911, avg_loss: 0.223668 018270/063150, loss: 0.007482, avg_loss: 0.223633 018275/063150, loss: 0.082665, avg_loss: 0.223592 018280/063150, loss: 0.092684, avg_loss: 0.223549 018285/063150, loss: 0.008454, avg_loss: 0.223503 018290/063150, loss: 0.025961, avg_loss: 0.223453 018295/063150, loss: 0.022461, avg_loss: 0.223412 018300/063150, loss: 0.011742, avg_loss: 0.223362 018305/063150, loss: 0.131183, avg_loss: 0.223323 018310/063150, loss: 0.035741, avg_loss: 0.223271 018315/063150, loss: 0.210260, avg_loss: 0.223246 018320/063150, loss: 0.006041, avg_loss: 0.223198 018325/063150, loss: 0.073307, avg_loss: 0.223184 018330/063150, loss: 0.036502, avg_loss: 0.223140 018335/063150, loss: 0.113309, avg_loss: 0.223096 018340/063150, loss: 0.011259, avg_loss: 0.223050 018345/063150, loss: 0.299175, avg_loss: 0.223023 018350/063150, loss: 0.049813, avg_loss: 0.222990 018355/063150, loss: 0.061634, avg_loss: 0.222946 018360/063150, loss: 0.018357, avg_loss: 0.222907 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18360/63150: {'accuracy': 0.8428899082568807} 018365/063150, loss: 0.154111, avg_loss: 0.222865 018370/063150, loss: 0.055873, avg_loss: 0.222826 018375/063150, loss: 0.009445, avg_loss: 0.222801 018380/063150, loss: 0.032229, avg_loss: 0.222757 018385/063150, loss: 0.010724, avg_loss: 0.222730 018390/063150, loss: 0.153864, avg_loss: 0.222697 018395/063150, loss: 0.191097, avg_loss: 0.222659 018400/063150, loss: 0.117784, avg_loss: 0.222617 018405/063150, loss: 0.066621, avg_loss: 0.222572 018410/063150, loss: 0.077809, avg_loss: 0.222530 018415/063150, loss: 0.013450, avg_loss: 0.222499 018420/063150, loss: 0.042740, avg_loss: 0.222466 018425/063150, loss: 0.120426, avg_loss: 0.222430 018430/063150, loss: 0.010818, avg_loss: 0.222385 018435/063150, loss: 0.008703, avg_loss: 0.222345 018440/063150, loss: 0.009350, avg_loss: 0.222306 018445/063150, loss: 0.028555, avg_loss: 0.222266 018450/063150, loss: 0.027964, avg_loss: 0.222226 018455/063150, loss: 0.143402, avg_loss: 0.222184 018460/063150, loss: 0.127254, avg_loss: 0.222143 018465/063150, loss: 0.015297, avg_loss: 0.222117 018470/063150, loss: 0.047723, avg_loss: 0.222071 018475/063150, loss: 0.261815, avg_loss: 0.222031 018480/063150, loss: 0.120404, avg_loss: 0.221988 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18480/63150: {'accuracy': 0.8646788990825688} 018485/063150, loss: 0.064984, avg_loss: 0.221949 018490/063150, loss: 0.037511, avg_loss: 0.221905 018495/063150, loss: 0.127570, avg_loss: 0.221880 018500/063150, loss: 0.195143, avg_loss: 0.221853 018505/063150, loss: 0.037831, avg_loss: 0.221837 018510/063150, loss: 0.088682, avg_loss: 0.221794 018515/063150, loss: 0.053058, avg_loss: 0.221757 018520/063150, loss: 0.080938, avg_loss: 0.221723 018525/063150, loss: 0.095276, avg_loss: 0.221691 018530/063150, loss: 0.039497, avg_loss: 0.221661 018535/063150, loss: 0.044286, avg_loss: 0.221622 018540/063150, loss: 0.319078, avg_loss: 0.221604 018545/063150, loss: 0.054725, avg_loss: 0.221559 018550/063150, loss: 0.049340, avg_loss: 0.221528 018555/063150, loss: 0.080291, avg_loss: 0.221502 018560/063150, loss: 0.033925, avg_loss: 0.221470 018565/063150, loss: 0.058828, avg_loss: 0.221427 018570/063150, loss: 0.101471, avg_loss: 0.221403 018575/063150, loss: 0.008715, avg_loss: 0.221360 018580/063150, loss: 0.139146, avg_loss: 0.221325 018585/063150, loss: 0.037854, avg_loss: 0.221296 018590/063150, loss: 0.170593, avg_loss: 0.221258 018595/063150, loss: 0.182651, avg_loss: 0.221229 018600/063150, loss: 0.024977, avg_loss: 0.221188 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18600/63150: {'accuracy': 0.8623853211009175} 018605/063150, loss: 0.171517, avg_loss: 0.221166 018610/063150, loss: 0.054443, avg_loss: 0.221118 018615/063150, loss: 0.126820, avg_loss: 0.221078 018620/063150, loss: 0.187955, avg_loss: 0.221049 018625/063150, loss: 0.014203, avg_loss: 0.221002 018630/063150, loss: 0.015251, avg_loss: 0.220961 018635/063150, loss: 0.058205, avg_loss: 0.220927 018640/063150, loss: 0.052869, avg_loss: 0.220874 018645/063150, loss: 0.008448, avg_loss: 0.220838 018650/063150, loss: 0.008589, avg_loss: 0.220803 018655/063150, loss: 0.007442, avg_loss: 0.220764 018660/063150, loss: 0.068382, avg_loss: 0.220717 018665/063150, loss: 0.037799, avg_loss: 0.220668 018670/063150, loss: 0.089896, avg_loss: 0.220645 018675/063150, loss: 0.008053, avg_loss: 0.220610 018680/063150, loss: 0.032760, avg_loss: 0.220566 018685/063150, loss: 0.055638, avg_loss: 0.220526 018690/063150, loss: 0.072445, avg_loss: 0.220484 018695/063150, loss: 0.100890, avg_loss: 0.220441 018700/063150, loss: 0.041633, avg_loss: 0.220397 018705/063150, loss: 0.208999, avg_loss: 0.220359 018710/063150, loss: 0.119963, avg_loss: 0.220335 018715/063150, loss: 0.035308, avg_loss: 0.220287 018720/063150, loss: 0.405673, avg_loss: 0.220259 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18720/63150: {'accuracy': 0.8623853211009175} 018725/063150, loss: 0.145305, avg_loss: 0.220219 018730/063150, loss: 0.175643, avg_loss: 0.220203 018735/063150, loss: 0.090242, avg_loss: 0.220168 018740/063150, loss: 0.113953, avg_loss: 0.220123 018745/063150, loss: 0.057388, avg_loss: 0.220088 018750/063150, loss: 0.026575, avg_loss: 0.220064 018755/063150, loss: 0.060014, avg_loss: 0.220026 018760/063150, loss: 0.075421, avg_loss: 0.220006 018765/063150, loss: 0.196025, avg_loss: 0.219966 018770/063150, loss: 0.102817, avg_loss: 0.219939 018775/063150, loss: 0.107872, avg_loss: 0.219905 018780/063150, loss: 0.066504, avg_loss: 0.219869 018785/063150, loss: 0.092743, avg_loss: 0.219837 018790/063150, loss: 0.025489, avg_loss: 0.219807 018795/063150, loss: 0.090109, avg_loss: 0.219769 018800/063150, loss: 0.176351, avg_loss: 0.219730 018805/063150, loss: 0.089408, avg_loss: 0.219688 018810/063150, loss: 0.126943, avg_loss: 0.219658 018815/063150, loss: 0.047076, avg_loss: 0.219629 018820/063150, loss: 0.024257, avg_loss: 0.219586 018825/063150, loss: 0.068136, avg_loss: 0.219548 018830/063150, loss: 0.065755, avg_loss: 0.219520 018835/063150, loss: 0.373434, avg_loss: 0.219501 018840/063150, loss: 0.147209, avg_loss: 0.219467 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 8, step 18840/63150: {'accuracy': 0.856651376146789} 018845/063150, loss: 0.097032, avg_loss: 0.219435 018850/063150, loss: 0.028307, avg_loss: 0.219398 018855/063150, loss: 0.438921, avg_loss: 0.219367 018860/063150, loss: 0.058575, avg_loss: 0.219325 018865/063150, loss: 0.092320, avg_loss: 0.219296 018870/063150, loss: 0.156517, avg_loss: 0.219253 018875/063150, loss: 0.028116, avg_loss: 0.219208 018880/063150, loss: 0.043536, avg_loss: 0.219169 018885/063150, loss: 0.007797, avg_loss: 0.219129 018890/063150, loss: 0.042524, avg_loss: 0.219101 018895/063150, loss: 0.005262, avg_loss: 0.219057 018900/063150, loss: 0.030635, avg_loss: 0.219031 018905/063150, loss: 0.032573, avg_loss: 0.218990 018910/063150, loss: 0.089701, avg_loss: 0.218956 018915/063150, loss: 0.117259, avg_loss: 0.218924 018920/063150, loss: 0.098910, avg_loss: 0.218897 018925/063150, loss: 0.090920, avg_loss: 0.218860 018930/063150, loss: 0.072013, avg_loss: 0.218851 018935/063150, loss: 0.103394, avg_loss: 0.218804 018940/063150, loss: 0.068606, avg_loss: 0.218769 018945/063150, loss: 0.140656, avg_loss: 0.218736 018950/063150, loss: 0.095962, avg_loss: 0.218701 018955/063150, loss: 0.042470, avg_loss: 0.218681 018960/063150, loss: 0.016058, avg_loss: 0.218641 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 18960/63150: {'accuracy': 0.8612385321100917} 018965/063150, loss: 0.180997, avg_loss: 0.218616 018970/063150, loss: 0.110749, avg_loss: 0.218572 018975/063150, loss: 0.016178, avg_loss: 0.218532 018980/063150, loss: 0.162525, avg_loss: 0.218493 018985/063150, loss: 0.011016, avg_loss: 0.218444 018990/063150, loss: 0.007060, avg_loss: 0.218395 018995/063150, loss: 0.011415, avg_loss: 0.218342 019000/063150, loss: 0.105103, avg_loss: 0.218314 019005/063150, loss: 0.052087, avg_loss: 0.218278 019010/063150, loss: 0.035837, avg_loss: 0.218250 019015/063150, loss: 0.047668, avg_loss: 0.218204 019020/063150, loss: 0.033483, avg_loss: 0.218155 019025/063150, loss: 0.052653, avg_loss: 0.218125 019030/063150, loss: 0.202084, avg_loss: 0.218093 019035/063150, loss: 0.086567, avg_loss: 0.218059 019040/063150, loss: 0.008701, avg_loss: 0.218011 019045/063150, loss: 0.014826, avg_loss: 0.217963 019050/063150, loss: 0.066158, avg_loss: 0.217918 019055/063150, loss: 0.016057, avg_loss: 0.217870 019060/063150, loss: 0.011187, avg_loss: 0.217833 019065/063150, loss: 0.020678, avg_loss: 0.217785 019070/063150, loss: 0.061636, avg_loss: 0.217740 019075/063150, loss: 0.053741, avg_loss: 0.217698 019080/063150, loss: 0.246716, avg_loss: 0.217664 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19080/63150: {'accuracy': 0.8635321100917431} 019085/063150, loss: 0.010740, avg_loss: 0.217613 019090/063150, loss: 0.005055, avg_loss: 0.217567 019095/063150, loss: 0.221914, avg_loss: 0.217527 019100/063150, loss: 0.003849, avg_loss: 0.217489 019105/063150, loss: 0.033573, avg_loss: 0.217461 019110/063150, loss: 0.185240, avg_loss: 0.217420 019115/063150, loss: 0.170535, avg_loss: 0.217393 019120/063150, loss: 0.009032, avg_loss: 0.217352 019125/063150, loss: 0.050983, avg_loss: 0.217316 019130/063150, loss: 0.097995, avg_loss: 0.217286 019135/063150, loss: 0.090675, avg_loss: 0.217243 019140/063150, loss: 0.074113, avg_loss: 0.217200 019145/063150, loss: 0.068241, avg_loss: 0.217154 019150/063150, loss: 0.024249, avg_loss: 0.217109 019155/063150, loss: 0.007722, avg_loss: 0.217062 019160/063150, loss: 0.104711, avg_loss: 0.217027 019165/063150, loss: 0.088703, avg_loss: 0.216993 019170/063150, loss: 0.208635, avg_loss: 0.216966 019175/063150, loss: 0.015174, avg_loss: 0.216932 019180/063150, loss: 0.135024, avg_loss: 0.216895 019185/063150, loss: 0.174911, avg_loss: 0.216869 019190/063150, loss: 0.021792, avg_loss: 0.216825 019195/063150, loss: 0.218855, avg_loss: 0.216787 019200/063150, loss: 0.007629, avg_loss: 0.216750 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19200/63150: {'accuracy': 0.8543577981651376} 019205/063150, loss: 0.024247, avg_loss: 0.216708 019210/063150, loss: 0.057514, avg_loss: 0.216661 019215/063150, loss: 0.030202, avg_loss: 0.216615 019220/063150, loss: 0.062161, avg_loss: 0.216570 019225/063150, loss: 0.087198, avg_loss: 0.216525 019230/063150, loss: 0.087506, avg_loss: 0.216484 019235/063150, loss: 0.013654, avg_loss: 0.216455 019240/063150, loss: 0.066586, avg_loss: 0.216415 019245/063150, loss: 0.022497, avg_loss: 0.216372 019250/063150, loss: 0.025786, avg_loss: 0.216336 019255/063150, loss: 0.047644, avg_loss: 0.216296 019260/063150, loss: 0.124519, avg_loss: 0.216266 019265/063150, loss: 0.048460, avg_loss: 0.216222 019270/063150, loss: 0.328344, avg_loss: 0.216192 019275/063150, loss: 0.051528, avg_loss: 0.216155 019280/063150, loss: 0.003120, avg_loss: 0.216120 019285/063150, loss: 0.351587, avg_loss: 0.216102 019290/063150, loss: 0.138053, avg_loss: 0.216065 019295/063150, loss: 0.107496, avg_loss: 0.216027 019300/063150, loss: 0.206535, avg_loss: 0.215999 019305/063150, loss: 0.099606, avg_loss: 0.215956 019310/063150, loss: 0.086262, avg_loss: 0.215918 019315/063150, loss: 0.119667, avg_loss: 0.215889 019320/063150, loss: 0.108768, avg_loss: 0.215846 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19320/63150: {'accuracy': 0.8577981651376146} 019325/063150, loss: 0.051342, avg_loss: 0.215809 019330/063150, loss: 0.357806, avg_loss: 0.215789 019335/063150, loss: 0.043653, avg_loss: 0.215752 019340/063150, loss: 0.011829, avg_loss: 0.215710 019345/063150, loss: 0.102938, avg_loss: 0.215668 019350/063150, loss: 0.019636, avg_loss: 0.215623 019355/063150, loss: 0.020850, avg_loss: 0.215588 019360/063150, loss: 0.119910, avg_loss: 0.215551 019365/063150, loss: 0.030863, avg_loss: 0.215542 019370/063150, loss: 0.068839, avg_loss: 0.215506 019375/063150, loss: 0.047784, avg_loss: 0.215471 019380/063150, loss: 0.121680, avg_loss: 0.215440 019385/063150, loss: 0.049925, avg_loss: 0.215397 019390/063150, loss: 0.027935, avg_loss: 0.215355 019395/063150, loss: 0.055413, avg_loss: 0.215313 019400/063150, loss: 0.189959, avg_loss: 0.215276 019405/063150, loss: 0.011148, avg_loss: 0.215234 019410/063150, loss: 0.024897, avg_loss: 0.215202 019415/063150, loss: 0.161681, avg_loss: 0.215162 019420/063150, loss: 0.025722, avg_loss: 0.215123 019425/063150, loss: 0.030716, avg_loss: 0.215076 019430/063150, loss: 0.018566, avg_loss: 0.215054 019435/063150, loss: 0.056378, avg_loss: 0.215017 019440/063150, loss: 0.025367, avg_loss: 0.214975 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19440/63150: {'accuracy': 0.8658256880733946} 019445/063150, loss: 0.015941, avg_loss: 0.214932 019450/063150, loss: 0.008002, avg_loss: 0.214890 019455/063150, loss: 0.012858, avg_loss: 0.214841 019460/063150, loss: 0.074739, avg_loss: 0.214802 019465/063150, loss: 0.066991, avg_loss: 0.214766 019470/063150, loss: 0.028028, avg_loss: 0.214731 019475/063150, loss: 0.017374, avg_loss: 0.214690 019480/063150, loss: 0.029072, avg_loss: 0.214646 019485/063150, loss: 0.054066, avg_loss: 0.214615 019490/063150, loss: 0.050698, avg_loss: 0.214578 019495/063150, loss: 0.096692, avg_loss: 0.214542 019500/063150, loss: 0.013739, avg_loss: 0.214505 019505/063150, loss: 0.010987, avg_loss: 0.214467 019510/063150, loss: 0.151400, avg_loss: 0.214425 019515/063150, loss: 0.057381, avg_loss: 0.214386 019520/063150, loss: 0.086120, avg_loss: 0.214345 019525/063150, loss: 0.157143, avg_loss: 0.214314 019530/063150, loss: 0.029578, avg_loss: 0.214284 019535/063150, loss: 0.033633, avg_loss: 0.214238 019540/063150, loss: 0.051959, avg_loss: 0.214208 019545/063150, loss: 0.051819, avg_loss: 0.214168 019550/063150, loss: 0.040001, avg_loss: 0.214126 019555/063150, loss: 0.063705, avg_loss: 0.214082 019560/063150, loss: 0.037794, avg_loss: 0.214037 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19560/63150: {'accuracy': 0.856651376146789} 019565/063150, loss: 0.298034, avg_loss: 0.214018 019570/063150, loss: 0.064875, avg_loss: 0.213976 019575/063150, loss: 0.156289, avg_loss: 0.213951 019580/063150, loss: 0.030443, avg_loss: 0.213930 019585/063150, loss: 0.072835, avg_loss: 0.213892 019590/063150, loss: 0.129566, avg_loss: 0.213870 019595/063150, loss: 0.042120, avg_loss: 0.213826 019600/063150, loss: 0.047562, avg_loss: 0.213785 019605/063150, loss: 0.056296, avg_loss: 0.213743 019610/063150, loss: 0.250150, avg_loss: 0.213708 019615/063150, loss: 0.024024, avg_loss: 0.213672 019620/063150, loss: 0.056505, avg_loss: 0.213632 019625/063150, loss: 0.060204, avg_loss: 0.213593 019630/063150, loss: 0.024938, avg_loss: 0.213567 019635/063150, loss: 0.037644, avg_loss: 0.213521 019640/063150, loss: 0.007446, avg_loss: 0.213477 019645/063150, loss: 0.115919, avg_loss: 0.213452 019650/063150, loss: 0.072759, avg_loss: 0.213418 019655/063150, loss: 0.066302, avg_loss: 0.213387 019660/063150, loss: 0.182726, avg_loss: 0.213360 019665/063150, loss: 0.091197, avg_loss: 0.213334 019670/063150, loss: 0.115154, avg_loss: 0.213298 019675/063150, loss: 0.046497, avg_loss: 0.213262 019680/063150, loss: 0.030155, avg_loss: 0.213214 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19680/63150: {'accuracy': 0.8658256880733946} 019685/063150, loss: 0.183271, avg_loss: 0.213187 019690/063150, loss: 0.102782, avg_loss: 0.213152 019695/063150, loss: 0.026408, avg_loss: 0.213119 019700/063150, loss: 0.010477, avg_loss: 0.213079 019705/063150, loss: 0.253706, avg_loss: 0.213050 019710/063150, loss: 0.129237, avg_loss: 0.213017 019715/063150, loss: 0.235604, avg_loss: 0.212992 019720/063150, loss: 0.040719, avg_loss: 0.212958 019725/063150, loss: 0.035795, avg_loss: 0.212912 019730/063150, loss: 0.265109, avg_loss: 0.212880 019735/063150, loss: 0.231487, avg_loss: 0.212850 019740/063150, loss: 0.042533, avg_loss: 0.212821 019745/063150, loss: 0.121317, avg_loss: 0.212788 019750/063150, loss: 0.087611, avg_loss: 0.212752 019755/063150, loss: 0.041269, avg_loss: 0.212721 019760/063150, loss: 0.091146, avg_loss: 0.212687 019765/063150, loss: 0.013834, avg_loss: 0.212648 019770/063150, loss: 0.072278, avg_loss: 0.212613 019775/063150, loss: 0.042760, avg_loss: 0.212573 019780/063150, loss: 0.041594, avg_loss: 0.212535 019785/063150, loss: 0.054135, avg_loss: 0.212491 019790/063150, loss: 0.141927, avg_loss: 0.212452 019795/063150, loss: 0.142629, avg_loss: 0.212429 019800/063150, loss: 0.073915, avg_loss: 0.212394 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19800/63150: {'accuracy': 0.8635321100917431} 019805/063150, loss: 0.021128, avg_loss: 0.212349 019810/063150, loss: 0.021840, avg_loss: 0.212310 019815/063150, loss: 0.017284, avg_loss: 0.212263 019820/063150, loss: 0.325577, avg_loss: 0.212247 019825/063150, loss: 0.037908, avg_loss: 0.212229 019830/063150, loss: 0.146878, avg_loss: 0.212198 019835/063150, loss: 0.114555, avg_loss: 0.212163 019840/063150, loss: 0.032154, avg_loss: 0.212123 019845/063150, loss: 0.108952, avg_loss: 0.212094 019850/063150, loss: 0.133731, avg_loss: 0.212076 019855/063150, loss: 0.092918, avg_loss: 0.212046 019860/063150, loss: 0.140383, avg_loss: 0.212013 019865/063150, loss: 0.045069, avg_loss: 0.211972 019870/063150, loss: 0.058704, avg_loss: 0.211937 019875/063150, loss: 0.140951, avg_loss: 0.211909 019880/063150, loss: 0.054925, avg_loss: 0.211866 019885/063150, loss: 0.076593, avg_loss: 0.211827 019890/063150, loss: 0.056155, avg_loss: 0.211784 019895/063150, loss: 0.155074, avg_loss: 0.211750 019900/063150, loss: 0.005892, avg_loss: 0.211707 019905/063150, loss: 0.017230, avg_loss: 0.211679 019910/063150, loss: 0.072572, avg_loss: 0.211651 019915/063150, loss: 0.028444, avg_loss: 0.211618 019920/063150, loss: 0.189735, avg_loss: 0.211608 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 19920/63150: {'accuracy': 0.8646788990825688} 019925/063150, loss: 0.160397, avg_loss: 0.211577 019930/063150, loss: 0.043215, avg_loss: 0.211542 019935/063150, loss: 0.234430, avg_loss: 0.211510 019940/063150, loss: 0.007714, avg_loss: 0.211476 019945/063150, loss: 0.015270, avg_loss: 0.211437 019950/063150, loss: 0.031907, avg_loss: 0.211408 019955/063150, loss: 0.031654, avg_loss: 0.211374 019960/063150, loss: 0.019460, avg_loss: 0.211337 019965/063150, loss: 0.044637, avg_loss: 0.211300 019970/063150, loss: 0.047601, avg_loss: 0.211265 019975/063150, loss: 0.050534, avg_loss: 0.211229 019980/063150, loss: 0.084906, avg_loss: 0.211192 019985/063150, loss: 0.112903, avg_loss: 0.211162 019990/063150, loss: 0.109337, avg_loss: 0.211129 019995/063150, loss: 0.056791, avg_loss: 0.211088 020000/063150, loss: 0.097843, avg_loss: 0.211049 020005/063150, loss: 0.014693, avg_loss: 0.211007 020010/063150, loss: 0.125128, avg_loss: 0.210978 020015/063150, loss: 0.054642, avg_loss: 0.210954 020020/063150, loss: 0.065417, avg_loss: 0.210917 020025/063150, loss: 0.152131, avg_loss: 0.210880 020030/063150, loss: 0.009045, avg_loss: 0.210848 020035/063150, loss: 0.036245, avg_loss: 0.210813 020040/063150, loss: 0.159550, avg_loss: 0.210781 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20040/63150: {'accuracy': 0.8681192660550459} 020045/063150, loss: 0.007589, avg_loss: 0.210743 020050/063150, loss: 0.033831, avg_loss: 0.210703 020055/063150, loss: 0.031441, avg_loss: 0.210664 020060/063150, loss: 0.124152, avg_loss: 0.210644 020065/063150, loss: 0.135039, avg_loss: 0.210616 020070/063150, loss: 0.014693, avg_loss: 0.210569 020075/063150, loss: 0.042115, avg_loss: 0.210535 020080/063150, loss: 0.033478, avg_loss: 0.210489 020085/063150, loss: 0.070043, avg_loss: 0.210453 020090/063150, loss: 0.016736, avg_loss: 0.210411 020095/063150, loss: 0.030043, avg_loss: 0.210380 020100/063150, loss: 0.199432, avg_loss: 0.210349 020105/063150, loss: 0.046917, avg_loss: 0.210305 020110/063150, loss: 0.063614, avg_loss: 0.210278 020115/063150, loss: 0.005540, avg_loss: 0.210244 020120/063150, loss: 0.022867, avg_loss: 0.210204 020125/063150, loss: 0.036301, avg_loss: 0.210160 020130/063150, loss: 0.066623, avg_loss: 0.210116 020135/063150, loss: 0.023106, avg_loss: 0.210086 020140/063150, loss: 0.005517, avg_loss: 0.210042 020145/063150, loss: 0.065499, avg_loss: 0.210011 020150/063150, loss: 0.058279, avg_loss: 0.209995 020155/063150, loss: 0.040224, avg_loss: 0.209956 020160/063150, loss: 0.047493, avg_loss: 0.209920 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20160/63150: {'accuracy': 0.8612385321100917} 020165/063150, loss: 0.031267, avg_loss: 0.209879 020170/063150, loss: 0.040430, avg_loss: 0.209841 020175/063150, loss: 0.076775, avg_loss: 0.209808 020180/063150, loss: 0.024195, avg_loss: 0.209793 020185/063150, loss: 0.066413, avg_loss: 0.209762 020190/063150, loss: 0.022350, avg_loss: 0.209718 020195/063150, loss: 0.021636, avg_loss: 0.209673 020200/063150, loss: 0.115731, avg_loss: 0.209639 020205/063150, loss: 0.048761, avg_loss: 0.209599 020210/063150, loss: 0.010425, avg_loss: 0.209560 020215/063150, loss: 0.040928, avg_loss: 0.209522 020220/063150, loss: 0.052633, avg_loss: 0.209495 020225/063150, loss: 0.081250, avg_loss: 0.209465 020230/063150, loss: 0.338672, avg_loss: 0.209455 020235/063150, loss: 0.078776, avg_loss: 0.209433 020240/063150, loss: 0.045523, avg_loss: 0.209403 020245/063150, loss: 0.027094, avg_loss: 0.209374 020250/063150, loss: 0.126051, avg_loss: 0.209339 020255/063150, loss: 0.062450, avg_loss: 0.209298 020260/063150, loss: 0.080874, avg_loss: 0.209260 020265/063150, loss: 0.103944, avg_loss: 0.209225 020270/063150, loss: 0.066819, avg_loss: 0.209198 020275/063150, loss: 0.009557, avg_loss: 0.209163 020280/063150, loss: 0.097118, avg_loss: 0.209123 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20280/63150: {'accuracy': 0.8692660550458715} 020285/063150, loss: 0.027364, avg_loss: 0.209079 020290/063150, loss: 0.127520, avg_loss: 0.209040 020295/063150, loss: 0.040172, avg_loss: 0.209002 020300/063150, loss: 0.122501, avg_loss: 0.208981 020305/063150, loss: 0.031599, avg_loss: 0.208946 020310/063150, loss: 0.029067, avg_loss: 0.208909 020315/063150, loss: 0.012607, avg_loss: 0.208877 020320/063150, loss: 0.011562, avg_loss: 0.208835 020325/063150, loss: 0.004901, avg_loss: 0.208791 020330/063150, loss: 0.064952, avg_loss: 0.208749 020335/063150, loss: 0.081793, avg_loss: 0.208721 020340/063150, loss: 0.016810, avg_loss: 0.208708 020345/063150, loss: 0.162338, avg_loss: 0.208679 020350/063150, loss: 0.003809, avg_loss: 0.208639 020355/063150, loss: 0.049692, avg_loss: 0.208627 020360/063150, loss: 0.086006, avg_loss: 0.208597 020365/063150, loss: 0.141004, avg_loss: 0.208573 020370/063150, loss: 0.091642, avg_loss: 0.208553 020375/063150, loss: 0.030129, avg_loss: 0.208521 020380/063150, loss: 0.035328, avg_loss: 0.208496 020385/063150, loss: 0.040126, avg_loss: 0.208461 020390/063150, loss: 0.024271, avg_loss: 0.208452 020395/063150, loss: 0.073741, avg_loss: 0.208416 020400/063150, loss: 0.074133, avg_loss: 0.208379 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20400/63150: {'accuracy': 0.8589449541284404} 020405/063150, loss: 0.110410, avg_loss: 0.208342 020410/063150, loss: 0.030092, avg_loss: 0.208310 020415/063150, loss: 0.106829, avg_loss: 0.208281 020420/063150, loss: 0.217506, avg_loss: 0.208253 020425/063150, loss: 0.018779, avg_loss: 0.208220 020430/063150, loss: 0.042773, avg_loss: 0.208185 020435/063150, loss: 0.120134, avg_loss: 0.208146 020440/063150, loss: 0.054610, avg_loss: 0.208113 020445/063150, loss: 0.072023, avg_loss: 0.208093 020450/063150, loss: 0.157310, avg_loss: 0.208068 020455/063150, loss: 0.012583, avg_loss: 0.208043 020460/063150, loss: 0.153856, avg_loss: 0.208014 020465/063150, loss: 0.015902, avg_loss: 0.207971 020470/063150, loss: 0.044655, avg_loss: 0.207950 020475/063150, loss: 0.040242, avg_loss: 0.207916 020480/063150, loss: 0.025547, avg_loss: 0.207883 020485/063150, loss: 0.118910, avg_loss: 0.207844 020490/063150, loss: 0.024883, avg_loss: 0.207809 020495/063150, loss: 0.083840, avg_loss: 0.207779 020500/063150, loss: 0.041461, avg_loss: 0.207740 020505/063150, loss: 0.070068, avg_loss: 0.207715 020510/063150, loss: 0.018793, avg_loss: 0.207681 020515/063150, loss: 0.037700, avg_loss: 0.207642 020520/063150, loss: 0.029463, avg_loss: 0.207601 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20520/63150: {'accuracy': 0.8635321100917431} 020525/063150, loss: 0.030204, avg_loss: 0.207559 020530/063150, loss: 0.166353, avg_loss: 0.207527 020535/063150, loss: 0.110615, avg_loss: 0.207491 020540/063150, loss: 0.126063, avg_loss: 0.207449 020545/063150, loss: 0.040499, avg_loss: 0.207408 020550/063150, loss: 0.028593, avg_loss: 0.207372 020555/063150, loss: 0.117682, avg_loss: 0.207335 020560/063150, loss: 0.006620, avg_loss: 0.207307 020565/063150, loss: 0.013388, avg_loss: 0.207292 020570/063150, loss: 0.018150, avg_loss: 0.207269 020575/063150, loss: 0.231136, avg_loss: 0.207248 020580/063150, loss: 0.114148, avg_loss: 0.207226 020585/063150, loss: 0.061861, avg_loss: 0.207199 020590/063150, loss: 0.418328, avg_loss: 0.207176 020595/063150, loss: 0.213805, avg_loss: 0.207148 020600/063150, loss: 0.047238, avg_loss: 0.207116 020605/063150, loss: 0.006294, avg_loss: 0.207074 020610/063150, loss: 0.105144, avg_loss: 0.207043 020615/063150, loss: 0.024012, avg_loss: 0.207007 020620/063150, loss: 0.160236, avg_loss: 0.206989 020625/063150, loss: 0.066625, avg_loss: 0.206961 020630/063150, loss: 0.039587, avg_loss: 0.206926 020635/063150, loss: 0.022547, avg_loss: 0.206884 020640/063150, loss: 0.135983, avg_loss: 0.206853 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20640/63150: {'accuracy': 0.8555045871559633} 020645/063150, loss: 0.028459, avg_loss: 0.206816 020650/063150, loss: 0.067286, avg_loss: 0.206778 020655/063150, loss: 0.027723, avg_loss: 0.206747 020660/063150, loss: 0.213605, avg_loss: 0.206717 020665/063150, loss: 0.110135, avg_loss: 0.206678 020670/063150, loss: 0.006411, avg_loss: 0.206642 020675/063150, loss: 0.008009, avg_loss: 0.206606 020680/063150, loss: 0.084562, avg_loss: 0.206583 020685/063150, loss: 0.032616, avg_loss: 0.206544 020690/063150, loss: 0.039479, avg_loss: 0.206503 020695/063150, loss: 0.017251, avg_loss: 0.206466 020700/063150, loss: 0.048638, avg_loss: 0.206434 020705/063150, loss: 0.127265, avg_loss: 0.206393 020710/063150, loss: 0.104122, avg_loss: 0.206359 020715/063150, loss: 0.079874, avg_loss: 0.206345 020720/063150, loss: 0.075914, avg_loss: 0.206330 020725/063150, loss: 0.159761, avg_loss: 0.206297 020730/063150, loss: 0.048971, avg_loss: 0.206260 020735/063150, loss: 0.135927, avg_loss: 0.206221 020740/063150, loss: 0.086144, avg_loss: 0.206191 020745/063150, loss: 0.017295, avg_loss: 0.206155 020750/063150, loss: 0.162105, avg_loss: 0.206127 020755/063150, loss: 0.065647, avg_loss: 0.206094 020760/063150, loss: 0.146410, avg_loss: 0.206078 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20760/63150: {'accuracy': 0.8497706422018348} 020765/063150, loss: 0.083786, avg_loss: 0.206048 020770/063150, loss: 0.109062, avg_loss: 0.206020 020775/063150, loss: 0.027642, avg_loss: 0.205990 020780/063150, loss: 0.144391, avg_loss: 0.205973 020785/063150, loss: 0.086016, avg_loss: 0.205945 020790/063150, loss: 0.050243, avg_loss: 0.205909 020795/063150, loss: 0.010270, avg_loss: 0.205887 020800/063150, loss: 0.113305, avg_loss: 0.205861 020805/063150, loss: 0.174551, avg_loss: 0.205835 020810/063150, loss: 0.031637, avg_loss: 0.205802 020815/063150, loss: 0.196363, avg_loss: 0.205774 020820/063150, loss: 0.073117, avg_loss: 0.205734 020825/063150, loss: 0.021083, avg_loss: 0.205694 020830/063150, loss: 0.005502, avg_loss: 0.205653 020835/063150, loss: 0.092456, avg_loss: 0.205619 020840/063150, loss: 0.011423, avg_loss: 0.205586 020845/063150, loss: 0.271513, avg_loss: 0.205559 020850/063150, loss: 0.011996, avg_loss: 0.205518 020855/063150, loss: 0.004455, avg_loss: 0.205474 020860/063150, loss: 0.148890, avg_loss: 0.205445 020865/063150, loss: 0.009384, avg_loss: 0.205417 020870/063150, loss: 0.088145, avg_loss: 0.205381 020875/063150, loss: 0.022598, avg_loss: 0.205343 020880/063150, loss: 0.110031, avg_loss: 0.205319 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 20880/63150: {'accuracy': 0.8394495412844036} 020885/063150, loss: 0.051499, avg_loss: 0.205279 020890/063150, loss: 0.077464, avg_loss: 0.205264 020895/063150, loss: 0.034167, avg_loss: 0.205230 020900/063150, loss: 0.039848, avg_loss: 0.205191 020905/063150, loss: 0.110465, avg_loss: 0.205173 020910/063150, loss: 0.175760, avg_loss: 0.205153 020915/063150, loss: 0.022009, avg_loss: 0.205116 020920/063150, loss: 0.011672, avg_loss: 0.205080 020925/063150, loss: 0.247682, avg_loss: 0.205055 020930/063150, loss: 0.263460, avg_loss: 0.205029 020935/063150, loss: 0.041382, avg_loss: 0.205001 020940/063150, loss: 0.053091, avg_loss: 0.204971 020945/063150, loss: 0.052148, avg_loss: 0.204940 020950/063150, loss: 0.081791, avg_loss: 0.204908 020955/063150, loss: 0.046655, avg_loss: 0.204876 020960/063150, loss: 0.056876, avg_loss: 0.204841 020965/063150, loss: 0.069362, avg_loss: 0.204815 020970/063150, loss: 0.188785, avg_loss: 0.204796 020975/063150, loss: 0.045364, avg_loss: 0.204774 020980/063150, loss: 0.027498, avg_loss: 0.204746 020985/063150, loss: 0.034381, avg_loss: 0.204714 020990/063150, loss: 0.017459, avg_loss: 0.204683 020995/063150, loss: 0.065009, avg_loss: 0.204648 021000/063150, loss: 0.075192, avg_loss: 0.204622 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 9, step 21000/63150: {'accuracy': 0.8577981651376146} 021005/063150, loss: 0.027160, avg_loss: 0.204582 021010/063150, loss: 0.088042, avg_loss: 0.204558 021015/063150, loss: 0.100500, avg_loss: 0.204535 021020/063150, loss: 0.032092, avg_loss: 0.204503 021025/063150, loss: 0.048235, avg_loss: 0.204488 021030/063150, loss: 0.013025, avg_loss: 0.204458 021035/063150, loss: 0.213528, avg_loss: 0.204433 021040/063150, loss: 0.222350, avg_loss: 0.204412 021045/063150, loss: 0.088415, avg_loss: 0.204384 021050/063150, loss: 0.031291, avg_loss: 0.204351 021055/063150, loss: 0.043558, avg_loss: 0.204316 021060/063150, loss: 0.012827, avg_loss: 0.204283 021065/063150, loss: 0.033970, avg_loss: 0.204245 021070/063150, loss: 0.066525, avg_loss: 0.204211 021075/063150, loss: 0.031503, avg_loss: 0.204177 021080/063150, loss: 0.165437, avg_loss: 0.204150 021085/063150, loss: 0.033988, avg_loss: 0.204110 021090/063150, loss: 0.303216, avg_loss: 0.204085 021095/063150, loss: 0.079413, avg_loss: 0.204045 021100/063150, loss: 0.012632, avg_loss: 0.204009 021105/063150, loss: 0.024073, avg_loss: 0.203974 021110/063150, loss: 0.017585, avg_loss: 0.203935 021115/063150, loss: 0.076978, avg_loss: 0.203906 021120/063150, loss: 0.099934, avg_loss: 0.203868 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21120/63150: {'accuracy': 0.8669724770642202} 021125/063150, loss: 0.061035, avg_loss: 0.203834 021130/063150, loss: 0.037070, avg_loss: 0.203794 021135/063150, loss: 0.050149, avg_loss: 0.203773 021140/063150, loss: 0.032280, avg_loss: 0.203737 021145/063150, loss: 0.014559, avg_loss: 0.203695 021150/063150, loss: 0.097263, avg_loss: 0.203662 021155/063150, loss: 0.011140, avg_loss: 0.203622 021160/063150, loss: 0.019545, avg_loss: 0.203591 021165/063150, loss: 0.051003, avg_loss: 0.203561 021170/063150, loss: 0.056874, avg_loss: 0.203532 021175/063150, loss: 0.041219, avg_loss: 0.203489 021180/063150, loss: 0.027904, avg_loss: 0.203461 021185/063150, loss: 0.067805, avg_loss: 0.203434 021190/063150, loss: 0.018498, avg_loss: 0.203395 021195/063150, loss: 0.079323, avg_loss: 0.203357 021200/063150, loss: 0.151178, avg_loss: 0.203321 021205/063150, loss: 0.007469, avg_loss: 0.203283 021210/063150, loss: 0.026797, avg_loss: 0.203243 021215/063150, loss: 0.005706, avg_loss: 0.203207 021220/063150, loss: 0.009869, avg_loss: 0.203168 021225/063150, loss: 0.003080, avg_loss: 0.203137 021230/063150, loss: 0.022177, avg_loss: 0.203103 021235/063150, loss: 0.082857, avg_loss: 0.203067 021240/063150, loss: 0.012739, avg_loss: 0.203030 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21240/63150: {'accuracy': 0.8635321100917431} 021245/063150, loss: 0.058605, avg_loss: 0.202996 021250/063150, loss: 0.162820, avg_loss: 0.202972 021255/063150, loss: 0.058399, avg_loss: 0.202932 021260/063150, loss: 0.006702, avg_loss: 0.202891 021265/063150, loss: 0.039642, avg_loss: 0.202850 021270/063150, loss: 0.006989, avg_loss: 0.202817 021275/063150, loss: 0.116692, avg_loss: 0.202783 021280/063150, loss: 0.017230, avg_loss: 0.202748 021285/063150, loss: 0.039603, avg_loss: 0.202711 021290/063150, loss: 0.074391, avg_loss: 0.202681 021295/063150, loss: 0.027657, avg_loss: 0.202656 021300/063150, loss: 0.037208, avg_loss: 0.202621 021305/063150, loss: 0.041017, avg_loss: 0.202589 021310/063150, loss: 0.013274, avg_loss: 0.202555 021315/063150, loss: 0.013868, avg_loss: 0.202525 021320/063150, loss: 0.038810, avg_loss: 0.202491 021325/063150, loss: 0.111779, avg_loss: 0.202460 021330/063150, loss: 0.036742, avg_loss: 0.202423 021335/063150, loss: 0.061389, avg_loss: 0.202385 021340/063150, loss: 0.018781, avg_loss: 0.202356 021345/063150, loss: 0.028492, avg_loss: 0.202324 021350/063150, loss: 0.126661, avg_loss: 0.202286 021355/063150, loss: 0.017322, avg_loss: 0.202252 021360/063150, loss: 0.042239, avg_loss: 0.202213 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21360/63150: {'accuracy': 0.8727064220183486} 021365/063150, loss: 0.089343, avg_loss: 0.202183 021370/063150, loss: 0.038140, avg_loss: 0.202141 021375/063150, loss: 0.158543, avg_loss: 0.202108 021380/063150, loss: 0.087458, avg_loss: 0.202069 021385/063150, loss: 0.020430, avg_loss: 0.202036 021390/063150, loss: 0.003075, avg_loss: 0.202003 021395/063150, loss: 0.010664, avg_loss: 0.201972 021400/063150, loss: 0.046870, avg_loss: 0.201934 021405/063150, loss: 0.063215, avg_loss: 0.201900 021410/063150, loss: 0.114710, avg_loss: 0.201866 021415/063150, loss: 0.059310, avg_loss: 0.201829 021420/063150, loss: 0.068002, avg_loss: 0.201800 021425/063150, loss: 0.023043, avg_loss: 0.201766 021430/063150, loss: 0.053517, avg_loss: 0.201726 021435/063150, loss: 0.092121, avg_loss: 0.201694 021440/063150, loss: 0.103308, avg_loss: 0.201659 021445/063150, loss: 0.007302, avg_loss: 0.201614 021450/063150, loss: 0.011896, avg_loss: 0.201578 021455/063150, loss: 0.048774, avg_loss: 0.201550 021460/063150, loss: 0.175042, avg_loss: 0.201519 021465/063150, loss: 0.163047, avg_loss: 0.201495 021470/063150, loss: 0.011442, avg_loss: 0.201454 021475/063150, loss: 0.031252, avg_loss: 0.201416 021480/063150, loss: 0.027138, avg_loss: 0.201377 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21480/63150: {'accuracy': 0.8520642201834863} 021485/063150, loss: 0.007853, avg_loss: 0.201339 021490/063150, loss: 0.020112, avg_loss: 0.201310 021495/063150, loss: 0.004416, avg_loss: 0.201280 021500/063150, loss: 0.018525, avg_loss: 0.201253 021505/063150, loss: 0.048418, avg_loss: 0.201214 021510/063150, loss: 0.059886, avg_loss: 0.201189 021515/063150, loss: 0.042302, avg_loss: 0.201158 021520/063150, loss: 0.066871, avg_loss: 0.201120 021525/063150, loss: 0.009210, avg_loss: 0.201092 021530/063150, loss: 0.027710, avg_loss: 0.201073 021535/063150, loss: 0.097717, avg_loss: 0.201045 021540/063150, loss: 0.014640, avg_loss: 0.201013 021545/063150, loss: 0.053703, avg_loss: 0.200974 021550/063150, loss: 0.025651, avg_loss: 0.200962 021555/063150, loss: 0.017275, avg_loss: 0.200925 021560/063150, loss: 0.046003, avg_loss: 0.200897 021565/063150, loss: 0.063088, avg_loss: 0.200864 021570/063150, loss: 0.159226, avg_loss: 0.200841 021575/063150, loss: 0.075134, avg_loss: 0.200816 021580/063150, loss: 0.056126, avg_loss: 0.200776 021585/063150, loss: 0.125232, avg_loss: 0.200755 021590/063150, loss: 0.111993, avg_loss: 0.200732 021595/063150, loss: 0.049268, avg_loss: 0.200702 021600/063150, loss: 0.087106, avg_loss: 0.200666 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21600/63150: {'accuracy': 0.8405963302752294} 021605/063150, loss: 0.025933, avg_loss: 0.200630 021610/063150, loss: 0.017660, avg_loss: 0.200594 021615/063150, loss: 0.086530, avg_loss: 0.200573 021620/063150, loss: 0.012778, avg_loss: 0.200533 021625/063150, loss: 0.144414, avg_loss: 0.200503 021630/063150, loss: 0.095246, avg_loss: 0.200467 021635/063150, loss: 0.024590, avg_loss: 0.200450 021640/063150, loss: 0.089856, avg_loss: 0.200423 021645/063150, loss: 0.179813, avg_loss: 0.200405 021650/063150, loss: 0.021562, avg_loss: 0.200374 021655/063150, loss: 0.025524, avg_loss: 0.200337 021660/063150, loss: 0.047650, avg_loss: 0.200305 021665/063150, loss: 0.237280, avg_loss: 0.200290 021670/063150, loss: 0.064881, avg_loss: 0.200257 021675/063150, loss: 0.027729, avg_loss: 0.200226 021680/063150, loss: 0.110178, avg_loss: 0.200190 021685/063150, loss: 0.025863, avg_loss: 0.200150 021690/063150, loss: 0.048851, avg_loss: 0.200113 021695/063150, loss: 0.004236, avg_loss: 0.200074 021700/063150, loss: 0.123597, avg_loss: 0.200040 021705/063150, loss: 0.042912, avg_loss: 0.200006 021710/063150, loss: 0.009431, avg_loss: 0.199970 021715/063150, loss: 0.144364, avg_loss: 0.199943 021720/063150, loss: 0.013373, avg_loss: 0.199920 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21720/63150: {'accuracy': 0.8600917431192661} 021725/063150, loss: 0.084313, avg_loss: 0.199884 021730/063150, loss: 0.028954, avg_loss: 0.199851 021735/063150, loss: 0.176788, avg_loss: 0.199826 021740/063150, loss: 0.005431, avg_loss: 0.199790 021745/063150, loss: 0.009105, avg_loss: 0.199753 021750/063150, loss: 0.049044, avg_loss: 0.199724 021755/063150, loss: 0.061687, avg_loss: 0.199690 021760/063150, loss: 0.019765, avg_loss: 0.199655 021765/063150, loss: 0.016331, avg_loss: 0.199621 021770/063150, loss: 0.083436, avg_loss: 0.199584 021775/063150, loss: 0.106852, avg_loss: 0.199558 021780/063150, loss: 0.090581, avg_loss: 0.199524 021785/063150, loss: 0.111586, avg_loss: 0.199493 021790/063150, loss: 0.031610, avg_loss: 0.199455 021795/063150, loss: 0.009092, avg_loss: 0.199415 021800/063150, loss: 0.042358, avg_loss: 0.199378 021805/063150, loss: 0.010819, avg_loss: 0.199349 021810/063150, loss: 0.107072, avg_loss: 0.199320 021815/063150, loss: 0.055009, avg_loss: 0.199300 021820/063150, loss: 0.190386, avg_loss: 0.199277 021825/063150, loss: 0.012031, avg_loss: 0.199242 021830/063150, loss: 0.032310, avg_loss: 0.199204 021835/063150, loss: 0.058814, avg_loss: 0.199174 021840/063150, loss: 0.070953, avg_loss: 0.199152 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21840/63150: {'accuracy': 0.8543577981651376} 021845/063150, loss: 0.013568, avg_loss: 0.199115 021850/063150, loss: 0.029372, avg_loss: 0.199088 021855/063150, loss: 0.065179, avg_loss: 0.199050 021860/063150, loss: 0.127836, avg_loss: 0.199017 021865/063150, loss: 0.006953, avg_loss: 0.198980 021870/063150, loss: 0.048309, avg_loss: 0.198965 021875/063150, loss: 0.019982, avg_loss: 0.198938 021880/063150, loss: 0.043102, avg_loss: 0.198912 021885/063150, loss: 0.036355, avg_loss: 0.198894 021890/063150, loss: 0.006664, avg_loss: 0.198864 021895/063150, loss: 0.126826, avg_loss: 0.198830 021900/063150, loss: 0.011602, avg_loss: 0.198795 021905/063150, loss: 0.036565, avg_loss: 0.198771 021910/063150, loss: 0.016536, avg_loss: 0.198740 021915/063150, loss: 0.053892, avg_loss: 0.198713 021920/063150, loss: 0.109127, avg_loss: 0.198687 021925/063150, loss: 0.035026, avg_loss: 0.198652 021930/063150, loss: 0.021003, avg_loss: 0.198616 021935/063150, loss: 0.088228, avg_loss: 0.198590 021940/063150, loss: 0.046900, avg_loss: 0.198553 021945/063150, loss: 0.017754, avg_loss: 0.198518 021950/063150, loss: 0.091679, avg_loss: 0.198488 021955/063150, loss: 0.141407, avg_loss: 0.198455 021960/063150, loss: 0.028794, avg_loss: 0.198427 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 21960/63150: {'accuracy': 0.8577981651376146} 021965/063150, loss: 0.040378, avg_loss: 0.198390 021970/063150, loss: 0.098541, avg_loss: 0.198360 021975/063150, loss: 0.039115, avg_loss: 0.198337 021980/063150, loss: 0.014699, avg_loss: 0.198306 021985/063150, loss: 0.024117, avg_loss: 0.198282 021990/063150, loss: 0.011524, avg_loss: 0.198241 021995/063150, loss: 0.008876, avg_loss: 0.198214 022000/063150, loss: 0.023660, avg_loss: 0.198190 022005/063150, loss: 0.133314, avg_loss: 0.198164 022010/063150, loss: 0.072934, avg_loss: 0.198133 022015/063150, loss: 0.168936, avg_loss: 0.198107 022020/063150, loss: 0.102314, avg_loss: 0.198082 022025/063150, loss: 0.012388, avg_loss: 0.198047 022030/063150, loss: 0.034317, avg_loss: 0.198011 022035/063150, loss: 0.034185, avg_loss: 0.197979 022040/063150, loss: 0.033702, avg_loss: 0.197952 022045/063150, loss: 0.062564, avg_loss: 0.197914 022050/063150, loss: 0.093162, avg_loss: 0.197883 022055/063150, loss: 0.081634, avg_loss: 0.197852 022060/063150, loss: 0.105888, avg_loss: 0.197817 022065/063150, loss: 0.088682, avg_loss: 0.197793 022070/063150, loss: 0.042452, avg_loss: 0.197757 022075/063150, loss: 0.103377, avg_loss: 0.197731 022080/063150, loss: 0.005138, avg_loss: 0.197697 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22080/63150: {'accuracy': 0.856651376146789} 022085/063150, loss: 0.137126, avg_loss: 0.197676 022090/063150, loss: 0.014711, avg_loss: 0.197647 022095/063150, loss: 0.017935, avg_loss: 0.197608 022100/063150, loss: 0.014679, avg_loss: 0.197567 022105/063150, loss: 0.076943, avg_loss: 0.197543 022110/063150, loss: 0.011852, avg_loss: 0.197514 022115/063150, loss: 0.041861, avg_loss: 0.197489 022120/063150, loss: 0.049748, avg_loss: 0.197469 022125/063150, loss: 0.156984, avg_loss: 0.197440 022130/063150, loss: 0.024157, avg_loss: 0.197426 022135/063150, loss: 0.026004, avg_loss: 0.197396 022140/063150, loss: 0.034191, avg_loss: 0.197365 022145/063150, loss: 0.142887, avg_loss: 0.197335 022150/063150, loss: 0.010900, avg_loss: 0.197308 022155/063150, loss: 0.176128, avg_loss: 0.197290 022160/063150, loss: 0.142700, avg_loss: 0.197262 022165/063150, loss: 0.025616, avg_loss: 0.197223 022170/063150, loss: 0.055992, avg_loss: 0.197198 022175/063150, loss: 0.076019, avg_loss: 0.197167 022180/063150, loss: 0.015440, avg_loss: 0.197140 022185/063150, loss: 0.149633, avg_loss: 0.197122 022190/063150, loss: 0.008623, avg_loss: 0.197090 022195/063150, loss: 0.020159, avg_loss: 0.197058 022200/063150, loss: 0.066964, avg_loss: 0.197024 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22200/63150: {'accuracy': 0.8543577981651376} 022205/063150, loss: 0.293325, avg_loss: 0.197009 022210/063150, loss: 0.033500, avg_loss: 0.196982 022215/063150, loss: 0.069676, avg_loss: 0.196954 022220/063150, loss: 0.044650, avg_loss: 0.196923 022225/063150, loss: 0.074841, avg_loss: 0.196891 022230/063150, loss: 0.070426, avg_loss: 0.196863 022235/063150, loss: 0.030231, avg_loss: 0.196841 022240/063150, loss: 0.054111, avg_loss: 0.196810 022245/063150, loss: 0.051011, avg_loss: 0.196773 022250/063150, loss: 0.025203, avg_loss: 0.196745 022255/063150, loss: 0.017682, avg_loss: 0.196712 022260/063150, loss: 0.047338, avg_loss: 0.196683 022265/063150, loss: 0.295749, avg_loss: 0.196672 022270/063150, loss: 0.046192, avg_loss: 0.196649 022275/063150, loss: 0.118811, avg_loss: 0.196633 022280/063150, loss: 0.008980, avg_loss: 0.196611 022285/063150, loss: 0.109894, avg_loss: 0.196582 022290/063150, loss: 0.052682, avg_loss: 0.196554 022295/063150, loss: 0.123655, avg_loss: 0.196535 022300/063150, loss: 0.104137, avg_loss: 0.196505 022305/063150, loss: 0.041919, avg_loss: 0.196478 022310/063150, loss: 0.018834, avg_loss: 0.196459 022315/063150, loss: 0.059563, avg_loss: 0.196448 022320/063150, loss: 0.032077, avg_loss: 0.196425 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22320/63150: {'accuracy': 0.8612385321100917} 022325/063150, loss: 0.095994, avg_loss: 0.196397 022330/063150, loss: 0.136851, avg_loss: 0.196376 022335/063150, loss: 0.053174, avg_loss: 0.196344 022340/063150, loss: 0.034559, avg_loss: 0.196326 022345/063150, loss: 0.056062, avg_loss: 0.196308 022350/063150, loss: 0.044600, avg_loss: 0.196272 022355/063150, loss: 0.029881, avg_loss: 0.196240 022360/063150, loss: 0.010959, avg_loss: 0.196212 022365/063150, loss: 0.092464, avg_loss: 0.196182 022370/063150, loss: 0.019121, avg_loss: 0.196153 022375/063150, loss: 0.061359, avg_loss: 0.196127 022380/063150, loss: 0.127033, avg_loss: 0.196106 022385/063150, loss: 0.041401, avg_loss: 0.196084 022390/063150, loss: 0.005995, avg_loss: 0.196048 022395/063150, loss: 0.004339, avg_loss: 0.196017 022400/063150, loss: 0.088383, avg_loss: 0.195988 022405/063150, loss: 0.163154, avg_loss: 0.195964 022410/063150, loss: 0.065892, avg_loss: 0.195942 022415/063150, loss: 0.011418, avg_loss: 0.195907 022420/063150, loss: 0.036883, avg_loss: 0.195869 022425/063150, loss: 0.022298, avg_loss: 0.195836 022430/063150, loss: 0.089791, avg_loss: 0.195804 022435/063150, loss: 0.034271, avg_loss: 0.195777 022440/063150, loss: 0.128377, avg_loss: 0.195751 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22440/63150: {'accuracy': 0.8589449541284404} 022445/063150, loss: 0.167781, avg_loss: 0.195728 022450/063150, loss: 0.061771, avg_loss: 0.195691 022455/063150, loss: 0.045238, avg_loss: 0.195653 022460/063150, loss: 0.094943, avg_loss: 0.195621 022465/063150, loss: 0.004108, avg_loss: 0.195581 022470/063150, loss: 0.157663, avg_loss: 0.195554 022475/063150, loss: 0.008671, avg_loss: 0.195518 022480/063150, loss: 0.021298, avg_loss: 0.195504 022485/063150, loss: 0.168470, avg_loss: 0.195476 022490/063150, loss: 0.012726, avg_loss: 0.195451 022495/063150, loss: 0.011042, avg_loss: 0.195414 022500/063150, loss: 0.090841, avg_loss: 0.195381 022505/063150, loss: 0.018633, avg_loss: 0.195347 022510/063150, loss: 0.039132, avg_loss: 0.195314 022515/063150, loss: 0.087783, avg_loss: 0.195280 022520/063150, loss: 0.009073, avg_loss: 0.195252 022525/063150, loss: 0.011479, avg_loss: 0.195221 022530/063150, loss: 0.066836, avg_loss: 0.195186 022535/063150, loss: 0.014882, avg_loss: 0.195150 022540/063150, loss: 0.051247, avg_loss: 0.195117 022545/063150, loss: 0.009370, avg_loss: 0.195088 022550/063150, loss: 0.037039, avg_loss: 0.195060 022555/063150, loss: 0.014592, avg_loss: 0.195031 022560/063150, loss: 0.020069, avg_loss: 0.194998 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22560/63150: {'accuracy': 0.8440366972477065} 022565/063150, loss: 0.098400, avg_loss: 0.194974 022570/063150, loss: 0.003008, avg_loss: 0.194939 022575/063150, loss: 0.017197, avg_loss: 0.194902 022580/063150, loss: 0.117138, avg_loss: 0.194889 022585/063150, loss: 0.047130, avg_loss: 0.194862 022590/063150, loss: 0.007353, avg_loss: 0.194828 022595/063150, loss: 0.199140, avg_loss: 0.194813 022600/063150, loss: 0.093617, avg_loss: 0.194790 022605/063150, loss: 0.011175, avg_loss: 0.194755 022610/063150, loss: 0.178753, avg_loss: 0.194733 022615/063150, loss: 0.099559, avg_loss: 0.194702 022620/063150, loss: 0.026797, avg_loss: 0.194664 022625/063150, loss: 0.020657, avg_loss: 0.194639 022630/063150, loss: 0.043500, avg_loss: 0.194610 022635/063150, loss: 0.243713, avg_loss: 0.194588 022640/063150, loss: 0.080739, avg_loss: 0.194551 022645/063150, loss: 0.002077, avg_loss: 0.194533 022650/063150, loss: 0.193794, avg_loss: 0.194505 022655/063150, loss: 0.051798, avg_loss: 0.194476 022660/063150, loss: 0.026690, avg_loss: 0.194446 022665/063150, loss: 0.013211, avg_loss: 0.194422 022670/063150, loss: 0.037913, avg_loss: 0.194391 022675/063150, loss: 0.117704, avg_loss: 0.194365 022680/063150, loss: 0.064739, avg_loss: 0.194337 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22680/63150: {'accuracy': 0.8532110091743119} 022685/063150, loss: 0.054149, avg_loss: 0.194307 022690/063150, loss: 0.159059, avg_loss: 0.194279 022695/063150, loss: 0.029992, avg_loss: 0.194252 022700/063150, loss: 0.032528, avg_loss: 0.194222 022705/063150, loss: 0.052093, avg_loss: 0.194188 022710/063150, loss: 0.101192, avg_loss: 0.194158 022715/063150, loss: 0.021592, avg_loss: 0.194126 022720/063150, loss: 0.057674, avg_loss: 0.194100 022725/063150, loss: 0.002278, avg_loss: 0.194086 022730/063150, loss: 0.171402, avg_loss: 0.194077 022735/063150, loss: 0.036579, avg_loss: 0.194046 022740/063150, loss: 0.161170, avg_loss: 0.194017 022745/063150, loss: 0.061871, avg_loss: 0.193992 022750/063150, loss: 0.034428, avg_loss: 0.193957 022755/063150, loss: 0.030176, avg_loss: 0.193923 022760/063150, loss: 0.081419, avg_loss: 0.193903 022765/063150, loss: 0.354843, avg_loss: 0.193896 022770/063150, loss: 0.188152, avg_loss: 0.193889 022775/063150, loss: 0.137218, avg_loss: 0.193866 022780/063150, loss: 0.024686, avg_loss: 0.193834 022785/063150, loss: 0.040832, avg_loss: 0.193806 022790/063150, loss: 0.149951, avg_loss: 0.193779 022795/063150, loss: 0.146829, avg_loss: 0.193757 022800/063150, loss: 0.044110, avg_loss: 0.193730 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22800/63150: {'accuracy': 0.8371559633027523} 022805/063150, loss: 0.007515, avg_loss: 0.193695 022810/063150, loss: 0.158882, avg_loss: 0.193667 022815/063150, loss: 0.059854, avg_loss: 0.193634 022820/063150, loss: 0.144683, avg_loss: 0.193603 022825/063150, loss: 0.023843, avg_loss: 0.193566 022830/063150, loss: 0.012936, avg_loss: 0.193538 022835/063150, loss: 0.006677, avg_loss: 0.193506 022840/063150, loss: 0.019081, avg_loss: 0.193480 022845/063150, loss: 0.087645, avg_loss: 0.193449 022850/063150, loss: 0.138032, avg_loss: 0.193429 022855/063150, loss: 0.083127, avg_loss: 0.193403 022860/063150, loss: 0.073598, avg_loss: 0.193371 022865/063150, loss: 0.040333, avg_loss: 0.193342 022870/063150, loss: 0.011868, avg_loss: 0.193311 022875/063150, loss: 0.017620, avg_loss: 0.193274 022880/063150, loss: 0.007211, avg_loss: 0.193241 022885/063150, loss: 0.049768, avg_loss: 0.193222 022890/063150, loss: 0.031942, avg_loss: 0.193194 022895/063150, loss: 0.045853, avg_loss: 0.193169 022900/063150, loss: 0.082127, avg_loss: 0.193138 022905/063150, loss: 0.040827, avg_loss: 0.193104 022910/063150, loss: 0.048356, avg_loss: 0.193071 022915/063150, loss: 0.098776, avg_loss: 0.193043 022920/063150, loss: 0.116651, avg_loss: 0.193018 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 22920/63150: {'accuracy': 0.8543577981651376} 022925/063150, loss: 0.012783, avg_loss: 0.192987 022930/063150, loss: 0.002173, avg_loss: 0.192949 022935/063150, loss: 0.026290, avg_loss: 0.192921 022940/063150, loss: 0.030074, avg_loss: 0.192889 022945/063150, loss: 0.014593, avg_loss: 0.192857 022950/063150, loss: 0.098203, avg_loss: 0.192830 022955/063150, loss: 0.038577, avg_loss: 0.192801 022960/063150, loss: 0.166765, avg_loss: 0.192787 022965/063150, loss: 0.041652, avg_loss: 0.192758 022970/063150, loss: 0.232168, avg_loss: 0.192743 022975/063150, loss: 0.056446, avg_loss: 0.192714 022980/063150, loss: 0.121726, avg_loss: 0.192687 022985/063150, loss: 0.004820, avg_loss: 0.192658 022990/063150, loss: 0.021978, avg_loss: 0.192628 022995/063150, loss: 0.236425, avg_loss: 0.192607 023000/063150, loss: 0.053714, avg_loss: 0.192580 023005/063150, loss: 0.211114, avg_loss: 0.192558 023010/063150, loss: 0.003515, avg_loss: 0.192533 023015/063150, loss: 0.052144, avg_loss: 0.192499 023020/063150, loss: 0.026799, avg_loss: 0.192469 023025/063150, loss: 0.091898, avg_loss: 0.192437 023030/063150, loss: 0.003500, avg_loss: 0.192414 023035/063150, loss: 0.005857, avg_loss: 0.192376 023040/063150, loss: 0.044703, avg_loss: 0.192364 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 10, step 23040/63150: {'accuracy': 0.8589449541284404} 023045/063150, loss: 0.143516, avg_loss: 0.192334 023050/063150, loss: 0.170426, avg_loss: 0.192318 023055/063150, loss: 0.096597, avg_loss: 0.192286 023060/063150, loss: 0.003429, avg_loss: 0.192253 023065/063150, loss: 0.012101, avg_loss: 0.192233 023070/063150, loss: 0.010048, avg_loss: 0.192202 023075/063150, loss: 0.043284, avg_loss: 0.192169 023080/063150, loss: 0.004529, avg_loss: 0.192132 023085/063150, loss: 0.011963, avg_loss: 0.192095 023090/063150, loss: 0.024565, avg_loss: 0.192068 023095/063150, loss: 0.006504, avg_loss: 0.192057 023100/063150, loss: 0.026959, avg_loss: 0.192019 023105/063150, loss: 0.012585, avg_loss: 0.191985 023110/063150, loss: 0.132001, avg_loss: 0.191975 023115/063150, loss: 0.026568, avg_loss: 0.191954 023120/063150, loss: 0.026058, avg_loss: 0.191929 023125/063150, loss: 0.076050, avg_loss: 0.191898 023130/063150, loss: 0.015319, avg_loss: 0.191866 023135/063150, loss: 0.131488, avg_loss: 0.191840 023140/063150, loss: 0.013351, avg_loss: 0.191807 023145/063150, loss: 0.023956, avg_loss: 0.191781 023150/063150, loss: 0.005832, avg_loss: 0.191747 023155/063150, loss: 0.024498, avg_loss: 0.191717 023160/063150, loss: 0.032976, avg_loss: 0.191686 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23160/63150: {'accuracy': 0.8440366972477065} 023165/063150, loss: 0.015528, avg_loss: 0.191651 023170/063150, loss: 0.025395, avg_loss: 0.191617 023175/063150, loss: 0.031576, avg_loss: 0.191581 023180/063150, loss: 0.309930, avg_loss: 0.191561 023185/063150, loss: 0.006360, avg_loss: 0.191524 023190/063150, loss: 0.001535, avg_loss: 0.191486 023195/063150, loss: 0.237478, avg_loss: 0.191469 023200/063150, loss: 0.001275, avg_loss: 0.191430 023205/063150, loss: 0.136791, avg_loss: 0.191402 023210/063150, loss: 0.104698, avg_loss: 0.191377 023215/063150, loss: 0.035337, avg_loss: 0.191342 023220/063150, loss: 0.019973, avg_loss: 0.191320 023225/063150, loss: 0.069043, avg_loss: 0.191287 023230/063150, loss: 0.048119, avg_loss: 0.191257 023235/063150, loss: 0.034886, avg_loss: 0.191225 023240/063150, loss: 0.008942, avg_loss: 0.191186 023245/063150, loss: 0.026159, avg_loss: 0.191158 023250/063150, loss: 0.207585, avg_loss: 0.191140 023255/063150, loss: 0.019199, avg_loss: 0.191103 023260/063150, loss: 0.007328, avg_loss: 0.191076 023265/063150, loss: 0.018104, avg_loss: 0.191039 023270/063150, loss: 0.002913, avg_loss: 0.191008 023275/063150, loss: 0.144199, avg_loss: 0.190984 023280/063150, loss: 0.011352, avg_loss: 0.190952 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23280/63150: {'accuracy': 0.8486238532110092} 023285/063150, loss: 0.063804, avg_loss: 0.190924 023290/063150, loss: 0.195421, avg_loss: 0.190906 023295/063150, loss: 0.041876, avg_loss: 0.190878 023300/063150, loss: 0.008782, avg_loss: 0.190844 023305/063150, loss: 0.045159, avg_loss: 0.190820 023310/063150, loss: 0.023753, avg_loss: 0.190788 023315/063150, loss: 0.170227, avg_loss: 0.190758 023320/063150, loss: 0.113302, avg_loss: 0.190727 023325/063150, loss: 0.043905, avg_loss: 0.190693 023330/063150, loss: 0.002386, avg_loss: 0.190659 023335/063150, loss: 0.009420, avg_loss: 0.190622 023340/063150, loss: 0.089200, avg_loss: 0.190588 023345/063150, loss: 0.010737, avg_loss: 0.190560 023350/063150, loss: 0.089563, avg_loss: 0.190530 023355/063150, loss: 0.003631, avg_loss: 0.190494 023360/063150, loss: 0.013963, avg_loss: 0.190462 023365/063150, loss: 0.011867, avg_loss: 0.190426 023370/063150, loss: 0.001041, avg_loss: 0.190399 023375/063150, loss: 0.016217, avg_loss: 0.190369 023380/063150, loss: 0.012014, avg_loss: 0.190331 023385/063150, loss: 0.040433, avg_loss: 0.190298 023390/063150, loss: 0.023484, avg_loss: 0.190272 023395/063150, loss: 0.004207, avg_loss: 0.190235 023400/063150, loss: 0.081062, avg_loss: 0.190202 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23400/63150: {'accuracy': 0.8486238532110092} 023405/063150, loss: 0.032287, avg_loss: 0.190173 023410/063150, loss: 0.003912, avg_loss: 0.190135 023415/063150, loss: 0.003492, avg_loss: 0.190102 023420/063150, loss: 0.027165, avg_loss: 0.190070 023425/063150, loss: 0.004454, avg_loss: 0.190032 023430/063150, loss: 0.047393, avg_loss: 0.189997 023435/063150, loss: 0.003825, avg_loss: 0.189963 023440/063150, loss: 0.007256, avg_loss: 0.189925 023445/063150, loss: 0.004597, avg_loss: 0.189893 023450/063150, loss: 0.007908, avg_loss: 0.189866 023455/063150, loss: 0.016762, avg_loss: 0.189831 023460/063150, loss: 0.006530, avg_loss: 0.189809 023465/063150, loss: 0.174834, avg_loss: 0.189793 023470/063150, loss: 0.114805, avg_loss: 0.189763 023475/063150, loss: 0.047003, avg_loss: 0.189736 023480/063150, loss: 0.018862, avg_loss: 0.189702 023485/063150, loss: 0.122990, avg_loss: 0.189675 023490/063150, loss: 0.033680, avg_loss: 0.189646 023495/063150, loss: 0.019300, avg_loss: 0.189625 023500/063150, loss: 0.025757, avg_loss: 0.189599 023505/063150, loss: 0.169331, avg_loss: 0.189579 023510/063150, loss: 0.201785, avg_loss: 0.189561 023515/063150, loss: 0.015488, avg_loss: 0.189527 023520/063150, loss: 0.021997, avg_loss: 0.189496 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23520/63150: {'accuracy': 0.8451834862385321} 023525/063150, loss: 0.036511, avg_loss: 0.189470 023530/063150, loss: 0.047651, avg_loss: 0.189442 023535/063150, loss: 0.187139, avg_loss: 0.189415 023540/063150, loss: 0.038144, avg_loss: 0.189393 023545/063150, loss: 0.071739, avg_loss: 0.189372 023550/063150, loss: 0.034931, avg_loss: 0.189348 023555/063150, loss: 0.009769, avg_loss: 0.189322 023560/063150, loss: 0.060041, avg_loss: 0.189294 023565/063150, loss: 0.039219, avg_loss: 0.189265 023570/063150, loss: 0.227561, avg_loss: 0.189240 023575/063150, loss: 0.163626, avg_loss: 0.189214 023580/063150, loss: 0.041914, avg_loss: 0.189185 023585/063150, loss: 0.012803, avg_loss: 0.189147 023590/063150, loss: 0.003413, avg_loss: 0.189120 023595/063150, loss: 0.003761, avg_loss: 0.189088 023600/063150, loss: 0.059110, avg_loss: 0.189064 023605/063150, loss: 0.038025, avg_loss: 0.189041 023610/063150, loss: 0.031567, avg_loss: 0.189014 023615/063150, loss: 0.030665, avg_loss: 0.188984 023620/063150, loss: 0.127741, avg_loss: 0.188961 023625/063150, loss: 0.044313, avg_loss: 0.188938 023630/063150, loss: 0.022755, avg_loss: 0.188904 023635/063150, loss: 0.019720, avg_loss: 0.188874 023640/063150, loss: 0.144572, avg_loss: 0.188847 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23640/63150: {'accuracy': 0.8589449541284404} 023645/063150, loss: 0.121178, avg_loss: 0.188816 023650/063150, loss: 0.051083, avg_loss: 0.188780 023655/063150, loss: 0.079024, avg_loss: 0.188757 023660/063150, loss: 0.145072, avg_loss: 0.188738 023665/063150, loss: 0.144136, avg_loss: 0.188713 023670/063150, loss: 0.090432, avg_loss: 0.188680 023675/063150, loss: 0.134375, avg_loss: 0.188651 023680/063150, loss: 0.002004, avg_loss: 0.188619 023685/063150, loss: 0.110233, avg_loss: 0.188589 023690/063150, loss: 0.030898, avg_loss: 0.188554 023695/063150, loss: 0.037261, avg_loss: 0.188522 023700/063150, loss: 0.067426, avg_loss: 0.188492 023705/063150, loss: 0.012747, avg_loss: 0.188459 023710/063150, loss: 0.069943, avg_loss: 0.188442 023715/063150, loss: 0.008648, avg_loss: 0.188414 023720/063150, loss: 0.049922, avg_loss: 0.188384 023725/063150, loss: 0.142128, avg_loss: 0.188356 023730/063150, loss: 0.079043, avg_loss: 0.188330 023735/063150, loss: 0.025516, avg_loss: 0.188301 023740/063150, loss: 0.233129, avg_loss: 0.188284 023745/063150, loss: 0.147132, avg_loss: 0.188256 023750/063150, loss: 0.081683, avg_loss: 0.188224 023755/063150, loss: 0.037890, avg_loss: 0.188204 023760/063150, loss: 0.012304, avg_loss: 0.188169 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23760/63150: {'accuracy': 0.856651376146789} 023765/063150, loss: 0.081415, avg_loss: 0.188148 023770/063150, loss: 0.036861, avg_loss: 0.188116 023775/063150, loss: 0.028176, avg_loss: 0.188100 023780/063150, loss: 0.009934, avg_loss: 0.188076 023785/063150, loss: 0.026138, avg_loss: 0.188053 023790/063150, loss: 0.063748, avg_loss: 0.188022 023795/063150, loss: 0.115647, avg_loss: 0.188002 023800/063150, loss: 0.016312, avg_loss: 0.187977 023805/063150, loss: 0.049148, avg_loss: 0.187942 023810/063150, loss: 0.006433, avg_loss: 0.187915 023815/063150, loss: 0.237302, avg_loss: 0.187894 023820/063150, loss: 0.033043, avg_loss: 0.187862 023825/063150, loss: 0.285820, avg_loss: 0.187838 023830/063150, loss: 0.021223, avg_loss: 0.187808 023835/063150, loss: 0.009376, avg_loss: 0.187779 023840/063150, loss: 0.040155, avg_loss: 0.187747 023845/063150, loss: 0.005624, avg_loss: 0.187710 023850/063150, loss: 0.041572, avg_loss: 0.187684 023855/063150, loss: 0.021178, avg_loss: 0.187652 023860/063150, loss: 0.051807, avg_loss: 0.187621 023865/063150, loss: 0.045404, avg_loss: 0.187599 023870/063150, loss: 0.021514, avg_loss: 0.187581 023875/063150, loss: 0.055826, avg_loss: 0.187549 023880/063150, loss: 0.065029, avg_loss: 0.187524 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 23880/63150: {'accuracy': 0.8623853211009175} 023885/063150, loss: 0.008970, avg_loss: 0.187491 023890/063150, loss: 0.040309, avg_loss: 0.187466 023895/063150, loss: 0.006797, avg_loss: 0.187441 023900/063150, loss: 0.259366, avg_loss: 0.187418 023905/063150, loss: 0.042972, avg_loss: 0.187385 023910/063150, loss: 0.036785, avg_loss: 0.187353 023915/063150, loss: 0.022961, avg_loss: 0.187318 023920/063150, loss: 0.050631, avg_loss: 0.187297 023925/063150, loss: 0.202922, avg_loss: 0.187272 023930/063150, loss: 0.068438, avg_loss: 0.187250 023935/063150, loss: 0.119687, avg_loss: 0.187227 023940/063150, loss: 0.013069, avg_loss: 0.187199 023945/063150, loss: 0.082980, avg_loss: 0.187175 023950/063150, loss: 0.046344, avg_loss: 0.187147 023955/063150, loss: 0.019819, avg_loss: 0.187123 023960/063150, loss: 0.009261, avg_loss: 0.187088 023965/063150, loss: 0.044669, avg_loss: 0.187054 023970/063150, loss: 0.029293, avg_loss: 0.187019 023975/063150, loss: 0.095187, avg_loss: 0.186989 023980/063150, loss: 0.008274, avg_loss: 0.186954 023985/063150, loss: 0.265252, avg_loss: 0.186942 023990/063150, loss: 0.065779, avg_loss: 0.186919 023995/063150, loss: 0.007927, avg_loss: 0.186896 024000/063150, loss: 0.033144, avg_loss: 0.186864 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24000/63150: {'accuracy': 0.8658256880733946} 024005/063150, loss: 0.079531, avg_loss: 0.186848 024010/063150, loss: 0.078247, avg_loss: 0.186826 024015/063150, loss: 0.012809, avg_loss: 0.186798 024020/063150, loss: 0.070974, avg_loss: 0.186774 024025/063150, loss: 0.101348, avg_loss: 0.186751 024030/063150, loss: 0.193687, avg_loss: 0.186727 024035/063150, loss: 0.142077, avg_loss: 0.186698 024040/063150, loss: 0.016566, avg_loss: 0.186668 024045/063150, loss: 0.031934, avg_loss: 0.186634 024050/063150, loss: 0.115324, avg_loss: 0.186605 024055/063150, loss: 0.046231, avg_loss: 0.186575 024060/063150, loss: 0.150850, avg_loss: 0.186557 024065/063150, loss: 0.240307, avg_loss: 0.186540 024070/063150, loss: 0.156217, avg_loss: 0.186512 024075/063150, loss: 0.214173, avg_loss: 0.186485 024080/063150, loss: 0.002789, avg_loss: 0.186454 024085/063150, loss: 0.074546, avg_loss: 0.186438 024090/063150, loss: 0.002071, avg_loss: 0.186408 024095/063150, loss: 0.037241, avg_loss: 0.186377 024100/063150, loss: 0.055435, avg_loss: 0.186347 024105/063150, loss: 0.003818, avg_loss: 0.186314 024110/063150, loss: 0.038079, avg_loss: 0.186281 024115/063150, loss: 0.011029, avg_loss: 0.186255 024120/063150, loss: 0.097449, avg_loss: 0.186234 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24120/63150: {'accuracy': 0.8509174311926605} 024125/063150, loss: 0.079656, avg_loss: 0.186208 024130/063150, loss: 0.190383, avg_loss: 0.186189 024135/063150, loss: 0.021348, avg_loss: 0.186158 024140/063150, loss: 0.019057, avg_loss: 0.186123 024145/063150, loss: 0.007752, avg_loss: 0.186094 024150/063150, loss: 0.014676, avg_loss: 0.186066 024155/063150, loss: 0.010485, avg_loss: 0.186033 024160/063150, loss: 0.089235, avg_loss: 0.186014 024165/063150, loss: 0.100111, avg_loss: 0.185991 024170/063150, loss: 0.045686, avg_loss: 0.185957 024175/063150, loss: 0.021316, avg_loss: 0.185927 024180/063150, loss: 0.066890, avg_loss: 0.185894 024185/063150, loss: 0.017975, avg_loss: 0.185864 024190/063150, loss: 0.024485, avg_loss: 0.185829 024195/063150, loss: 0.082547, avg_loss: 0.185810 024200/063150, loss: 0.007939, avg_loss: 0.185778 024205/063150, loss: 0.016385, avg_loss: 0.185758 024210/063150, loss: 0.024695, avg_loss: 0.185736 024215/063150, loss: 0.037296, avg_loss: 0.185711 024220/063150, loss: 0.045817, avg_loss: 0.185678 024225/063150, loss: 0.045894, avg_loss: 0.185647 024230/063150, loss: 0.088235, avg_loss: 0.185618 024235/063150, loss: 0.097757, avg_loss: 0.185587 024240/063150, loss: 0.067483, avg_loss: 0.185569 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24240/63150: {'accuracy': 0.8532110091743119} 024245/063150, loss: 0.017868, avg_loss: 0.185557 024250/063150, loss: 0.013520, avg_loss: 0.185523 024255/063150, loss: 0.023752, avg_loss: 0.185510 024260/063150, loss: 0.012535, avg_loss: 0.185487 024265/063150, loss: 0.047992, avg_loss: 0.185460 024270/063150, loss: 0.032897, avg_loss: 0.185440 024275/063150, loss: 0.014173, avg_loss: 0.185429 024280/063150, loss: 0.047329, avg_loss: 0.185407 024285/063150, loss: 0.104657, avg_loss: 0.185382 024290/063150, loss: 0.010988, avg_loss: 0.185349 024295/063150, loss: 0.019007, avg_loss: 0.185326 024300/063150, loss: 0.036677, avg_loss: 0.185304 024305/063150, loss: 0.025723, avg_loss: 0.185275 024310/063150, loss: 0.007102, avg_loss: 0.185245 024315/063150, loss: 0.041256, avg_loss: 0.185226 024320/063150, loss: 0.194447, avg_loss: 0.185207 024325/063150, loss: 0.023186, avg_loss: 0.185183 024330/063150, loss: 0.010497, avg_loss: 0.185152 024335/063150, loss: 0.015822, avg_loss: 0.185123 024340/063150, loss: 0.083555, avg_loss: 0.185099 024345/063150, loss: 0.004449, avg_loss: 0.185071 024350/063150, loss: 0.004824, avg_loss: 0.185040 024355/063150, loss: 0.182587, avg_loss: 0.185017 024360/063150, loss: 0.048005, avg_loss: 0.184988 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24360/63150: {'accuracy': 0.8623853211009175} 024365/063150, loss: 0.068125, avg_loss: 0.184959 024370/063150, loss: 0.096433, avg_loss: 0.184942 024375/063150, loss: 0.013241, avg_loss: 0.184910 024380/063150, loss: 0.030887, avg_loss: 0.184882 024385/063150, loss: 0.003620, avg_loss: 0.184853 024390/063150, loss: 0.002875, avg_loss: 0.184821 024395/063150, loss: 0.001897, avg_loss: 0.184791 024400/063150, loss: 0.077137, avg_loss: 0.184771 024405/063150, loss: 0.012710, avg_loss: 0.184737 024410/063150, loss: 0.048309, avg_loss: 0.184705 024415/063150, loss: 0.034666, avg_loss: 0.184683 024420/063150, loss: 0.115656, avg_loss: 0.184663 024425/063150, loss: 0.004650, avg_loss: 0.184629 024430/063150, loss: 0.011696, avg_loss: 0.184603 024435/063150, loss: 0.173228, avg_loss: 0.184578 024440/063150, loss: 0.189898, avg_loss: 0.184556 024445/063150, loss: 0.012757, avg_loss: 0.184521 024450/063150, loss: 0.009241, avg_loss: 0.184493 024455/063150, loss: 0.168062, avg_loss: 0.184473 024460/063150, loss: 0.033910, avg_loss: 0.184447 024465/063150, loss: 0.141621, avg_loss: 0.184420 024470/063150, loss: 0.042807, avg_loss: 0.184398 024475/063150, loss: 0.031887, avg_loss: 0.184375 024480/063150, loss: 0.008083, avg_loss: 0.184353 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24480/63150: {'accuracy': 0.8600917431192661} 024485/063150, loss: 0.153941, avg_loss: 0.184336 024490/063150, loss: 0.087934, avg_loss: 0.184307 024495/063150, loss: 0.281662, avg_loss: 0.184289 024500/063150, loss: 0.020725, avg_loss: 0.184259 024505/063150, loss: 0.006375, avg_loss: 0.184237 024510/063150, loss: 0.199935, avg_loss: 0.184220 024515/063150, loss: 0.067076, avg_loss: 0.184189 024520/063150, loss: 0.034830, avg_loss: 0.184158 024525/063150, loss: 0.013565, avg_loss: 0.184129 024530/063150, loss: 0.005967, avg_loss: 0.184099 024535/063150, loss: 0.062543, avg_loss: 0.184070 024540/063150, loss: 0.149947, avg_loss: 0.184051 024545/063150, loss: 0.115131, avg_loss: 0.184024 024550/063150, loss: 0.026918, avg_loss: 0.184010 024555/063150, loss: 0.016813, avg_loss: 0.183981 024560/063150, loss: 0.036097, avg_loss: 0.183952 024565/063150, loss: 0.028338, avg_loss: 0.183928 024570/063150, loss: 0.149037, avg_loss: 0.183902 024575/063150, loss: 0.038650, avg_loss: 0.183874 024580/063150, loss: 0.041926, avg_loss: 0.183860 024585/063150, loss: 0.084249, avg_loss: 0.183832 024590/063150, loss: 0.128375, avg_loss: 0.183810 024595/063150, loss: 0.007631, avg_loss: 0.183784 024600/063150, loss: 0.047822, avg_loss: 0.183763 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24600/63150: {'accuracy': 0.8577981651376146} 024605/063150, loss: 0.120331, avg_loss: 0.183745 024610/063150, loss: 0.212945, avg_loss: 0.183730 024615/063150, loss: 0.042800, avg_loss: 0.183706 024620/063150, loss: 0.069049, avg_loss: 0.183680 024625/063150, loss: 0.187248, avg_loss: 0.183659 024630/063150, loss: 0.036414, avg_loss: 0.183632 024635/063150, loss: 0.042599, avg_loss: 0.183603 024640/063150, loss: 0.069723, avg_loss: 0.183580 024645/063150, loss: 0.022289, avg_loss: 0.183558 024650/063150, loss: 0.018387, avg_loss: 0.183529 024655/063150, loss: 0.002605, avg_loss: 0.183507 024660/063150, loss: 0.014013, avg_loss: 0.183480 024665/063150, loss: 0.018442, avg_loss: 0.183456 024670/063150, loss: 0.093861, avg_loss: 0.183427 024675/063150, loss: 0.096538, avg_loss: 0.183404 024680/063150, loss: 0.010969, avg_loss: 0.183377 024685/063150, loss: 0.031297, avg_loss: 0.183344 024690/063150, loss: 0.013102, avg_loss: 0.183311 024695/063150, loss: 0.003529, avg_loss: 0.183283 024700/063150, loss: 0.032750, avg_loss: 0.183249 024705/063150, loss: 0.045763, avg_loss: 0.183220 024710/063150, loss: 0.025768, avg_loss: 0.183206 024715/063150, loss: 0.011057, avg_loss: 0.183180 024720/063150, loss: 0.036514, avg_loss: 0.183156 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24720/63150: {'accuracy': 0.8532110091743119} 024725/063150, loss: 0.064662, avg_loss: 0.183127 024730/063150, loss: 0.018193, avg_loss: 0.183105 024735/063150, loss: 0.030102, avg_loss: 0.183087 024740/063150, loss: 0.162927, avg_loss: 0.183064 024745/063150, loss: 0.017302, avg_loss: 0.183041 024750/063150, loss: 0.062712, avg_loss: 0.183030 024755/063150, loss: 0.028731, avg_loss: 0.183003 024760/063150, loss: 0.192076, avg_loss: 0.182982 024765/063150, loss: 0.009942, avg_loss: 0.182959 024770/063150, loss: 0.014857, avg_loss: 0.182932 024775/063150, loss: 0.136700, avg_loss: 0.182910 024780/063150, loss: 0.097069, avg_loss: 0.182888 024785/063150, loss: 0.072616, avg_loss: 0.182863 024790/063150, loss: 0.063138, avg_loss: 0.182847 024795/063150, loss: 0.026529, avg_loss: 0.182819 024800/063150, loss: 0.006449, avg_loss: 0.182790 024805/063150, loss: 0.012190, avg_loss: 0.182773 024810/063150, loss: 0.041017, avg_loss: 0.182745 024815/063150, loss: 0.054915, avg_loss: 0.182721 024820/063150, loss: 0.192289, avg_loss: 0.182702 024825/063150, loss: 0.075893, avg_loss: 0.182685 024830/063150, loss: 0.226798, avg_loss: 0.182666 024835/063150, loss: 0.032918, avg_loss: 0.182640 024840/063150, loss: 0.046388, avg_loss: 0.182618 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24840/63150: {'accuracy': 0.8325688073394495} 024845/063150, loss: 0.024028, avg_loss: 0.182600 024850/063150, loss: 0.041789, avg_loss: 0.182576 024855/063150, loss: 0.013222, avg_loss: 0.182544 024860/063150, loss: 0.026604, avg_loss: 0.182522 024865/063150, loss: 0.088350, avg_loss: 0.182494 024870/063150, loss: 0.022411, avg_loss: 0.182473 024875/063150, loss: 0.016755, avg_loss: 0.182442 024880/063150, loss: 0.004009, avg_loss: 0.182413 024885/063150, loss: 0.148761, avg_loss: 0.182398 024890/063150, loss: 0.003763, avg_loss: 0.182366 024895/063150, loss: 0.092980, avg_loss: 0.182339 024900/063150, loss: 0.069951, avg_loss: 0.182311 024905/063150, loss: 0.009654, avg_loss: 0.182290 024910/063150, loss: 0.011232, avg_loss: 0.182267 024915/063150, loss: 0.054456, avg_loss: 0.182243 024920/063150, loss: 0.020618, avg_loss: 0.182212 024925/063150, loss: 0.073096, avg_loss: 0.182186 024930/063150, loss: 0.067772, avg_loss: 0.182154 024935/063150, loss: 0.028049, avg_loss: 0.182127 024940/063150, loss: 0.002562, avg_loss: 0.182100 024945/063150, loss: 0.156420, avg_loss: 0.182081 024950/063150, loss: 0.014377, avg_loss: 0.182054 024955/063150, loss: 0.015429, avg_loss: 0.182037 024960/063150, loss: 0.071069, avg_loss: 0.182014 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 24960/63150: {'accuracy': 0.8555045871559633} 024965/063150, loss: 0.020242, avg_loss: 0.181989 024970/063150, loss: 0.035802, avg_loss: 0.181963 024975/063150, loss: 0.043467, avg_loss: 0.181941 024980/063150, loss: 0.059451, avg_loss: 0.181913 024985/063150, loss: 0.085899, avg_loss: 0.181892 024990/063150, loss: 0.177826, avg_loss: 0.181868 024995/063150, loss: 0.012488, avg_loss: 0.181843 025000/063150, loss: 0.020636, avg_loss: 0.181820 025005/063150, loss: 0.050793, avg_loss: 0.181790 025010/063150, loss: 0.014892, avg_loss: 0.181764 025015/063150, loss: 0.208433, avg_loss: 0.181746 025020/063150, loss: 0.035705, avg_loss: 0.181724 025025/063150, loss: 0.021759, avg_loss: 0.181696 025030/063150, loss: 0.026641, avg_loss: 0.181667 025035/063150, loss: 0.033330, avg_loss: 0.181641 025040/063150, loss: 0.034776, avg_loss: 0.181616 025045/063150, loss: 0.021790, avg_loss: 0.181588 025050/063150, loss: 0.016698, avg_loss: 0.181558 025055/063150, loss: 0.031271, avg_loss: 0.181526 025060/063150, loss: 0.086882, avg_loss: 0.181509 025065/063150, loss: 0.014553, avg_loss: 0.181480 025070/063150, loss: 0.182002, avg_loss: 0.181468 025075/063150, loss: 0.046344, avg_loss: 0.181437 025080/063150, loss: 0.164926, avg_loss: 0.181417 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 25080/63150: {'accuracy': 0.8635321100917431} 025085/063150, loss: 0.027479, avg_loss: 0.181393 025090/063150, loss: 0.003435, avg_loss: 0.181362 025095/063150, loss: 0.011622, avg_loss: 0.181333 025100/063150, loss: 0.062323, avg_loss: 0.181308 025105/063150, loss: 0.056266, avg_loss: 0.181284 025110/063150, loss: 0.118883, avg_loss: 0.181259 025115/063150, loss: 0.055621, avg_loss: 0.181237 025120/063150, loss: 0.033765, avg_loss: 0.181218 025125/063150, loss: 0.014399, avg_loss: 0.181199 025130/063150, loss: 0.032655, avg_loss: 0.181168 025135/063150, loss: 0.121390, avg_loss: 0.181151 025140/063150, loss: 0.149139, avg_loss: 0.181129 025145/063150, loss: 0.033929, avg_loss: 0.181106 025150/063150, loss: 0.155339, avg_loss: 0.181089 025155/063150, loss: 0.006720, avg_loss: 0.181065 025160/063150, loss: 0.009288, avg_loss: 0.181044 025165/063150, loss: 0.029317, avg_loss: 0.181022 025170/063150, loss: 0.098267, avg_loss: 0.180998 025175/063150, loss: 0.025863, avg_loss: 0.180974 025180/063150, loss: 0.048030, avg_loss: 0.180953 025185/063150, loss: 0.070609, avg_loss: 0.180945 025190/063150, loss: 0.043682, avg_loss: 0.180933 025195/063150, loss: 0.272356, avg_loss: 0.180920 025200/063150, loss: 0.151014, avg_loss: 0.180896 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 11, step 25200/63150: {'accuracy': 0.8440366972477065} 025205/063150, loss: 0.035805, avg_loss: 0.180873 025210/063150, loss: 0.019690, avg_loss: 0.180852 025215/063150, loss: 0.073691, avg_loss: 0.180828 025220/063150, loss: 0.030669, avg_loss: 0.180801 025225/063150, loss: 0.188566, avg_loss: 0.180785 025230/063150, loss: 0.078658, avg_loss: 0.180763 025235/063150, loss: 0.030544, avg_loss: 0.180741 025240/063150, loss: 0.039595, avg_loss: 0.180711 025245/063150, loss: 0.078517, avg_loss: 0.180696 025250/063150, loss: 0.116549, avg_loss: 0.180675 025255/063150, loss: 0.110948, avg_loss: 0.180659 025260/063150, loss: 0.055206, avg_loss: 0.180637 025265/063150, loss: 0.070407, avg_loss: 0.180607 025270/063150, loss: 0.012311, avg_loss: 0.180581 025275/063150, loss: 0.039873, avg_loss: 0.180556 025280/063150, loss: 0.102752, avg_loss: 0.180532 025285/063150, loss: 0.016404, avg_loss: 0.180504 025290/063150, loss: 0.003273, avg_loss: 0.180473 025295/063150, loss: 0.027373, avg_loss: 0.180450 025300/063150, loss: 0.018549, avg_loss: 0.180423 025305/063150, loss: 0.008971, avg_loss: 0.180395 025310/063150, loss: 0.010499, avg_loss: 0.180370 025315/063150, loss: 0.254581, avg_loss: 0.180357 025320/063150, loss: 0.026328, avg_loss: 0.180330 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 25320/63150: {'accuracy': 0.8486238532110092} 025325/063150, loss: 0.036055, avg_loss: 0.180314 025330/063150, loss: 0.004353, avg_loss: 0.180284 025335/063150, loss: 0.126383, avg_loss: 0.180256 025340/063150, loss: 0.116422, avg_loss: 0.180232 025345/063150, loss: 0.004972, avg_loss: 0.180207 025350/063150, loss: 0.012679, avg_loss: 0.180176 025355/063150, loss: 0.151178, avg_loss: 0.180152 025360/063150, loss: 0.010965, avg_loss: 0.180131 025365/063150, loss: 0.015291, avg_loss: 0.180102 025370/063150, loss: 0.030493, avg_loss: 0.180078 025375/063150, loss: 0.038352, avg_loss: 0.180062 025380/063150, loss: 0.005015, avg_loss: 0.180040 025385/063150, loss: 0.010526, avg_loss: 0.180016 025390/063150, loss: 0.078440, avg_loss: 0.180003 025395/063150, loss: 0.078548, avg_loss: 0.179988 025400/063150, loss: 0.007958, avg_loss: 0.179958 025405/063150, loss: 0.025154, avg_loss: 0.179933 025410/063150, loss: 0.044686, avg_loss: 0.179909 025415/063150, loss: 0.026168, avg_loss: 0.179891 025420/063150, loss: 0.029578, avg_loss: 0.179861 025425/063150, loss: 0.098433, avg_loss: 0.179834 025430/063150, loss: 0.021918, avg_loss: 0.179806 025435/063150, loss: 0.064521, avg_loss: 0.179775 025440/063150, loss: 0.130319, avg_loss: 0.179748 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 25440/63150: {'accuracy': 0.8543577981651376} 025445/063150, loss: 0.043099, avg_loss: 0.179723 025450/063150, loss: 0.024163, avg_loss: 0.179695 025455/063150, loss: 0.105224, avg_loss: 0.179667 025460/063150, loss: 0.076689, avg_loss: 0.179642 025465/063150, loss: 0.107199, avg_loss: 0.179623 025470/063150, loss: 0.142201, avg_loss: 0.179604 025475/063150, loss: 0.003523, avg_loss: 0.179573 025480/063150, loss: 0.103005, avg_loss: 0.179549 025485/063150, loss: 0.044651, avg_loss: 0.179529 025490/063150, loss: 0.182101, avg_loss: 0.179506 025495/063150, loss: 0.055720, avg_loss: 0.179478 025500/063150, loss: 0.059569, avg_loss: 0.179448 025505/063150, loss: 0.024212, avg_loss: 0.179419 025510/063150, loss: 0.085435, avg_loss: 0.179397 025515/063150, loss: 0.002877, avg_loss: 0.179367 025520/063150, loss: 0.006330, avg_loss: 0.179341 025525/063150, loss: 0.127947, avg_loss: 0.179315 025530/063150, loss: 0.043959, avg_loss: 0.179293 025535/063150, loss: 0.103943, avg_loss: 0.179270 025540/063150, loss: 0.038381, avg_loss: 0.179246 025545/063150, loss: 0.030754, avg_loss: 0.179223 025550/063150, loss: 0.019015, avg_loss: 0.179196 025555/063150, loss: 0.054003, avg_loss: 0.179176 025560/063150, loss: 0.086202, avg_loss: 0.179151 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 25560/63150: {'accuracy': 0.8509174311926605} 025565/063150, loss: 0.123316, avg_loss: 0.179124 025570/063150, loss: 0.009997, avg_loss: 0.179091 025575/063150, loss: 0.034674, avg_loss: 0.179065 025580/063150, loss: 0.009962, avg_loss: 0.179040 025585/063150, loss: 0.007702, avg_loss: 0.179010 025590/063150, loss: 0.005024, avg_loss: 0.178986 025595/063150, loss: 0.006685, avg_loss: 0.178957 025600/063150, loss: 0.025698, avg_loss: 0.178924 025605/063150, loss: 0.007942, avg_loss: 0.178898 025610/063150, loss: 0.005722, avg_loss: 0.178867 025615/063150, loss: 0.067931, avg_loss: 0.178839 025620/063150, loss: 0.092502, avg_loss: 0.178821 025625/063150, loss: 0.130648, avg_loss: 0.178804 025630/063150, loss: 0.035525, avg_loss: 0.178778 025635/063150, loss: 0.149600, avg_loss: 0.178758 025640/063150, loss: 0.024876, avg_loss: 0.178734 025645/063150, loss: 0.061973, avg_loss: 0.178711 025650/063150, loss: 0.037176, avg_loss: 0.178682 025655/063150, loss: 0.010677, avg_loss: 0.178652 025660/063150, loss: 0.002081, avg_loss: 0.178623 025665/063150, loss: 0.016659, avg_loss: 0.178591 025670/063150, loss: 0.019940, avg_loss: 0.178560 025675/063150, loss: 0.012090, avg_loss: 0.178533 025680/063150, loss: 0.016379, avg_loss: 0.178518 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 25680/63150: {'accuracy': 0.8612385321100917} 025685/063150, loss: 0.009089, avg_loss: 0.178490 025690/063150, loss: 0.002533, avg_loss: 0.178470 025695/063150, loss: 0.049635, avg_loss: 0.178446 025700/063150, loss: 0.040922, avg_loss: 0.178421 025705/063150, loss: 0.004225, avg_loss: 0.178393 025710/063150, loss: 0.022018, avg_loss: 0.178362 025715/063150, loss: 0.004992, avg_loss: 0.178340 025720/063150, loss: 0.002986, avg_loss: 0.178309 025725/063150, loss: 0.009355, avg_loss: 0.178278 025730/063150, loss: 0.040988, avg_loss: 0.178252 025735/063150, loss: 0.002253, avg_loss: 0.178220 025740/063150, loss: 0.106646, avg_loss: 0.178192 025745/063150, loss: 0.021209, avg_loss: 0.178172 025750/063150, loss: 0.004755, avg_loss: 0.178142 025755/063150, loss: 0.003120, avg_loss: 0.178109 025760/063150, loss: 0.041219, avg_loss: 0.178080 025765/063150, loss: 0.029819, avg_loss: 0.178055 025770/063150, loss: 0.027382, avg_loss: 0.178024 025775/063150, loss: 0.000718, avg_loss: 0.178000 025780/063150, loss: 0.001890, avg_loss: 0.177970 025785/063150, loss: 0.228049, avg_loss: 0.177954 025790/063150, loss: 0.008679, avg_loss: 0.177932 025795/063150, loss: 0.114314, avg_loss: 0.177907 025800/063150, loss: 0.236510, avg_loss: 0.177884 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 25800/63150: {'accuracy': 0.8612385321100917} 025805/063150, loss: 0.174846, avg_loss: 0.177865 025810/063150, loss: 0.006171, avg_loss: 0.177850 025815/063150, loss: 0.172056, avg_loss: 0.177826 025820/063150, loss: 0.071335, avg_loss: 0.177804 025825/063150, loss: 0.007823, avg_loss: 0.177780 025830/063150, loss: 0.071160, avg_loss: 0.177766 025835/063150, loss: 0.005636, avg_loss: 0.177753 025840/063150, loss: 0.028840, avg_loss: 0.177725 025845/063150, loss: 0.025217, avg_loss: 0.177706 025850/063150, loss: 0.110818, avg_loss: 0.177686 025855/063150, loss: 0.024595, avg_loss: 0.177659 025860/063150, loss: 0.040332, avg_loss: 0.177631 025865/063150, loss: 0.023318, avg_loss: 0.177601 025870/063150, loss: 0.031631, avg_loss: 0.177573 025875/063150, loss: 0.040223, avg_loss: 0.177556 025880/063150, loss: 0.056486, avg_loss: 0.177525 025885/063150, loss: 0.009252, avg_loss: 0.177499 025890/063150, loss: 0.029961, avg_loss: 0.177472 025895/063150, loss: 0.004423, avg_loss: 0.177442 025900/063150, loss: 0.057323, avg_loss: 0.177419 025905/063150, loss: 0.038005, avg_loss: 0.177394 025910/063150, loss: 0.014551, avg_loss: 0.177364 025915/063150, loss: 0.002359, avg_loss: 0.177336 025920/063150, loss: 0.031817, avg_loss: 0.177307 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 25920/63150: {'accuracy': 0.8612385321100917} 025925/063150, loss: 0.051613, avg_loss: 0.177281 025930/063150, loss: 0.011287, avg_loss: 0.177255 025935/063150, loss: 0.020584, avg_loss: 0.177228 025940/063150, loss: 0.006318, avg_loss: 0.177202 025945/063150, loss: 0.019082, avg_loss: 0.177176 025950/063150, loss: 0.041726, avg_loss: 0.177153 025955/063150, loss: 0.010348, avg_loss: 0.177124 025960/063150, loss: 0.003156, avg_loss: 0.177097 025965/063150, loss: 0.008176, avg_loss: 0.177074 025970/063150, loss: 0.039437, avg_loss: 0.177045 025975/063150, loss: 0.315310, avg_loss: 0.177033 025980/063150, loss: 0.002024, avg_loss: 0.177004 025985/063150, loss: 0.009122, avg_loss: 0.176978 025990/063150, loss: 0.022559, avg_loss: 0.176953 025995/063150, loss: 0.014303, avg_loss: 0.176927 026000/063150, loss: 0.040606, avg_loss: 0.176900 026005/063150, loss: 0.164569, avg_loss: 0.176880 026010/063150, loss: 0.003887, avg_loss: 0.176848 026015/063150, loss: 0.008937, avg_loss: 0.176822 026020/063150, loss: 0.150807, avg_loss: 0.176802 026025/063150, loss: 0.107966, avg_loss: 0.176776 026030/063150, loss: 0.096582, avg_loss: 0.176755 026035/063150, loss: 0.203925, avg_loss: 0.176733 026040/063150, loss: 0.177352, avg_loss: 0.176720 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26040/63150: {'accuracy': 0.8635321100917431} 026045/063150, loss: 0.008895, avg_loss: 0.176698 026050/063150, loss: 0.019497, avg_loss: 0.176678 026055/063150, loss: 0.084837, avg_loss: 0.176656 026060/063150, loss: 0.008363, avg_loss: 0.176636 026065/063150, loss: 0.025034, avg_loss: 0.176612 026070/063150, loss: 0.168590, avg_loss: 0.176592 026075/063150, loss: 0.054124, avg_loss: 0.176569 026080/063150, loss: 0.113294, avg_loss: 0.176542 026085/063150, loss: 0.080666, avg_loss: 0.176518 026090/063150, loss: 0.038165, avg_loss: 0.176495 026095/063150, loss: 0.077737, avg_loss: 0.176469 026100/063150, loss: 0.039365, avg_loss: 0.176449 026105/063150, loss: 0.047292, avg_loss: 0.176424 026110/063150, loss: 0.007632, avg_loss: 0.176403 026115/063150, loss: 0.015617, avg_loss: 0.176373 026120/063150, loss: 0.024119, avg_loss: 0.176355 026125/063150, loss: 0.015245, avg_loss: 0.176325 026130/063150, loss: 0.022136, avg_loss: 0.176296 026135/063150, loss: 0.184966, avg_loss: 0.176278 026140/063150, loss: 0.052855, avg_loss: 0.176252 026145/063150, loss: 0.201355, avg_loss: 0.176238 026150/063150, loss: 0.029181, avg_loss: 0.176213 026155/063150, loss: 0.034329, avg_loss: 0.176187 026160/063150, loss: 0.010293, avg_loss: 0.176166 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26160/63150: {'accuracy': 0.8509174311926605} 026165/063150, loss: 0.019409, avg_loss: 0.176137 026170/063150, loss: 0.197464, avg_loss: 0.176113 026175/063150, loss: 0.109693, avg_loss: 0.176089 026180/063150, loss: 0.079245, avg_loss: 0.176066 026185/063150, loss: 0.031555, avg_loss: 0.176049 026190/063150, loss: 0.045944, avg_loss: 0.176023 026195/063150, loss: 0.049008, avg_loss: 0.175998 026200/063150, loss: 0.011719, avg_loss: 0.175971 026205/063150, loss: 0.071017, avg_loss: 0.175945 026210/063150, loss: 0.031797, avg_loss: 0.175921 026215/063150, loss: 0.048544, avg_loss: 0.175893 026220/063150, loss: 0.020856, avg_loss: 0.175870 026225/063150, loss: 0.005539, avg_loss: 0.175843 026230/063150, loss: 0.244267, avg_loss: 0.175824 026235/063150, loss: 0.003661, avg_loss: 0.175799 026240/063150, loss: 0.050767, avg_loss: 0.175776 026245/063150, loss: 0.039586, avg_loss: 0.175753 026250/063150, loss: 0.015857, avg_loss: 0.175737 026255/063150, loss: 0.030485, avg_loss: 0.175717 026260/063150, loss: 0.030583, avg_loss: 0.175700 026265/063150, loss: 0.116681, avg_loss: 0.175674 026270/063150, loss: 0.072904, avg_loss: 0.175660 026275/063150, loss: 0.133219, avg_loss: 0.175638 026280/063150, loss: 0.034455, avg_loss: 0.175623 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26280/63150: {'accuracy': 0.8612385321100917} 026285/063150, loss: 0.018311, avg_loss: 0.175605 026290/063150, loss: 0.038477, avg_loss: 0.175580 026295/063150, loss: 0.015998, avg_loss: 0.175551 026300/063150, loss: 0.109865, avg_loss: 0.175525 026305/063150, loss: 0.035125, avg_loss: 0.175497 026310/063150, loss: 0.007492, avg_loss: 0.175474 026315/063150, loss: 0.133003, avg_loss: 0.175452 026320/063150, loss: 0.038199, avg_loss: 0.175429 026325/063150, loss: 0.061738, avg_loss: 0.175407 026330/063150, loss: 0.022608, avg_loss: 0.175381 026335/063150, loss: 0.058620, avg_loss: 0.175353 026340/063150, loss: 0.079079, avg_loss: 0.175332 026345/063150, loss: 0.118646, avg_loss: 0.175315 026350/063150, loss: 0.004813, avg_loss: 0.175296 026355/063150, loss: 0.061195, avg_loss: 0.175268 026360/063150, loss: 0.016608, avg_loss: 0.175243 026365/063150, loss: 0.101748, avg_loss: 0.175218 026370/063150, loss: 0.087435, avg_loss: 0.175193 026375/063150, loss: 0.064701, avg_loss: 0.175166 026380/063150, loss: 0.044360, avg_loss: 0.175139 026385/063150, loss: 0.013221, avg_loss: 0.175112 026390/063150, loss: 0.021608, avg_loss: 0.175096 026395/063150, loss: 0.004493, avg_loss: 0.175071 026400/063150, loss: 0.055042, avg_loss: 0.175055 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26400/63150: {'accuracy': 0.8635321100917431} 026405/063150, loss: 0.016765, avg_loss: 0.175032 026410/063150, loss: 0.031683, avg_loss: 0.175003 026415/063150, loss: 0.165354, avg_loss: 0.174981 026420/063150, loss: 0.094164, avg_loss: 0.174959 026425/063150, loss: 0.073218, avg_loss: 0.174932 026430/063150, loss: 0.033795, avg_loss: 0.174914 026435/063150, loss: 0.012268, avg_loss: 0.174893 026440/063150, loss: 0.021147, avg_loss: 0.174872 026445/063150, loss: 0.052250, avg_loss: 0.174843 026450/063150, loss: 0.093198, avg_loss: 0.174832 026455/063150, loss: 0.016378, avg_loss: 0.174802 026460/063150, loss: 0.033599, avg_loss: 0.174780 026465/063150, loss: 0.044320, avg_loss: 0.174756 026470/063150, loss: 0.030520, avg_loss: 0.174735 026475/063150, loss: 0.031806, avg_loss: 0.174711 026480/063150, loss: 0.013599, avg_loss: 0.174689 026485/063150, loss: 0.019508, avg_loss: 0.174670 026490/063150, loss: 0.029296, avg_loss: 0.174652 026495/063150, loss: 0.040407, avg_loss: 0.174631 026500/063150, loss: 0.033737, avg_loss: 0.174613 026505/063150, loss: 0.140205, avg_loss: 0.174595 026510/063150, loss: 0.092215, avg_loss: 0.174573 026515/063150, loss: 0.079246, avg_loss: 0.174550 026520/063150, loss: 0.006094, avg_loss: 0.174526 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26520/63150: {'accuracy': 0.8577981651376146} 026525/063150, loss: 0.020210, avg_loss: 0.174500 026530/063150, loss: 0.107439, avg_loss: 0.174481 026535/063150, loss: 0.005114, avg_loss: 0.174455 026540/063150, loss: 0.027660, avg_loss: 0.174429 026545/063150, loss: 0.013018, avg_loss: 0.174404 026550/063150, loss: 0.037231, avg_loss: 0.174377 026555/063150, loss: 0.004058, avg_loss: 0.174357 026560/063150, loss: 0.078024, avg_loss: 0.174340 026565/063150, loss: 0.022878, avg_loss: 0.174328 026570/063150, loss: 0.143254, avg_loss: 0.174313 026575/063150, loss: 0.148537, avg_loss: 0.174300 026580/063150, loss: 0.072872, avg_loss: 0.174283 026585/063150, loss: 0.007272, avg_loss: 0.174253 026590/063150, loss: 0.177293, avg_loss: 0.174237 026595/063150, loss: 0.093959, avg_loss: 0.174211 026600/063150, loss: 0.062901, avg_loss: 0.174188 026605/063150, loss: 0.054010, avg_loss: 0.174164 026610/063150, loss: 0.042959, avg_loss: 0.174138 026615/063150, loss: 0.025157, avg_loss: 0.174113 026620/063150, loss: 0.014808, avg_loss: 0.174085 026625/063150, loss: 0.060873, avg_loss: 0.174063 026630/063150, loss: 0.008438, avg_loss: 0.174035 026635/063150, loss: 0.012551, avg_loss: 0.174008 026640/063150, loss: 0.165363, avg_loss: 0.173985 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26640/63150: {'accuracy': 0.8577981651376146} 026645/063150, loss: 0.009919, avg_loss: 0.173956 026650/063150, loss: 0.051908, avg_loss: 0.173928 026655/063150, loss: 0.059693, avg_loss: 0.173905 026660/063150, loss: 0.224565, avg_loss: 0.173887 026665/063150, loss: 0.006085, avg_loss: 0.173869 026670/063150, loss: 0.062619, avg_loss: 0.173849 026675/063150, loss: 0.005102, avg_loss: 0.173822 026680/063150, loss: 0.016544, avg_loss: 0.173797 026685/063150, loss: 0.005365, avg_loss: 0.173778 026690/063150, loss: 0.051824, avg_loss: 0.173750 026695/063150, loss: 0.009336, avg_loss: 0.173723 026700/063150, loss: 0.091660, avg_loss: 0.173699 026705/063150, loss: 0.002974, avg_loss: 0.173671 026710/063150, loss: 0.071114, avg_loss: 0.173645 026715/063150, loss: 0.005099, avg_loss: 0.173625 026720/063150, loss: 0.006074, avg_loss: 0.173600 026725/063150, loss: 0.235761, avg_loss: 0.173579 026730/063150, loss: 0.067065, avg_loss: 0.173554 026735/063150, loss: 0.048149, avg_loss: 0.173525 026740/063150, loss: 0.008848, avg_loss: 0.173502 026745/063150, loss: 0.008395, avg_loss: 0.173486 026750/063150, loss: 0.001538, avg_loss: 0.173458 026755/063150, loss: 0.257501, avg_loss: 0.173443 026760/063150, loss: 0.005566, avg_loss: 0.173416 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26760/63150: {'accuracy': 0.8635321100917431} 026765/063150, loss: 0.007365, avg_loss: 0.173394 026770/063150, loss: 0.044137, avg_loss: 0.173372 026775/063150, loss: 0.018343, avg_loss: 0.173347 026780/063150, loss: 0.053034, avg_loss: 0.173332 026785/063150, loss: 0.054620, avg_loss: 0.173305 026790/063150, loss: 0.005234, avg_loss: 0.173291 026795/063150, loss: 0.041505, avg_loss: 0.173264 026800/063150, loss: 0.202493, avg_loss: 0.173246 026805/063150, loss: 0.135826, avg_loss: 0.173230 026810/063150, loss: 0.009055, avg_loss: 0.173206 026815/063150, loss: 0.053127, avg_loss: 0.173185 026820/063150, loss: 0.044213, avg_loss: 0.173169 026825/063150, loss: 0.015025, avg_loss: 0.173152 026830/063150, loss: 0.039702, avg_loss: 0.173128 026835/063150, loss: 0.090928, avg_loss: 0.173106 026840/063150, loss: 0.018924, avg_loss: 0.173086 026845/063150, loss: 0.024336, avg_loss: 0.173060 026850/063150, loss: 0.009836, avg_loss: 0.173032 026855/063150, loss: 0.377723, avg_loss: 0.173020 026860/063150, loss: 0.003201, avg_loss: 0.172993 026865/063150, loss: 0.029066, avg_loss: 0.172965 026870/063150, loss: 0.062505, avg_loss: 0.172944 026875/063150, loss: 0.034272, avg_loss: 0.172916 026880/063150, loss: 0.052026, avg_loss: 0.172897 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 26880/63150: {'accuracy': 0.8577981651376146} 026885/063150, loss: 0.011020, avg_loss: 0.172872 026890/063150, loss: 0.031410, avg_loss: 0.172845 026895/063150, loss: 0.020815, avg_loss: 0.172824 026900/063150, loss: 0.043787, avg_loss: 0.172807 026905/063150, loss: 0.020614, avg_loss: 0.172790 026910/063150, loss: 0.019679, avg_loss: 0.172770 026915/063150, loss: 0.084147, avg_loss: 0.172748 026920/063150, loss: 0.176122, avg_loss: 0.172740 026925/063150, loss: 0.066586, avg_loss: 0.172716 026930/063150, loss: 0.112169, avg_loss: 0.172700 026935/063150, loss: 0.010257, avg_loss: 0.172678 026940/063150, loss: 0.006340, avg_loss: 0.172651 026945/063150, loss: 0.025723, avg_loss: 0.172632 026950/063150, loss: 0.026241, avg_loss: 0.172612 026955/063150, loss: 0.018900, avg_loss: 0.172583 026960/063150, loss: 0.028388, avg_loss: 0.172564 026965/063150, loss: 0.056258, avg_loss: 0.172541 026970/063150, loss: 0.023552, avg_loss: 0.172518 026975/063150, loss: 0.047599, avg_loss: 0.172491 026980/063150, loss: 0.094918, avg_loss: 0.172469 026985/063150, loss: 0.007090, avg_loss: 0.172446 026990/063150, loss: 0.002823, avg_loss: 0.172421 026995/063150, loss: 0.149088, avg_loss: 0.172404 027000/063150, loss: 0.065625, avg_loss: 0.172385 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 27000/63150: {'accuracy': 0.8612385321100917} 027005/063150, loss: 0.052874, avg_loss: 0.172360 027010/063150, loss: 0.237368, avg_loss: 0.172346 027015/063150, loss: 0.026472, avg_loss: 0.172325 027020/063150, loss: 0.023582, avg_loss: 0.172299 027025/063150, loss: 0.056197, avg_loss: 0.172281 027030/063150, loss: 0.007635, avg_loss: 0.172254 027035/063150, loss: 0.025254, avg_loss: 0.172230 027040/063150, loss: 0.152112, avg_loss: 0.172212 027045/063150, loss: 0.026842, avg_loss: 0.172186 027050/063150, loss: 0.018775, avg_loss: 0.172157 027055/063150, loss: 0.040150, avg_loss: 0.172138 027060/063150, loss: 0.011728, avg_loss: 0.172114 027065/063150, loss: 0.055900, avg_loss: 0.172096 027070/063150, loss: 0.022907, avg_loss: 0.172080 027075/063150, loss: 0.102153, avg_loss: 0.172061 027080/063150, loss: 0.008250, avg_loss: 0.172038 027085/063150, loss: 0.011941, avg_loss: 0.172008 027090/063150, loss: 0.022448, avg_loss: 0.171987 027095/063150, loss: 0.009386, avg_loss: 0.171959 027100/063150, loss: 0.000937, avg_loss: 0.171934 027105/063150, loss: 0.006629, avg_loss: 0.171908 027110/063150, loss: 0.030608, avg_loss: 0.171887 027115/063150, loss: 0.008867, avg_loss: 0.171866 027120/063150, loss: 0.087148, avg_loss: 0.171844 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 27120/63150: {'accuracy': 0.8463302752293578} 027125/063150, loss: 0.027899, avg_loss: 0.171820 027130/063150, loss: 0.006329, avg_loss: 0.171791 027135/063150, loss: 0.166962, avg_loss: 0.171779 027140/063150, loss: 0.026513, avg_loss: 0.171756 027145/063150, loss: 0.005434, avg_loss: 0.171741 027150/063150, loss: 0.041131, avg_loss: 0.171718 027155/063150, loss: 0.009396, avg_loss: 0.171697 027160/063150, loss: 0.019303, avg_loss: 0.171671 027165/063150, loss: 0.069126, avg_loss: 0.171648 027170/063150, loss: 0.166649, avg_loss: 0.171640 027175/063150, loss: 0.022423, avg_loss: 0.171613 027180/063150, loss: 0.035379, avg_loss: 0.171590 027185/063150, loss: 0.061413, avg_loss: 0.171565 027190/063150, loss: 0.032296, avg_loss: 0.171538 027195/063150, loss: 0.002724, avg_loss: 0.171509 027200/063150, loss: 0.035373, avg_loss: 0.171488 027205/063150, loss: 0.005380, avg_loss: 0.171467 027210/063150, loss: 0.030348, avg_loss: 0.171445 027215/063150, loss: 0.092380, avg_loss: 0.171420 027220/063150, loss: 0.003972, avg_loss: 0.171396 027225/063150, loss: 0.003893, avg_loss: 0.171378 027230/063150, loss: 0.055151, avg_loss: 0.171357 027235/063150, loss: 0.018740, avg_loss: 0.171328 027240/063150, loss: 0.069493, avg_loss: 0.171309 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 27240/63150: {'accuracy': 0.8612385321100917} 027245/063150, loss: 0.066344, avg_loss: 0.171284 027250/063150, loss: 0.052729, avg_loss: 0.171269 027255/063150, loss: 0.053928, avg_loss: 0.171245 027260/063150, loss: 0.038789, avg_loss: 0.171220 027265/063150, loss: 0.010700, avg_loss: 0.171196 027270/063150, loss: 0.051406, avg_loss: 0.171171 027275/063150, loss: 0.131780, avg_loss: 0.171155 027280/063150, loss: 0.040621, avg_loss: 0.171136 027285/063150, loss: 0.061473, avg_loss: 0.171118 027290/063150, loss: 0.002834, avg_loss: 0.171097 027295/063150, loss: 0.043497, avg_loss: 0.171077 027300/063150, loss: 0.051575, avg_loss: 0.171057 027305/063150, loss: 0.017404, avg_loss: 0.171034 027310/063150, loss: 0.027702, avg_loss: 0.171013 027315/063150, loss: 0.204149, avg_loss: 0.170995 027320/063150, loss: 0.051431, avg_loss: 0.170973 027325/063150, loss: 0.009916, avg_loss: 0.170952 027330/063150, loss: 0.003211, avg_loss: 0.170930 027335/063150, loss: 0.037592, avg_loss: 0.170912 027340/063150, loss: 0.071714, avg_loss: 0.170886 027345/063150, loss: 0.119424, avg_loss: 0.170870 027350/063150, loss: 0.083736, avg_loss: 0.170852 027355/063150, loss: 0.017677, avg_loss: 0.170826 027360/063150, loss: 0.023233, avg_loss: 0.170800 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 12, step 27360/63150: {'accuracy': 0.8577981651376146} 027365/063150, loss: 0.022504, avg_loss: 0.170783 027370/063150, loss: 0.005160, avg_loss: 0.170758 027375/063150, loss: 0.086048, avg_loss: 0.170735 027380/063150, loss: 0.020944, avg_loss: 0.170716 027385/063150, loss: 0.261572, avg_loss: 0.170703 027390/063150, loss: 0.043341, avg_loss: 0.170681 027395/063150, loss: 0.040651, avg_loss: 0.170658 027400/063150, loss: 0.237097, avg_loss: 0.170644 027405/063150, loss: 0.016556, avg_loss: 0.170620 027410/063150, loss: 0.012082, avg_loss: 0.170597 027415/063150, loss: 0.065450, avg_loss: 0.170575 027420/063150, loss: 0.225460, avg_loss: 0.170560 027425/063150, loss: 0.007740, avg_loss: 0.170533 027430/063150, loss: 0.009014, avg_loss: 0.170510 027435/063150, loss: 0.084257, avg_loss: 0.170485 027440/063150, loss: 0.198444, avg_loss: 0.170463 027445/063150, loss: 0.010752, avg_loss: 0.170435 027450/063150, loss: 0.041156, avg_loss: 0.170411 027455/063150, loss: 0.032650, avg_loss: 0.170397 027460/063150, loss: 0.008656, avg_loss: 0.170372 027465/063150, loss: 0.005737, avg_loss: 0.170349 027470/063150, loss: 0.248669, avg_loss: 0.170332 027475/063150, loss: 0.012696, avg_loss: 0.170310 027480/063150, loss: 0.023002, avg_loss: 0.170283 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 27480/63150: {'accuracy': 0.8555045871559633} 027485/063150, loss: 0.045720, avg_loss: 0.170255 027490/063150, loss: 0.044354, avg_loss: 0.170237 027495/063150, loss: 0.003510, avg_loss: 0.170213 027500/063150, loss: 0.018427, avg_loss: 0.170185 027505/063150, loss: 0.046245, avg_loss: 0.170166 027510/063150, loss: 0.084126, avg_loss: 0.170140 027515/063150, loss: 0.023775, avg_loss: 0.170120 027520/063150, loss: 0.199156, avg_loss: 0.170101 027525/063150, loss: 0.052722, avg_loss: 0.170079 027530/063150, loss: 0.007972, avg_loss: 0.170051 027535/063150, loss: 0.022698, avg_loss: 0.170026 027540/063150, loss: 0.103719, avg_loss: 0.170006 027545/063150, loss: 0.116714, avg_loss: 0.169989 027550/063150, loss: 0.037627, avg_loss: 0.169964 027555/063150, loss: 0.004612, avg_loss: 0.169939 027560/063150, loss: 0.008760, avg_loss: 0.169914 027565/063150, loss: 0.006600, avg_loss: 0.169892 027570/063150, loss: 0.151038, avg_loss: 0.169868 027575/063150, loss: 0.034564, avg_loss: 0.169844 027580/063150, loss: 0.011232, avg_loss: 0.169819 027585/063150, loss: 0.224483, avg_loss: 0.169803 027590/063150, loss: 0.017308, avg_loss: 0.169776 027595/063150, loss: 0.060893, avg_loss: 0.169750 027600/063150, loss: 0.076629, avg_loss: 0.169733 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 27600/63150: {'accuracy': 0.8646788990825688} 027605/063150, loss: 0.033323, avg_loss: 0.169706 027610/063150, loss: 0.008787, avg_loss: 0.169681 027615/063150, loss: 0.013909, avg_loss: 0.169653 027620/063150, loss: 0.031686, avg_loss: 0.169628 027625/063150, loss: 0.024723, avg_loss: 0.169617 027630/063150, loss: 0.003257, avg_loss: 0.169591 027635/063150, loss: 0.001370, avg_loss: 0.169571 027640/063150, loss: 0.032039, avg_loss: 0.169543 027645/063150, loss: 0.007934, avg_loss: 0.169523 027650/063150, loss: 0.015324, avg_loss: 0.169496 027655/063150, loss: 0.017464, avg_loss: 0.169470 027660/063150, loss: 0.014664, avg_loss: 0.169441 027665/063150, loss: 0.192293, avg_loss: 0.169420 027670/063150, loss: 0.009042, avg_loss: 0.169392 027675/063150, loss: 0.001605, avg_loss: 0.169366 027680/063150, loss: 0.003103, avg_loss: 0.169341 027685/063150, loss: 0.042236, avg_loss: 0.169321 027690/063150, loss: 0.006726, avg_loss: 0.169298 027695/063150, loss: 0.015539, avg_loss: 0.169280 027700/063150, loss: 0.010040, avg_loss: 0.169253 027705/063150, loss: 0.017646, avg_loss: 0.169233 027710/063150, loss: 0.094200, avg_loss: 0.169210 027715/063150, loss: 0.041670, avg_loss: 0.169186 027720/063150, loss: 0.004672, avg_loss: 0.169161 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 27720/63150: {'accuracy': 0.8222477064220184} 027725/063150, loss: 0.023786, avg_loss: 0.169144 027730/063150, loss: 0.002828, avg_loss: 0.169120 027735/063150, loss: 0.127803, avg_loss: 0.169105 027740/063150, loss: 0.217781, avg_loss: 0.169087 027745/063150, loss: 0.201857, avg_loss: 0.169071 027750/063150, loss: 0.022577, avg_loss: 0.169048 027755/063150, loss: 0.016370, avg_loss: 0.169022 027760/063150, loss: 0.029963, avg_loss: 0.169001 027765/063150, loss: 0.107884, avg_loss: 0.168980 027770/063150, loss: 0.018045, avg_loss: 0.168954 027775/063150, loss: 0.048940, avg_loss: 0.168938 027780/063150, loss: 0.024265, avg_loss: 0.168919 027785/063150, loss: 0.003584, avg_loss: 0.168897 027790/063150, loss: 0.041260, avg_loss: 0.168873 027795/063150, loss: 0.008950, avg_loss: 0.168848 027800/063150, loss: 0.002752, avg_loss: 0.168825 027805/063150, loss: 0.018023, avg_loss: 0.168799 027810/063150, loss: 0.048765, avg_loss: 0.168773 027815/063150, loss: 0.017355, avg_loss: 0.168757 027820/063150, loss: 0.106956, avg_loss: 0.168733 027825/063150, loss: 0.060166, avg_loss: 0.168709 027830/063150, loss: 0.001708, avg_loss: 0.168682 027835/063150, loss: 0.119290, avg_loss: 0.168664 027840/063150, loss: 0.031449, avg_loss: 0.168646 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 27840/63150: {'accuracy': 0.8532110091743119} 027845/063150, loss: 0.001824, avg_loss: 0.168620 027850/063150, loss: 0.010888, avg_loss: 0.168594 027855/063150, loss: 0.007230, avg_loss: 0.168566 027860/063150, loss: 0.047503, avg_loss: 0.168540 027865/063150, loss: 0.001900, avg_loss: 0.168512 027870/063150, loss: 0.004294, avg_loss: 0.168488 027875/063150, loss: 0.002686, avg_loss: 0.168466 027880/063150, loss: 0.137212, avg_loss: 0.168444 027885/063150, loss: 0.127088, avg_loss: 0.168423 027890/063150, loss: 0.098445, avg_loss: 0.168400 027895/063150, loss: 0.009915, avg_loss: 0.168374 027900/063150, loss: 0.088546, avg_loss: 0.168352 027905/063150, loss: 0.001617, avg_loss: 0.168326 027910/063150, loss: 0.010013, avg_loss: 0.168303 027915/063150, loss: 0.003222, avg_loss: 0.168275 027920/063150, loss: 0.146089, avg_loss: 0.168251 027925/063150, loss: 0.004163, avg_loss: 0.168233 027930/063150, loss: 0.087446, avg_loss: 0.168216 027935/063150, loss: 0.016721, avg_loss: 0.168191 027940/063150, loss: 0.012104, avg_loss: 0.168167 027945/063150, loss: 0.007010, avg_loss: 0.168144 027950/063150, loss: 0.032185, avg_loss: 0.168118 027955/063150, loss: 0.064780, avg_loss: 0.168094 027960/063150, loss: 0.001886, avg_loss: 0.168080 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 27960/63150: {'accuracy': 0.8600917431192661} 027965/063150, loss: 0.052768, avg_loss: 0.168053 027970/063150, loss: 0.001272, avg_loss: 0.168030 027975/063150, loss: 0.001667, avg_loss: 0.168010 027980/063150, loss: 0.173975, avg_loss: 0.167990 027985/063150, loss: 0.191033, avg_loss: 0.167973 027990/063150, loss: 0.076750, avg_loss: 0.167948 027995/063150, loss: 0.063395, avg_loss: 0.167924 028000/063150, loss: 0.247758, avg_loss: 0.167908 028005/063150, loss: 0.043394, avg_loss: 0.167892 028010/063150, loss: 0.068048, avg_loss: 0.167867 028015/063150, loss: 0.000950, avg_loss: 0.167839 028020/063150, loss: 0.023803, avg_loss: 0.167811 028025/063150, loss: 0.069647, avg_loss: 0.167791 028030/063150, loss: 0.006179, avg_loss: 0.167763 028035/063150, loss: 0.149412, avg_loss: 0.167753 028040/063150, loss: 0.260913, avg_loss: 0.167740 028045/063150, loss: 0.007112, avg_loss: 0.167721 028050/063150, loss: 0.062662, avg_loss: 0.167707 028055/063150, loss: 0.033384, avg_loss: 0.167686 028060/063150, loss: 0.086192, avg_loss: 0.167667 028065/063150, loss: 0.055420, avg_loss: 0.167645 028070/063150, loss: 0.175912, avg_loss: 0.167633 028075/063150, loss: 0.007357, avg_loss: 0.167605 028080/063150, loss: 0.016570, avg_loss: 0.167579 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28080/63150: {'accuracy': 0.8589449541284404} 028085/063150, loss: 0.050216, avg_loss: 0.167556 028090/063150, loss: 0.010988, avg_loss: 0.167535 028095/063150, loss: 0.004236, avg_loss: 0.167509 028100/063150, loss: 0.006510, avg_loss: 0.167486 028105/063150, loss: 0.069831, avg_loss: 0.167465 028110/063150, loss: 0.004723, avg_loss: 0.167440 028115/063150, loss: 0.019658, avg_loss: 0.167421 028120/063150, loss: 0.027906, avg_loss: 0.167402 028125/063150, loss: 0.011655, avg_loss: 0.167375 028130/063150, loss: 0.024306, avg_loss: 0.167353 028135/063150, loss: 0.039729, avg_loss: 0.167331 028140/063150, loss: 0.011833, avg_loss: 0.167309 028145/063150, loss: 0.037030, avg_loss: 0.167284 028150/063150, loss: 0.002287, avg_loss: 0.167263 028155/063150, loss: 0.089799, avg_loss: 0.167243 028160/063150, loss: 0.016498, avg_loss: 0.167231 028165/063150, loss: 0.033099, avg_loss: 0.167208 028170/063150, loss: 0.122826, avg_loss: 0.167194 028175/063150, loss: 0.067804, avg_loss: 0.167171 028180/063150, loss: 0.017962, avg_loss: 0.167146 028185/063150, loss: 0.066817, avg_loss: 0.167137 028190/063150, loss: 0.023980, avg_loss: 0.167111 028195/063150, loss: 0.098249, avg_loss: 0.167089 028200/063150, loss: 0.001516, avg_loss: 0.167078 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28200/63150: {'accuracy': 0.8612385321100917} 028205/063150, loss: 0.015147, avg_loss: 0.167057 028210/063150, loss: 0.006562, avg_loss: 0.167035 028215/063150, loss: 0.138221, avg_loss: 0.167022 028220/063150, loss: 0.012262, avg_loss: 0.167001 028225/063150, loss: 0.030977, avg_loss: 0.166985 028230/063150, loss: 0.002671, avg_loss: 0.166961 028235/063150, loss: 0.032277, avg_loss: 0.166940 028240/063150, loss: 0.029787, avg_loss: 0.166913 028245/063150, loss: 0.017316, avg_loss: 0.166898 028250/063150, loss: 0.053205, avg_loss: 0.166874 028255/063150, loss: 0.101664, avg_loss: 0.166850 028260/063150, loss: 0.041728, avg_loss: 0.166826 028265/063150, loss: 0.066572, avg_loss: 0.166806 028270/063150, loss: 0.029979, avg_loss: 0.166798 028275/063150, loss: 0.007725, avg_loss: 0.166772 028280/063150, loss: 0.004476, avg_loss: 0.166745 028285/063150, loss: 0.067404, avg_loss: 0.166722 028290/063150, loss: 0.024729, avg_loss: 0.166700 028295/063150, loss: 0.058260, avg_loss: 0.166675 028300/063150, loss: 0.010539, avg_loss: 0.166662 028305/063150, loss: 0.135680, avg_loss: 0.166647 028310/063150, loss: 0.011801, avg_loss: 0.166627 028315/063150, loss: 0.020173, avg_loss: 0.166604 028320/063150, loss: 0.009742, avg_loss: 0.166579 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28320/63150: {'accuracy': 0.8520642201834863} 028325/063150, loss: 0.025849, avg_loss: 0.166559 028330/063150, loss: 0.084111, avg_loss: 0.166537 028335/063150, loss: 0.011020, avg_loss: 0.166515 028340/063150, loss: 0.001050, avg_loss: 0.166488 028345/063150, loss: 0.012184, avg_loss: 0.166466 028350/063150, loss: 0.021985, avg_loss: 0.166443 028355/063150, loss: 0.015199, avg_loss: 0.166434 028360/063150, loss: 0.002518, avg_loss: 0.166408 028365/063150, loss: 0.036101, avg_loss: 0.166390 028370/063150, loss: 0.006141, avg_loss: 0.166369 028375/063150, loss: 0.021799, avg_loss: 0.166348 028380/063150, loss: 0.070193, avg_loss: 0.166327 028385/063150, loss: 0.017686, avg_loss: 0.166304 028390/063150, loss: 0.026984, avg_loss: 0.166283 028395/063150, loss: 0.010160, avg_loss: 0.166255 028400/063150, loss: 0.007895, avg_loss: 0.166241 028405/063150, loss: 0.059838, avg_loss: 0.166217 028410/063150, loss: 0.058588, avg_loss: 0.166193 028415/063150, loss: 0.031011, avg_loss: 0.166167 028420/063150, loss: 0.005845, avg_loss: 0.166151 028425/063150, loss: 0.050391, avg_loss: 0.166127 028430/063150, loss: 0.016239, avg_loss: 0.166103 028435/063150, loss: 0.070611, avg_loss: 0.166085 028440/063150, loss: 0.010411, avg_loss: 0.166067 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28440/63150: {'accuracy': 0.856651376146789} 028445/063150, loss: 0.016816, avg_loss: 0.166051 028450/063150, loss: 0.007382, avg_loss: 0.166030 028455/063150, loss: 0.008736, avg_loss: 0.166006 028460/063150, loss: 0.009211, avg_loss: 0.165983 028465/063150, loss: 0.011259, avg_loss: 0.165960 028470/063150, loss: 0.018984, avg_loss: 0.165934 028475/063150, loss: 0.009329, avg_loss: 0.165914 028480/063150, loss: 0.020577, avg_loss: 0.165892 028485/063150, loss: 0.223059, avg_loss: 0.165872 028490/063150, loss: 0.002591, avg_loss: 0.165853 028495/063150, loss: 0.004926, avg_loss: 0.165828 028500/063150, loss: 0.004833, avg_loss: 0.165803 028505/063150, loss: 0.058992, avg_loss: 0.165787 028510/063150, loss: 0.040001, avg_loss: 0.165763 028515/063150, loss: 0.126455, avg_loss: 0.165740 028520/063150, loss: 0.021724, avg_loss: 0.165717 028525/063150, loss: 0.026139, avg_loss: 0.165706 028530/063150, loss: 0.129199, avg_loss: 0.165685 028535/063150, loss: 0.006899, avg_loss: 0.165662 028540/063150, loss: 0.021979, avg_loss: 0.165639 028545/063150, loss: 0.008334, avg_loss: 0.165617 028550/063150, loss: 0.195612, avg_loss: 0.165598 028555/063150, loss: 0.004226, avg_loss: 0.165571 028560/063150, loss: 0.025035, avg_loss: 0.165545 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28560/63150: {'accuracy': 0.8577981651376146} 028565/063150, loss: 0.027609, avg_loss: 0.165520 028570/063150, loss: 0.004033, avg_loss: 0.165496 028575/063150, loss: 0.050360, avg_loss: 0.165476 028580/063150, loss: 0.014290, avg_loss: 0.165455 028585/063150, loss: 0.235777, avg_loss: 0.165436 028590/063150, loss: 0.025435, avg_loss: 0.165420 028595/063150, loss: 0.005397, avg_loss: 0.165393 028600/063150, loss: 0.117717, avg_loss: 0.165375 028605/063150, loss: 0.002037, avg_loss: 0.165353 028610/063150, loss: 0.254127, avg_loss: 0.165336 028615/063150, loss: 0.002838, avg_loss: 0.165313 028620/063150, loss: 0.095126, avg_loss: 0.165302 028625/063150, loss: 0.022436, avg_loss: 0.165277 028630/063150, loss: 0.092213, avg_loss: 0.165263 028635/063150, loss: 0.104061, avg_loss: 0.165247 028640/063150, loss: 0.017404, avg_loss: 0.165228 028645/063150, loss: 0.017034, avg_loss: 0.165212 028650/063150, loss: 0.043462, avg_loss: 0.165200 028655/063150, loss: 0.010030, avg_loss: 0.165176 028660/063150, loss: 0.040578, avg_loss: 0.165153 028665/063150, loss: 0.094064, avg_loss: 0.165130 028670/063150, loss: 0.025566, avg_loss: 0.165115 028675/063150, loss: 0.048855, avg_loss: 0.165093 028680/063150, loss: 0.022516, avg_loss: 0.165068 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28680/63150: {'accuracy': 0.8532110091743119} 028685/063150, loss: 0.001987, avg_loss: 0.165042 028690/063150, loss: 0.036959, avg_loss: 0.165025 028695/063150, loss: 0.153130, avg_loss: 0.165011 028700/063150, loss: 0.072899, avg_loss: 0.164991 028705/063150, loss: 0.058796, avg_loss: 0.164971 028710/063150, loss: 0.013077, avg_loss: 0.164954 028715/063150, loss: 0.009310, avg_loss: 0.164932 028720/063150, loss: 0.111735, avg_loss: 0.164909 028725/063150, loss: 0.034554, avg_loss: 0.164886 028730/063150, loss: 0.075338, avg_loss: 0.164869 028735/063150, loss: 0.016939, avg_loss: 0.164849 028740/063150, loss: 0.016666, avg_loss: 0.164827 028745/063150, loss: 0.130433, avg_loss: 0.164807 028750/063150, loss: 0.019661, avg_loss: 0.164782 028755/063150, loss: 0.025044, avg_loss: 0.164763 028760/063150, loss: 0.006909, avg_loss: 0.164737 028765/063150, loss: 0.008856, avg_loss: 0.164714 028770/063150, loss: 0.006263, avg_loss: 0.164697 028775/063150, loss: 0.006302, avg_loss: 0.164674 028780/063150, loss: 0.100488, avg_loss: 0.164652 028785/063150, loss: 0.048350, avg_loss: 0.164637 028790/063150, loss: 0.008412, avg_loss: 0.164616 028795/063150, loss: 0.071850, avg_loss: 0.164593 028800/063150, loss: 0.001472, avg_loss: 0.164566 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28800/63150: {'accuracy': 0.8509174311926605} 028805/063150, loss: 0.023727, avg_loss: 0.164541 028810/063150, loss: 0.032263, avg_loss: 0.164523 028815/063150, loss: 0.003074, avg_loss: 0.164503 028820/063150, loss: 0.006534, avg_loss: 0.164480 028825/063150, loss: 0.007323, avg_loss: 0.164460 028830/063150, loss: 0.003284, avg_loss: 0.164445 028835/063150, loss: 0.062052, avg_loss: 0.164433 028840/063150, loss: 0.100321, avg_loss: 0.164413 028845/063150, loss: 0.022897, avg_loss: 0.164394 028850/063150, loss: 0.005485, avg_loss: 0.164375 028855/063150, loss: 0.062385, avg_loss: 0.164369 028860/063150, loss: 0.010690, avg_loss: 0.164346 028865/063150, loss: 0.011008, avg_loss: 0.164321 028870/063150, loss: 0.003571, avg_loss: 0.164306 028875/063150, loss: 0.030304, avg_loss: 0.164284 028880/063150, loss: 0.063094, avg_loss: 0.164268 028885/063150, loss: 0.132826, avg_loss: 0.164255 028890/063150, loss: 0.015198, avg_loss: 0.164232 028895/063150, loss: 0.269952, avg_loss: 0.164218 028900/063150, loss: 0.025004, avg_loss: 0.164201 028905/063150, loss: 0.029775, avg_loss: 0.164180 028910/063150, loss: 0.044002, avg_loss: 0.164157 028915/063150, loss: 0.042488, avg_loss: 0.164139 028920/063150, loss: 0.054601, avg_loss: 0.164117 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 28920/63150: {'accuracy': 0.8497706422018348} 028925/063150, loss: 0.025257, avg_loss: 0.164104 028930/063150, loss: 0.036359, avg_loss: 0.164080 028935/063150, loss: 0.026186, avg_loss: 0.164059 028940/063150, loss: 0.058101, avg_loss: 0.164038 028945/063150, loss: 0.041011, avg_loss: 0.164020 028950/063150, loss: 0.075152, avg_loss: 0.164001 028955/063150, loss: 0.080023, avg_loss: 0.163983 028960/063150, loss: 0.060679, avg_loss: 0.163967 028965/063150, loss: 0.011518, avg_loss: 0.163947 028970/063150, loss: 0.114544, avg_loss: 0.163924 028975/063150, loss: 0.004298, avg_loss: 0.163897 028980/063150, loss: 0.030343, avg_loss: 0.163877 028985/063150, loss: 0.020442, avg_loss: 0.163853 028990/063150, loss: 0.016661, avg_loss: 0.163839 028995/063150, loss: 0.016685, avg_loss: 0.163814 029000/063150, loss: 0.006103, avg_loss: 0.163787 029005/063150, loss: 0.034401, avg_loss: 0.163762 029010/063150, loss: 0.139720, avg_loss: 0.163763 029015/063150, loss: 0.024141, avg_loss: 0.163741 029020/063150, loss: 0.013081, avg_loss: 0.163717 029025/063150, loss: 0.002962, avg_loss: 0.163698 029030/063150, loss: 0.061370, avg_loss: 0.163679 029035/063150, loss: 0.029041, avg_loss: 0.163655 029040/063150, loss: 0.026278, avg_loss: 0.163638 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 29040/63150: {'accuracy': 0.8646788990825688} 029045/063150, loss: 0.010010, avg_loss: 0.163617 029050/063150, loss: 0.183986, avg_loss: 0.163604 029055/063150, loss: 0.255172, avg_loss: 0.163589 029060/063150, loss: 0.015355, avg_loss: 0.163568 029065/063150, loss: 0.164820, avg_loss: 0.163550 029070/063150, loss: 0.054091, avg_loss: 0.163526 029075/063150, loss: 0.087639, avg_loss: 0.163505 029080/063150, loss: 0.040989, avg_loss: 0.163486 029085/063150, loss: 0.004775, avg_loss: 0.163464 029090/063150, loss: 0.042325, avg_loss: 0.163445 029095/063150, loss: 0.122965, avg_loss: 0.163424 029100/063150, loss: 0.043591, avg_loss: 0.163401 029105/063150, loss: 0.032209, avg_loss: 0.163377 029110/063150, loss: 0.051691, avg_loss: 0.163355 029115/063150, loss: 0.142066, avg_loss: 0.163339 029120/063150, loss: 0.039663, avg_loss: 0.163320 029125/063150, loss: 0.014815, avg_loss: 0.163298 029130/063150, loss: 0.065197, avg_loss: 0.163277 029135/063150, loss: 0.011303, avg_loss: 0.163263 029140/063150, loss: 0.076263, avg_loss: 0.163243 029145/063150, loss: 0.128071, avg_loss: 0.163221 029150/063150, loss: 0.015133, avg_loss: 0.163200 029155/063150, loss: 0.018022, avg_loss: 0.163188 029160/063150, loss: 0.017963, avg_loss: 0.163169 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 29160/63150: {'accuracy': 0.8646788990825688} 029165/063150, loss: 0.042886, avg_loss: 0.163149 029170/063150, loss: 0.005423, avg_loss: 0.163129 029175/063150, loss: 0.022309, avg_loss: 0.163107 029180/063150, loss: 0.008760, avg_loss: 0.163093 029185/063150, loss: 0.009481, avg_loss: 0.163072 029190/063150, loss: 0.007929, avg_loss: 0.163050 029195/063150, loss: 0.107301, avg_loss: 0.163036 029200/063150, loss: 0.022963, avg_loss: 0.163018 029205/063150, loss: 0.084667, avg_loss: 0.163000 029210/063150, loss: 0.051043, avg_loss: 0.162980 029215/063150, loss: 0.038937, avg_loss: 0.162958 029220/063150, loss: 0.033412, avg_loss: 0.162945 029225/063150, loss: 0.005927, avg_loss: 0.162920 029230/063150, loss: 0.027636, avg_loss: 0.162898 029235/063150, loss: 0.126428, avg_loss: 0.162883 029240/063150, loss: 0.039391, avg_loss: 0.162862 029245/063150, loss: 0.037687, avg_loss: 0.162839 029250/063150, loss: 0.134654, avg_loss: 0.162820 029255/063150, loss: 0.003597, avg_loss: 0.162794 029260/063150, loss: 0.071330, avg_loss: 0.162777 029265/063150, loss: 0.106110, avg_loss: 0.162755 029270/063150, loss: 0.133331, avg_loss: 0.162739 029275/063150, loss: 0.019947, avg_loss: 0.162718 029280/063150, loss: 0.001610, avg_loss: 0.162702 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 29280/63150: {'accuracy': 0.8543577981651376} 029285/063150, loss: 0.012514, avg_loss: 0.162681 029290/063150, loss: 0.146907, avg_loss: 0.162680 029295/063150, loss: 0.003723, avg_loss: 0.162656 029300/063150, loss: 0.018421, avg_loss: 0.162638 029305/063150, loss: 0.071663, avg_loss: 0.162615 029310/063150, loss: 0.091821, avg_loss: 0.162597 029315/063150, loss: 0.054160, avg_loss: 0.162575 029320/063150, loss: 0.051946, avg_loss: 0.162557 029325/063150, loss: 0.055123, avg_loss: 0.162533 029330/063150, loss: 0.071310, avg_loss: 0.162516 029335/063150, loss: 0.076138, avg_loss: 0.162500 029340/063150, loss: 0.037010, avg_loss: 0.162480 029345/063150, loss: 0.033757, avg_loss: 0.162458 029350/063150, loss: 0.012211, avg_loss: 0.162439 029355/063150, loss: 0.054922, avg_loss: 0.162416 029360/063150, loss: 0.004186, avg_loss: 0.162396 029365/063150, loss: 0.041707, avg_loss: 0.162375 029370/063150, loss: 0.030861, avg_loss: 0.162351 029375/063150, loss: 0.006072, avg_loss: 0.162333 029380/063150, loss: 0.052050, avg_loss: 0.162312 029385/063150, loss: 0.165506, avg_loss: 0.162295 029390/063150, loss: 0.037339, avg_loss: 0.162273 029395/063150, loss: 0.006619, avg_loss: 0.162255 029400/063150, loss: 0.143072, avg_loss: 0.162241 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 13, step 29400/63150: {'accuracy': 0.856651376146789} 029405/063150, loss: 0.269459, avg_loss: 0.162227 029410/063150, loss: 0.032460, avg_loss: 0.162202 029415/063150, loss: 0.002599, avg_loss: 0.162185 029420/063150, loss: 0.070231, avg_loss: 0.162175 029425/063150, loss: 0.024736, avg_loss: 0.162151 029430/063150, loss: 0.121802, avg_loss: 0.162130 029435/063150, loss: 0.015743, avg_loss: 0.162109 029440/063150, loss: 0.048686, avg_loss: 0.162088 029445/063150, loss: 0.055546, avg_loss: 0.162066 029450/063150, loss: 0.196122, avg_loss: 0.162052 029455/063150, loss: 0.023091, avg_loss: 0.162033 029460/063150, loss: 0.151750, avg_loss: 0.162017 029465/063150, loss: 0.059146, avg_loss: 0.161999 029470/063150, loss: 0.078730, avg_loss: 0.161980 029475/063150, loss: 0.002122, avg_loss: 0.161958 029480/063150, loss: 0.090604, avg_loss: 0.161939 029485/063150, loss: 0.075021, avg_loss: 0.161916 029490/063150, loss: 0.121685, avg_loss: 0.161902 029495/063150, loss: 0.010097, avg_loss: 0.161879 029500/063150, loss: 0.125670, avg_loss: 0.161860 029505/063150, loss: 0.015610, avg_loss: 0.161837 029510/063150, loss: 0.012012, avg_loss: 0.161810 029515/063150, loss: 0.023708, avg_loss: 0.161790 029520/063150, loss: 0.208061, avg_loss: 0.161771 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 29520/63150: {'accuracy': 0.8543577981651376} 029525/063150, loss: 0.029809, avg_loss: 0.161747 029530/063150, loss: 0.158969, avg_loss: 0.161728 029535/063150, loss: 0.006258, avg_loss: 0.161710 029540/063150, loss: 0.026417, avg_loss: 0.161686 029545/063150, loss: 0.055589, avg_loss: 0.161664 029550/063150, loss: 0.040480, avg_loss: 0.161643 029555/063150, loss: 0.004644, avg_loss: 0.161622 029560/063150, loss: 0.002206, avg_loss: 0.161598 029565/063150, loss: 0.003171, avg_loss: 0.161577 029570/063150, loss: 0.230687, avg_loss: 0.161559 029575/063150, loss: 0.017428, avg_loss: 0.161537 029580/063150, loss: 0.006898, avg_loss: 0.161514 029585/063150, loss: 0.002860, avg_loss: 0.161494 029590/063150, loss: 0.042689, avg_loss: 0.161472 029595/063150, loss: 0.030789, avg_loss: 0.161452 029600/063150, loss: 0.001615, avg_loss: 0.161427 029605/063150, loss: 0.019307, avg_loss: 0.161401 029610/063150, loss: 0.156288, avg_loss: 0.161380 029615/063150, loss: 0.001029, avg_loss: 0.161358 029620/063150, loss: 0.036586, avg_loss: 0.161338 029625/063150, loss: 0.031545, avg_loss: 0.161317 029630/063150, loss: 0.001884, avg_loss: 0.161294 029635/063150, loss: 0.001182, avg_loss: 0.161283 029640/063150, loss: 0.018922, avg_loss: 0.161264 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 29640/63150: {'accuracy': 0.8520642201834863} 029645/063150, loss: 0.004678, avg_loss: 0.161242 029650/063150, loss: 0.038651, avg_loss: 0.161218 029655/063150, loss: 0.055575, avg_loss: 0.161198 029660/063150, loss: 0.030203, avg_loss: 0.161174 029665/063150, loss: 0.014210, avg_loss: 0.161152 029670/063150, loss: 0.027082, avg_loss: 0.161129 029675/063150, loss: 0.026579, avg_loss: 0.161108 029680/063150, loss: 0.011516, avg_loss: 0.161084 029685/063150, loss: 0.001934, avg_loss: 0.161062 029690/063150, loss: 0.118375, avg_loss: 0.161042 029695/063150, loss: 0.002767, avg_loss: 0.161024 029700/063150, loss: 0.012189, avg_loss: 0.161009 029705/063150, loss: 0.051321, avg_loss: 0.160991 029710/063150, loss: 0.032166, avg_loss: 0.160975 029715/063150, loss: 0.033893, avg_loss: 0.160953 029720/063150, loss: 0.030671, avg_loss: 0.160931 029725/063150, loss: 0.099010, avg_loss: 0.160913 029730/063150, loss: 0.003936, avg_loss: 0.160891 029735/063150, loss: 0.060138, avg_loss: 0.160868 029740/063150, loss: 0.095448, avg_loss: 0.160853 029745/063150, loss: 0.034447, avg_loss: 0.160829 029750/063150, loss: 0.011397, avg_loss: 0.160807 029755/063150, loss: 0.005566, avg_loss: 0.160782 029760/063150, loss: 0.000922, avg_loss: 0.160757 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 29760/63150: {'accuracy': 0.8612385321100917} 029765/063150, loss: 0.091732, avg_loss: 0.160738 029770/063150, loss: 0.002410, avg_loss: 0.160717 029775/063150, loss: 0.003940, avg_loss: 0.160697 029780/063150, loss: 0.006727, avg_loss: 0.160675 029785/063150, loss: 0.065086, avg_loss: 0.160657 029790/063150, loss: 0.032234, avg_loss: 0.160635 029795/063150, loss: 0.025019, avg_loss: 0.160618 029800/063150, loss: 0.004462, avg_loss: 0.160596 029805/063150, loss: 0.038650, avg_loss: 0.160580 029810/063150, loss: 0.044760, avg_loss: 0.160558 029815/063150, loss: 0.002990, avg_loss: 0.160534 029820/063150, loss: 0.006449, avg_loss: 0.160512 029825/063150, loss: 0.007298, avg_loss: 0.160491 029830/063150, loss: 0.026802, avg_loss: 0.160466 029835/063150, loss: 0.014093, avg_loss: 0.160441 029840/063150, loss: 0.023324, avg_loss: 0.160415 029845/063150, loss: 0.063629, avg_loss: 0.160402 029850/063150, loss: 0.009189, avg_loss: 0.160377 029855/063150, loss: 0.006470, avg_loss: 0.160355 029860/063150, loss: 0.037101, avg_loss: 0.160332 029865/063150, loss: 0.037977, avg_loss: 0.160310 029870/063150, loss: 0.001203, avg_loss: 0.160292 029875/063150, loss: 0.004675, avg_loss: 0.160271 029880/063150, loss: 0.019148, avg_loss: 0.160248 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 29880/63150: {'accuracy': 0.8348623853211009} 029885/063150, loss: 0.229325, avg_loss: 0.160231 029890/063150, loss: 0.014964, avg_loss: 0.160208 029895/063150, loss: 0.010728, avg_loss: 0.160186 029900/063150, loss: 0.006701, avg_loss: 0.160164 029905/063150, loss: 0.136334, avg_loss: 0.160146 029910/063150, loss: 0.005484, avg_loss: 0.160127 029915/063150, loss: 0.028629, avg_loss: 0.160106 029920/063150, loss: 0.055188, avg_loss: 0.160088 029925/063150, loss: 0.005606, avg_loss: 0.160066 029930/063150, loss: 0.023008, avg_loss: 0.160046 029935/063150, loss: 0.003997, avg_loss: 0.160027 029940/063150, loss: 0.046418, avg_loss: 0.160011 029945/063150, loss: 0.003882, avg_loss: 0.159988 029950/063150, loss: 0.020722, avg_loss: 0.159968 029955/063150, loss: 0.267843, avg_loss: 0.159955 029960/063150, loss: 0.033335, avg_loss: 0.159934 029965/063150, loss: 0.016816, avg_loss: 0.159913 029970/063150, loss: 0.105205, avg_loss: 0.159897 029975/063150, loss: 0.032642, avg_loss: 0.159881 029980/063150, loss: 0.017649, avg_loss: 0.159859 029985/063150, loss: 0.006750, avg_loss: 0.159838 029990/063150, loss: 0.051170, avg_loss: 0.159816 029995/063150, loss: 0.065801, avg_loss: 0.159797 030000/063150, loss: 0.047385, avg_loss: 0.159777 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30000/63150: {'accuracy': 0.856651376146789} 030005/063150, loss: 0.011315, avg_loss: 0.159752 030010/063150, loss: 0.001336, avg_loss: 0.159730 030015/063150, loss: 0.081847, avg_loss: 0.159716 030020/063150, loss: 0.030773, avg_loss: 0.159693 030025/063150, loss: 0.070896, avg_loss: 0.159671 030030/063150, loss: 0.002881, avg_loss: 0.159648 030035/063150, loss: 0.019054, avg_loss: 0.159624 030040/063150, loss: 0.074761, avg_loss: 0.159605 030045/063150, loss: 0.033359, avg_loss: 0.159583 030050/063150, loss: 0.024879, avg_loss: 0.159564 030055/063150, loss: 0.014777, avg_loss: 0.159541 030060/063150, loss: 0.001530, avg_loss: 0.159534 030065/063150, loss: 0.008303, avg_loss: 0.159512 030070/063150, loss: 0.004331, avg_loss: 0.159488 030075/063150, loss: 0.002786, avg_loss: 0.159465 030080/063150, loss: 0.010111, avg_loss: 0.159443 030085/063150, loss: 0.169485, avg_loss: 0.159428 030090/063150, loss: 0.022356, avg_loss: 0.159411 030095/063150, loss: 0.008158, avg_loss: 0.159392 030100/063150, loss: 0.128152, avg_loss: 0.159378 030105/063150, loss: 0.219819, avg_loss: 0.159362 030110/063150, loss: 0.015893, avg_loss: 0.159344 030115/063150, loss: 0.011225, avg_loss: 0.159324 030120/063150, loss: 0.006620, avg_loss: 0.159299 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30120/63150: {'accuracy': 0.8509174311926605} 030125/063150, loss: 0.000971, avg_loss: 0.159276 030130/063150, loss: 0.035606, avg_loss: 0.159253 030135/063150, loss: 0.002767, avg_loss: 0.159232 030140/063150, loss: 0.009203, avg_loss: 0.159208 030145/063150, loss: 0.018383, avg_loss: 0.159183 030150/063150, loss: 0.107643, avg_loss: 0.159164 030155/063150, loss: 0.077340, avg_loss: 0.159146 030160/063150, loss: 0.008419, avg_loss: 0.159139 030165/063150, loss: 0.065853, avg_loss: 0.159117 030170/063150, loss: 0.015150, avg_loss: 0.159097 030175/063150, loss: 0.004214, avg_loss: 0.159074 030180/063150, loss: 0.083018, avg_loss: 0.159052 030185/063150, loss: 0.049122, avg_loss: 0.159032 030190/063150, loss: 0.043142, avg_loss: 0.159028 030195/063150, loss: 0.003641, avg_loss: 0.159006 030200/063150, loss: 0.018801, avg_loss: 0.158997 030205/063150, loss: 0.001541, avg_loss: 0.158979 030210/063150, loss: 0.002915, avg_loss: 0.158959 030215/063150, loss: 0.048952, avg_loss: 0.158944 030220/063150, loss: 0.008590, avg_loss: 0.158925 030225/063150, loss: 0.118306, avg_loss: 0.158908 030230/063150, loss: 0.113757, avg_loss: 0.158888 030235/063150, loss: 0.114253, avg_loss: 0.158869 030240/063150, loss: 0.015734, avg_loss: 0.158852 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30240/63150: {'accuracy': 0.8555045871559633} 030245/063150, loss: 0.015323, avg_loss: 0.158841 030250/063150, loss: 0.052867, avg_loss: 0.158823 030255/063150, loss: 0.002096, avg_loss: 0.158806 030260/063150, loss: 0.003224, avg_loss: 0.158787 030265/063150, loss: 0.028060, avg_loss: 0.158766 030270/063150, loss: 0.010054, avg_loss: 0.158749 030275/063150, loss: 0.002811, avg_loss: 0.158727 030280/063150, loss: 0.021772, avg_loss: 0.158711 030285/063150, loss: 0.018648, avg_loss: 0.158694 030290/063150, loss: 0.051021, avg_loss: 0.158673 030295/063150, loss: 0.017929, avg_loss: 0.158651 030300/063150, loss: 0.206296, avg_loss: 0.158641 030305/063150, loss: 0.028120, avg_loss: 0.158626 030310/063150, loss: 0.025317, avg_loss: 0.158608 030315/063150, loss: 0.247147, avg_loss: 0.158598 030320/063150, loss: 0.010860, avg_loss: 0.158575 030325/063150, loss: 0.058276, avg_loss: 0.158556 030330/063150, loss: 0.030149, avg_loss: 0.158533 030335/063150, loss: 0.004202, avg_loss: 0.158515 030340/063150, loss: 0.005641, avg_loss: 0.158491 030345/063150, loss: 0.095417, avg_loss: 0.158480 030350/063150, loss: 0.013658, avg_loss: 0.158464 030355/063150, loss: 0.059661, avg_loss: 0.158443 030360/063150, loss: 0.032731, avg_loss: 0.158423 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30360/63150: {'accuracy': 0.8497706422018348} 030365/063150, loss: 0.002510, avg_loss: 0.158402 030370/063150, loss: 0.149434, avg_loss: 0.158384 030375/063150, loss: 0.001370, avg_loss: 0.158359 030380/063150, loss: 0.085941, avg_loss: 0.158343 030385/063150, loss: 0.009599, avg_loss: 0.158323 030390/063150, loss: 0.002361, avg_loss: 0.158310 030395/063150, loss: 0.000963, avg_loss: 0.158293 030400/063150, loss: 0.004199, avg_loss: 0.158274 030405/063150, loss: 0.001675, avg_loss: 0.158251 030410/063150, loss: 0.017970, avg_loss: 0.158228 030415/063150, loss: 0.041820, avg_loss: 0.158211 030420/063150, loss: 0.032792, avg_loss: 0.158192 030425/063150, loss: 0.153325, avg_loss: 0.158174 030430/063150, loss: 0.006793, avg_loss: 0.158157 030435/063150, loss: 0.005587, avg_loss: 0.158133 030440/063150, loss: 0.032403, avg_loss: 0.158111 030445/063150, loss: 0.012293, avg_loss: 0.158087 030450/063150, loss: 0.036248, avg_loss: 0.158068 030455/063150, loss: 0.018853, avg_loss: 0.158044 030460/063150, loss: 0.061686, avg_loss: 0.158028 030465/063150, loss: 0.098958, avg_loss: 0.158013 030470/063150, loss: 0.027358, avg_loss: 0.157994 030475/063150, loss: 0.038179, avg_loss: 0.157972 030480/063150, loss: 0.008890, avg_loss: 0.157951 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30480/63150: {'accuracy': 0.8543577981651376} 030485/063150, loss: 0.003946, avg_loss: 0.157934 030490/063150, loss: 0.014210, avg_loss: 0.157910 030495/063150, loss: 0.001265, avg_loss: 0.157889 030500/063150, loss: 0.106211, avg_loss: 0.157878 030505/063150, loss: 0.008263, avg_loss: 0.157856 030510/063150, loss: 0.085025, avg_loss: 0.157839 030515/063150, loss: 0.072008, avg_loss: 0.157821 030520/063150, loss: 0.001404, avg_loss: 0.157799 030525/063150, loss: 0.012265, avg_loss: 0.157777 030530/063150, loss: 0.147890, avg_loss: 0.157761 030535/063150, loss: 0.039767, avg_loss: 0.157739 030540/063150, loss: 0.005405, avg_loss: 0.157717 030545/063150, loss: 0.006588, avg_loss: 0.157700 030550/063150, loss: 0.018616, avg_loss: 0.157681 030555/063150, loss: 0.057121, avg_loss: 0.157661 030560/063150, loss: 0.099701, avg_loss: 0.157640 030565/063150, loss: 0.036851, avg_loss: 0.157623 030570/063150, loss: 0.047026, avg_loss: 0.157606 030575/063150, loss: 0.004419, avg_loss: 0.157582 030580/063150, loss: 0.044898, avg_loss: 0.157565 030585/063150, loss: 0.021919, avg_loss: 0.157550 030590/063150, loss: 0.003510, avg_loss: 0.157530 030595/063150, loss: 0.010405, avg_loss: 0.157510 030600/063150, loss: 0.039771, avg_loss: 0.157491 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30600/63150: {'accuracy': 0.8589449541284404} 030605/063150, loss: 0.036015, avg_loss: 0.157473 030610/063150, loss: 0.025019, avg_loss: 0.157452 030615/063150, loss: 0.002405, avg_loss: 0.157429 030620/063150, loss: 0.002226, avg_loss: 0.157408 030625/063150, loss: 0.081129, avg_loss: 0.157390 030630/063150, loss: 0.011506, avg_loss: 0.157372 030635/063150, loss: 0.014530, avg_loss: 0.157363 030640/063150, loss: 0.032427, avg_loss: 0.157342 030645/063150, loss: 0.008573, avg_loss: 0.157320 030650/063150, loss: 0.002463, avg_loss: 0.157299 030655/063150, loss: 0.024388, avg_loss: 0.157276 030660/063150, loss: 0.006331, avg_loss: 0.157261 030665/063150, loss: 0.010446, avg_loss: 0.157238 030670/063150, loss: 0.018412, avg_loss: 0.157218 030675/063150, loss: 0.008687, avg_loss: 0.157204 030680/063150, loss: 0.071305, avg_loss: 0.157186 030685/063150, loss: 0.032300, avg_loss: 0.157171 030690/063150, loss: 0.037697, avg_loss: 0.157151 030695/063150, loss: 0.005659, avg_loss: 0.157130 030700/063150, loss: 0.230399, avg_loss: 0.157121 030705/063150, loss: 0.127813, avg_loss: 0.157103 030710/063150, loss: 0.002888, avg_loss: 0.157086 030715/063150, loss: 0.022934, avg_loss: 0.157068 030720/063150, loss: 0.006306, avg_loss: 0.157045 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30720/63150: {'accuracy': 0.856651376146789} 030725/063150, loss: 0.005108, avg_loss: 0.157027 030730/063150, loss: 0.061327, avg_loss: 0.157007 030735/063150, loss: 0.053550, avg_loss: 0.156997 030740/063150, loss: 0.017894, avg_loss: 0.156974 030745/063150, loss: 0.003548, avg_loss: 0.156951 030750/063150, loss: 0.001552, avg_loss: 0.156928 030755/063150, loss: 0.062007, avg_loss: 0.156906 030760/063150, loss: 0.158605, avg_loss: 0.156890 030765/063150, loss: 0.070497, avg_loss: 0.156876 030770/063150, loss: 0.261261, avg_loss: 0.156860 030775/063150, loss: 0.075505, avg_loss: 0.156842 030780/063150, loss: 0.015446, avg_loss: 0.156827 030785/063150, loss: 0.017108, avg_loss: 0.156806 030790/063150, loss: 0.030346, avg_loss: 0.156789 030795/063150, loss: 0.008710, avg_loss: 0.156768 030800/063150, loss: 0.003268, avg_loss: 0.156745 030805/063150, loss: 0.055026, avg_loss: 0.156731 030810/063150, loss: 0.009798, avg_loss: 0.156718 030815/063150, loss: 0.003267, avg_loss: 0.156697 030820/063150, loss: 0.228870, avg_loss: 0.156687 030825/063150, loss: 0.048969, avg_loss: 0.156664 030830/063150, loss: 0.052599, avg_loss: 0.156642 030835/063150, loss: 0.003540, avg_loss: 0.156623 030840/063150, loss: 0.035328, avg_loss: 0.156602 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30840/63150: {'accuracy': 0.8509174311926605} 030845/063150, loss: 0.253468, avg_loss: 0.156588 030850/063150, loss: 0.004320, avg_loss: 0.156566 030855/063150, loss: 0.027654, avg_loss: 0.156546 030860/063150, loss: 0.040192, avg_loss: 0.156526 030865/063150, loss: 0.136494, avg_loss: 0.156516 030870/063150, loss: 0.032073, avg_loss: 0.156494 030875/063150, loss: 0.008085, avg_loss: 0.156472 030880/063150, loss: 0.032437, avg_loss: 0.156449 030885/063150, loss: 0.003916, avg_loss: 0.156433 030890/063150, loss: 0.001516, avg_loss: 0.156409 030895/063150, loss: 0.026923, avg_loss: 0.156388 030900/063150, loss: 0.060765, avg_loss: 0.156376 030905/063150, loss: 0.028656, avg_loss: 0.156358 030910/063150, loss: 0.000778, avg_loss: 0.156347 030915/063150, loss: 0.040568, avg_loss: 0.156324 030920/063150, loss: 0.001278, avg_loss: 0.156306 030925/063150, loss: 0.002350, avg_loss: 0.156285 030930/063150, loss: 0.009146, avg_loss: 0.156262 030935/063150, loss: 0.007925, avg_loss: 0.156242 030940/063150, loss: 0.023785, avg_loss: 0.156223 030945/063150, loss: 0.146085, avg_loss: 0.156207 030950/063150, loss: 0.115348, avg_loss: 0.156186 030955/063150, loss: 0.046772, avg_loss: 0.156168 030960/063150, loss: 0.047580, avg_loss: 0.156150 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 30960/63150: {'accuracy': 0.8486238532110092} 030965/063150, loss: 0.001757, avg_loss: 0.156129 030970/063150, loss: 0.004208, avg_loss: 0.156107 030975/063150, loss: 0.057633, avg_loss: 0.156091 030980/063150, loss: 0.053134, avg_loss: 0.156070 030985/063150, loss: 0.006671, avg_loss: 0.156047 030990/063150, loss: 0.030251, avg_loss: 0.156024 030995/063150, loss: 0.015772, avg_loss: 0.156007 031000/063150, loss: 0.051082, avg_loss: 0.155995 031005/063150, loss: 0.001429, avg_loss: 0.155975 031010/063150, loss: 0.010632, avg_loss: 0.155954 031015/063150, loss: 0.067887, avg_loss: 0.155934 031020/063150, loss: 0.011314, avg_loss: 0.155911 031025/063150, loss: 0.060011, avg_loss: 0.155891 031030/063150, loss: 0.017256, avg_loss: 0.155869 031035/063150, loss: 0.004272, avg_loss: 0.155846 031040/063150, loss: 0.000855, avg_loss: 0.155826 031045/063150, loss: 0.070024, avg_loss: 0.155811 031050/063150, loss: 0.008011, avg_loss: 0.155798 031055/063150, loss: 0.042606, avg_loss: 0.155782 031060/063150, loss: 0.073933, avg_loss: 0.155774 031065/063150, loss: 0.020848, avg_loss: 0.155752 031070/063150, loss: 0.095692, avg_loss: 0.155740 031075/063150, loss: 0.103942, avg_loss: 0.155729 031080/063150, loss: 0.039420, avg_loss: 0.155713 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 31080/63150: {'accuracy': 0.8497706422018348} 031085/063150, loss: 0.079751, avg_loss: 0.155696 031090/063150, loss: 0.045403, avg_loss: 0.155675 031095/063150, loss: 0.007622, avg_loss: 0.155654 031100/063150, loss: 0.024870, avg_loss: 0.155640 031105/063150, loss: 0.002540, avg_loss: 0.155622 031110/063150, loss: 0.031332, avg_loss: 0.155608 031115/063150, loss: 0.016780, avg_loss: 0.155594 031120/063150, loss: 0.061048, avg_loss: 0.155579 031125/063150, loss: 0.020524, avg_loss: 0.155558 031130/063150, loss: 0.014199, avg_loss: 0.155539 031135/063150, loss: 0.015381, avg_loss: 0.155515 031140/063150, loss: 0.003200, avg_loss: 0.155496 031145/063150, loss: 0.002946, avg_loss: 0.155478 031150/063150, loss: 0.018285, avg_loss: 0.155455 031155/063150, loss: 0.003634, avg_loss: 0.155433 031160/063150, loss: 0.016002, avg_loss: 0.155417 031165/063150, loss: 0.005283, avg_loss: 0.155396 031170/063150, loss: 0.081333, avg_loss: 0.155377 031175/063150, loss: 0.047920, avg_loss: 0.155357 031180/063150, loss: 0.004385, avg_loss: 0.155335 031185/063150, loss: 0.007167, avg_loss: 0.155315 031190/063150, loss: 0.017125, avg_loss: 0.155301 031195/063150, loss: 0.070240, avg_loss: 0.155288 031200/063150, loss: 0.020473, avg_loss: 0.155270 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 31200/63150: {'accuracy': 0.8486238532110092} 031205/063150, loss: 0.043037, avg_loss: 0.155253 031210/063150, loss: 0.006261, avg_loss: 0.155229 031215/063150, loss: 0.001673, avg_loss: 0.155210 031220/063150, loss: 0.008318, avg_loss: 0.155189 031225/063150, loss: 0.008910, avg_loss: 0.155175 031230/063150, loss: 0.007713, avg_loss: 0.155154 031235/063150, loss: 0.068322, avg_loss: 0.155134 031240/063150, loss: 0.037572, avg_loss: 0.155120 031245/063150, loss: 0.002862, avg_loss: 0.155104 031250/063150, loss: 0.002596, avg_loss: 0.155091 031255/063150, loss: 0.076086, avg_loss: 0.155072 031260/063150, loss: 0.046203, avg_loss: 0.155056 031265/063150, loss: 0.008974, avg_loss: 0.155036 031270/063150, loss: 0.024836, avg_loss: 0.155013 031275/063150, loss: 0.042243, avg_loss: 0.154996 031280/063150, loss: 0.020888, avg_loss: 0.154977 031285/063150, loss: 0.003390, avg_loss: 0.154959 031290/063150, loss: 0.026576, avg_loss: 0.154938 031295/063150, loss: 0.223176, avg_loss: 0.154928 031300/063150, loss: 0.061455, avg_loss: 0.154913 031305/063150, loss: 0.107567, avg_loss: 0.154896 031310/063150, loss: 0.029950, avg_loss: 0.154878 031315/063150, loss: 0.010695, avg_loss: 0.154859 031320/063150, loss: 0.011212, avg_loss: 0.154841 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 31320/63150: {'accuracy': 0.8440366972477065} 031325/063150, loss: 0.024456, avg_loss: 0.154826 031330/063150, loss: 0.057724, avg_loss: 0.154810 031335/063150, loss: 0.012927, avg_loss: 0.154794 031340/063150, loss: 0.002259, avg_loss: 0.154781 031345/063150, loss: 0.018965, avg_loss: 0.154759 031350/063150, loss: 0.019749, avg_loss: 0.154739 031355/063150, loss: 0.034422, avg_loss: 0.154720 031360/063150, loss: 0.012534, avg_loss: 0.154699 031365/063150, loss: 0.026447, avg_loss: 0.154683 031370/063150, loss: 0.161248, avg_loss: 0.154674 031375/063150, loss: 0.037981, avg_loss: 0.154656 031380/063150, loss: 0.037271, avg_loss: 0.154635 031385/063150, loss: 0.049310, avg_loss: 0.154615 031390/063150, loss: 0.015623, avg_loss: 0.154597 031395/063150, loss: 0.038250, avg_loss: 0.154579 031400/063150, loss: 0.021816, avg_loss: 0.154557 031405/063150, loss: 0.018133, avg_loss: 0.154537 031410/063150, loss: 0.009122, avg_loss: 0.154521 031415/063150, loss: 0.026153, avg_loss: 0.154502 031420/063150, loss: 0.002189, avg_loss: 0.154481 031425/063150, loss: 0.025579, avg_loss: 0.154459 031430/063150, loss: 0.062954, avg_loss: 0.154443 031435/063150, loss: 0.015800, avg_loss: 0.154426 031440/063150, loss: 0.143509, avg_loss: 0.154408 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 31440/63150: {'accuracy': 0.8555045871559633} 031445/063150, loss: 0.022480, avg_loss: 0.154388 031450/063150, loss: 0.024612, avg_loss: 0.154367 031455/063150, loss: 0.093458, avg_loss: 0.154359 031460/063150, loss: 0.006582, avg_loss: 0.154341 031465/063150, loss: 0.003654, avg_loss: 0.154322 031470/063150, loss: 0.008506, avg_loss: 0.154314 031475/063150, loss: 0.025396, avg_loss: 0.154295 031480/063150, loss: 0.011642, avg_loss: 0.154279 031485/063150, loss: 0.038146, avg_loss: 0.154260 031490/063150, loss: 0.011713, avg_loss: 0.154243 031495/063150, loss: 0.003430, avg_loss: 0.154222 031500/063150, loss: 0.106159, avg_loss: 0.154207 031505/063150, loss: 0.032232, avg_loss: 0.154186 031510/063150, loss: 0.012164, avg_loss: 0.154167 031515/063150, loss: 0.001131, avg_loss: 0.154148 031520/063150, loss: 0.018311, avg_loss: 0.154134 031525/063150, loss: 0.222614, avg_loss: 0.154119 031530/063150, loss: 0.067950, avg_loss: 0.154101 031535/063150, loss: 0.002931, avg_loss: 0.154081 031540/063150, loss: 0.037557, avg_loss: 0.154062 031545/063150, loss: 0.054188, avg_loss: 0.154045 031550/063150, loss: 0.033414, avg_loss: 0.154022 031555/063150, loss: 0.044104, avg_loss: 0.154006 031560/063150, loss: 0.139939, avg_loss: 0.153988 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 14, step 31560/63150: {'accuracy': 0.8555045871559633} 031565/063150, loss: 0.069428, avg_loss: 0.153969 031570/063150, loss: 0.048148, avg_loss: 0.153949 031575/063150, loss: 0.017297, avg_loss: 0.153933 031580/063150, loss: 0.215259, avg_loss: 0.153925 031585/063150, loss: 0.006609, avg_loss: 0.153902 031590/063150, loss: 0.011902, avg_loss: 0.153883 031595/063150, loss: 0.012367, avg_loss: 0.153866 031600/063150, loss: 0.079808, avg_loss: 0.153848 031605/063150, loss: 0.017448, avg_loss: 0.153827 031610/063150, loss: 0.004014, avg_loss: 0.153806 031615/063150, loss: 0.050524, avg_loss: 0.153787 031620/063150, loss: 0.003370, avg_loss: 0.153766 031625/063150, loss: 0.014273, avg_loss: 0.153750 031630/063150, loss: 0.037066, avg_loss: 0.153731 031635/063150, loss: 0.022845, avg_loss: 0.153709 031640/063150, loss: 0.223372, avg_loss: 0.153694 031645/063150, loss: 0.006216, avg_loss: 0.153673 031650/063150, loss: 0.020023, avg_loss: 0.153654 031655/063150, loss: 0.019544, avg_loss: 0.153632 031660/063150, loss: 0.001398, avg_loss: 0.153611 031665/063150, loss: 0.000737, avg_loss: 0.153588 031670/063150, loss: 0.003803, avg_loss: 0.153569 031675/063150, loss: 0.003401, avg_loss: 0.153549 031680/063150, loss: 0.003137, avg_loss: 0.153528 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 31680/63150: {'accuracy': 0.8451834862385321} 031685/063150, loss: 0.007107, avg_loss: 0.153509 031690/063150, loss: 0.000546, avg_loss: 0.153487 031695/063150, loss: 0.029930, avg_loss: 0.153466 031700/063150, loss: 0.010998, avg_loss: 0.153443 031705/063150, loss: 0.007357, avg_loss: 0.153426 031710/063150, loss: 0.001457, avg_loss: 0.153406 031715/063150, loss: 0.002038, avg_loss: 0.153384 031720/063150, loss: 0.018246, avg_loss: 0.153362 031725/063150, loss: 0.062755, avg_loss: 0.153349 031730/063150, loss: 0.029712, avg_loss: 0.153329 031735/063150, loss: 0.002405, avg_loss: 0.153309 031740/063150, loss: 0.023896, avg_loss: 0.153296 031745/063150, loss: 0.225884, avg_loss: 0.153281 031750/063150, loss: 0.004821, avg_loss: 0.153262 031755/063150, loss: 0.021150, avg_loss: 0.153242 031760/063150, loss: 0.071390, avg_loss: 0.153223 031765/063150, loss: 0.035106, avg_loss: 0.153204 031770/063150, loss: 0.001874, avg_loss: 0.153187 031775/063150, loss: 0.014004, avg_loss: 0.153170 031780/063150, loss: 0.078600, avg_loss: 0.153149 031785/063150, loss: 0.036884, avg_loss: 0.153129 031790/063150, loss: 0.012744, avg_loss: 0.153108 031795/063150, loss: 0.005733, avg_loss: 0.153090 031800/063150, loss: 0.260414, avg_loss: 0.153080 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 31800/63150: {'accuracy': 0.8543577981651376} 031805/063150, loss: 0.014929, avg_loss: 0.153064 031810/063150, loss: 0.009144, avg_loss: 0.153042 031815/063150, loss: 0.055491, avg_loss: 0.153022 031820/063150, loss: 0.065123, avg_loss: 0.153006 031825/063150, loss: 0.009100, avg_loss: 0.152984 031830/063150, loss: 0.122980, avg_loss: 0.152968 031835/063150, loss: 0.118446, avg_loss: 0.152954 031840/063150, loss: 0.000656, avg_loss: 0.152932 031845/063150, loss: 0.150223, avg_loss: 0.152917 031850/063150, loss: 0.139656, avg_loss: 0.152903 031855/063150, loss: 0.012258, avg_loss: 0.152886 031860/063150, loss: 0.054694, avg_loss: 0.152870 031865/063150, loss: 0.014799, avg_loss: 0.152848 031870/063150, loss: 0.005471, avg_loss: 0.152827 031875/063150, loss: 0.005437, avg_loss: 0.152812 031880/063150, loss: 0.006934, avg_loss: 0.152792 031885/063150, loss: 0.033596, avg_loss: 0.152770 031890/063150, loss: 0.013615, avg_loss: 0.152750 031895/063150, loss: 0.005589, avg_loss: 0.152731 031900/063150, loss: 0.066042, avg_loss: 0.152715 031905/063150, loss: 0.010067, avg_loss: 0.152696 031910/063150, loss: 0.005534, avg_loss: 0.152678 031915/063150, loss: 0.031302, avg_loss: 0.152659 031920/063150, loss: 0.015648, avg_loss: 0.152647 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 31920/63150: {'accuracy': 0.856651376146789} 031925/063150, loss: 0.004087, avg_loss: 0.152628 031930/063150, loss: 0.012033, avg_loss: 0.152606 031935/063150, loss: 0.132375, avg_loss: 0.152587 031940/063150, loss: 0.019506, avg_loss: 0.152567 031945/063150, loss: 0.006165, avg_loss: 0.152546 031950/063150, loss: 0.023628, avg_loss: 0.152526 031955/063150, loss: 0.029892, avg_loss: 0.152509 031960/063150, loss: 0.002362, avg_loss: 0.152489 031965/063150, loss: 0.088375, avg_loss: 0.152477 031970/063150, loss: 0.103584, avg_loss: 0.152460 031975/063150, loss: 0.006373, avg_loss: 0.152443 031980/063150, loss: 0.004424, avg_loss: 0.152429 031985/063150, loss: 0.005649, avg_loss: 0.152414 031990/063150, loss: 0.004369, avg_loss: 0.152393 031995/063150, loss: 0.003095, avg_loss: 0.152369 032000/063150, loss: 0.003324, avg_loss: 0.152346 032005/063150, loss: 0.007763, avg_loss: 0.152327 032010/063150, loss: 0.002517, avg_loss: 0.152310 032015/063150, loss: 0.008595, avg_loss: 0.152290 032020/063150, loss: 0.004567, avg_loss: 0.152270 032025/063150, loss: 0.001465, avg_loss: 0.152249 032030/063150, loss: 0.003452, avg_loss: 0.152227 032035/063150, loss: 0.001302, avg_loss: 0.152207 032040/063150, loss: 0.014194, avg_loss: 0.152194 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32040/63150: {'accuracy': 0.8555045871559633} 032045/063150, loss: 0.046692, avg_loss: 0.152179 032050/063150, loss: 0.014825, avg_loss: 0.152162 032055/063150, loss: 0.096797, avg_loss: 0.152145 032060/063150, loss: 0.039867, avg_loss: 0.152127 032065/063150, loss: 0.004809, avg_loss: 0.152105 032070/063150, loss: 0.074303, avg_loss: 0.152085 032075/063150, loss: 0.021751, avg_loss: 0.152063 032080/063150, loss: 0.069260, avg_loss: 0.152045 032085/063150, loss: 0.020678, avg_loss: 0.152023 032090/063150, loss: 0.063975, avg_loss: 0.152004 032095/063150, loss: 0.003220, avg_loss: 0.151983 032100/063150, loss: 0.014715, avg_loss: 0.151964 032105/063150, loss: 0.004824, avg_loss: 0.151947 032110/063150, loss: 0.166059, avg_loss: 0.151934 032115/063150, loss: 0.011776, avg_loss: 0.151914 032120/063150, loss: 0.001186, avg_loss: 0.151897 032125/063150, loss: 0.003742, avg_loss: 0.151875 032130/063150, loss: 0.009916, avg_loss: 0.151852 032135/063150, loss: 0.003336, avg_loss: 0.151831 032140/063150, loss: 0.033249, avg_loss: 0.151830 032145/063150, loss: 0.048928, avg_loss: 0.151817 032150/063150, loss: 0.006084, avg_loss: 0.151797 032155/063150, loss: 0.050117, avg_loss: 0.151781 032160/063150, loss: 0.009630, avg_loss: 0.151763 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32160/63150: {'accuracy': 0.8577981651376146} 032165/063150, loss: 0.082680, avg_loss: 0.151745 032170/063150, loss: 0.006695, avg_loss: 0.151725 032175/063150, loss: 0.101264, avg_loss: 0.151710 032180/063150, loss: 0.037066, avg_loss: 0.151693 032185/063150, loss: 0.001105, avg_loss: 0.151671 032190/063150, loss: 0.036753, avg_loss: 0.151651 032195/063150, loss: 0.001140, avg_loss: 0.151638 032200/063150, loss: 0.080171, avg_loss: 0.151621 032205/063150, loss: 0.024211, avg_loss: 0.151607 032210/063150, loss: 0.019681, avg_loss: 0.151589 032215/063150, loss: 0.068347, avg_loss: 0.151570 032220/063150, loss: 0.052649, avg_loss: 0.151551 032225/063150, loss: 0.117048, avg_loss: 0.151533 032230/063150, loss: 0.061031, avg_loss: 0.151519 032235/063150, loss: 0.031470, avg_loss: 0.151498 032240/063150, loss: 0.004966, avg_loss: 0.151483 032245/063150, loss: 0.010349, avg_loss: 0.151467 032250/063150, loss: 0.028626, avg_loss: 0.151450 032255/063150, loss: 0.022142, avg_loss: 0.151434 032260/063150, loss: 0.040613, avg_loss: 0.151415 032265/063150, loss: 0.032994, avg_loss: 0.151396 032270/063150, loss: 0.002019, avg_loss: 0.151375 032275/063150, loss: 0.005279, avg_loss: 0.151356 032280/063150, loss: 0.212202, avg_loss: 0.151347 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32280/63150: {'accuracy': 0.8532110091743119} 032285/063150, loss: 0.012638, avg_loss: 0.151324 032290/063150, loss: 0.064646, avg_loss: 0.151308 032295/063150, loss: 0.081643, avg_loss: 0.151295 032300/063150, loss: 0.003165, avg_loss: 0.151274 032305/063150, loss: 0.011685, avg_loss: 0.151254 032310/063150, loss: 0.051202, avg_loss: 0.151238 032315/063150, loss: 0.044343, avg_loss: 0.151217 032320/063150, loss: 0.002301, avg_loss: 0.151197 032325/063150, loss: 0.001429, avg_loss: 0.151179 032330/063150, loss: 0.160796, avg_loss: 0.151165 032335/063150, loss: 0.070961, avg_loss: 0.151147 032340/063150, loss: 0.048449, avg_loss: 0.151134 032345/063150, loss: 0.008753, avg_loss: 0.151117 032350/063150, loss: 0.030852, avg_loss: 0.151100 032355/063150, loss: 0.004543, avg_loss: 0.151085 032360/063150, loss: 0.011918, avg_loss: 0.151068 032365/063150, loss: 0.197130, avg_loss: 0.151057 032370/063150, loss: 0.018620, avg_loss: 0.151041 032375/063150, loss: 0.087574, avg_loss: 0.151022 032380/063150, loss: 0.026363, avg_loss: 0.151001 032385/063150, loss: 0.017810, avg_loss: 0.150980 032390/063150, loss: 0.061095, avg_loss: 0.150962 032395/063150, loss: 0.004592, avg_loss: 0.150945 032400/063150, loss: 0.314244, avg_loss: 0.150932 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32400/63150: {'accuracy': 0.8428899082568807} 032405/063150, loss: 0.010465, avg_loss: 0.150910 032410/063150, loss: 0.034104, avg_loss: 0.150891 032415/063150, loss: 0.080379, avg_loss: 0.150884 032420/063150, loss: 0.005336, avg_loss: 0.150861 032425/063150, loss: 0.018968, avg_loss: 0.150842 032430/063150, loss: 0.045775, avg_loss: 0.150830 032435/063150, loss: 0.010366, avg_loss: 0.150813 032440/063150, loss: 0.017216, avg_loss: 0.150793 032445/063150, loss: 0.062167, avg_loss: 0.150775 032450/063150, loss: 0.048706, avg_loss: 0.150764 032455/063150, loss: 0.001584, avg_loss: 0.150744 032460/063150, loss: 0.012657, avg_loss: 0.150726 032465/063150, loss: 0.042367, avg_loss: 0.150712 032470/063150, loss: 0.004825, avg_loss: 0.150695 032475/063150, loss: 0.081176, avg_loss: 0.150676 032480/063150, loss: 0.038419, avg_loss: 0.150656 032485/063150, loss: 0.089686, avg_loss: 0.150640 032490/063150, loss: 0.000725, avg_loss: 0.150622 032495/063150, loss: 0.151417, avg_loss: 0.150608 032500/063150, loss: 0.016084, avg_loss: 0.150590 032505/063150, loss: 0.006126, avg_loss: 0.150571 032510/063150, loss: 0.013508, avg_loss: 0.150555 032515/063150, loss: 0.030435, avg_loss: 0.150539 032520/063150, loss: 0.013757, avg_loss: 0.150518 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32520/63150: {'accuracy': 0.8555045871559633} 032525/063150, loss: 0.002705, avg_loss: 0.150498 032530/063150, loss: 0.094880, avg_loss: 0.150482 032535/063150, loss: 0.022475, avg_loss: 0.150466 032540/063150, loss: 0.010388, avg_loss: 0.150445 032545/063150, loss: 0.002501, avg_loss: 0.150424 032550/063150, loss: 0.009591, avg_loss: 0.150403 032555/063150, loss: 0.019016, avg_loss: 0.150382 032560/063150, loss: 0.012960, avg_loss: 0.150366 032565/063150, loss: 0.003944, avg_loss: 0.150349 032570/063150, loss: 0.041830, avg_loss: 0.150334 032575/063150, loss: 0.011738, avg_loss: 0.150323 032580/063150, loss: 0.001348, avg_loss: 0.150301 032585/063150, loss: 0.001198, avg_loss: 0.150283 032590/063150, loss: 0.184000, avg_loss: 0.150276 032595/063150, loss: 0.040405, avg_loss: 0.150259 032600/063150, loss: 0.017433, avg_loss: 0.150242 032605/063150, loss: 0.064917, avg_loss: 0.150224 032610/063150, loss: 0.016867, avg_loss: 0.150204 032615/063150, loss: 0.039404, avg_loss: 0.150187 032620/063150, loss: 0.023279, avg_loss: 0.150171 032625/063150, loss: 0.009219, avg_loss: 0.150151 032630/063150, loss: 0.078166, avg_loss: 0.150141 032635/063150, loss: 0.018076, avg_loss: 0.150125 032640/063150, loss: 0.019608, avg_loss: 0.150108 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32640/63150: {'accuracy': 0.8520642201834863} 032645/063150, loss: 0.029135, avg_loss: 0.150089 032650/063150, loss: 0.023187, avg_loss: 0.150068 032655/063150, loss: 0.071713, avg_loss: 0.150059 032660/063150, loss: 0.051653, avg_loss: 0.150044 032665/063150, loss: 0.110812, avg_loss: 0.150026 032670/063150, loss: 0.077080, avg_loss: 0.150008 032675/063150, loss: 0.020345, avg_loss: 0.149992 032680/063150, loss: 0.011876, avg_loss: 0.149973 032685/063150, loss: 0.014235, avg_loss: 0.149954 032690/063150, loss: 0.049570, avg_loss: 0.149937 032695/063150, loss: 0.004712, avg_loss: 0.149918 032700/063150, loss: 0.064658, avg_loss: 0.149900 032705/063150, loss: 0.001231, avg_loss: 0.149881 032710/063150, loss: 0.004263, avg_loss: 0.149872 032715/063150, loss: 0.040360, avg_loss: 0.149854 032720/063150, loss: 0.077648, avg_loss: 0.149836 032725/063150, loss: 0.020749, avg_loss: 0.149818 032730/063150, loss: 0.003180, avg_loss: 0.149799 032735/063150, loss: 0.088896, avg_loss: 0.149782 032740/063150, loss: 0.025926, avg_loss: 0.149767 032745/063150, loss: 0.080748, avg_loss: 0.149752 032750/063150, loss: 0.010617, avg_loss: 0.149745 032755/063150, loss: 0.017792, avg_loss: 0.149724 032760/063150, loss: 0.011277, avg_loss: 0.149707 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32760/63150: {'accuracy': 0.8153669724770642} 032765/063150, loss: 0.095582, avg_loss: 0.149694 032770/063150, loss: 0.001406, avg_loss: 0.149675 032775/063150, loss: 0.013481, avg_loss: 0.149656 032780/063150, loss: 0.011827, avg_loss: 0.149641 032785/063150, loss: 0.023711, avg_loss: 0.149623 032790/063150, loss: 0.025093, avg_loss: 0.149606 032795/063150, loss: 0.023212, avg_loss: 0.149588 032800/063150, loss: 0.001642, avg_loss: 0.149569 032805/063150, loss: 0.019108, avg_loss: 0.149552 032810/063150, loss: 0.000516, avg_loss: 0.149532 032815/063150, loss: 0.000591, avg_loss: 0.149515 032820/063150, loss: 0.015859, avg_loss: 0.149500 032825/063150, loss: 0.001923, avg_loss: 0.149486 032830/063150, loss: 0.030517, avg_loss: 0.149465 032835/063150, loss: 0.026463, avg_loss: 0.149448 032840/063150, loss: 0.005815, avg_loss: 0.149431 032845/063150, loss: 0.103565, avg_loss: 0.149418 032850/063150, loss: 0.018569, avg_loss: 0.149401 032855/063150, loss: 0.013975, avg_loss: 0.149383 032860/063150, loss: 0.011660, avg_loss: 0.149368 032865/063150, loss: 0.170284, avg_loss: 0.149356 032870/063150, loss: 0.011980, avg_loss: 0.149342 032875/063150, loss: 0.001372, avg_loss: 0.149327 032880/063150, loss: 0.019010, avg_loss: 0.149306 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 32880/63150: {'accuracy': 0.8532110091743119} 032885/063150, loss: 0.014984, avg_loss: 0.149288 032890/063150, loss: 0.007146, avg_loss: 0.149268 032895/063150, loss: 0.014933, avg_loss: 0.149251 032900/063150, loss: 0.012553, avg_loss: 0.149233 032905/063150, loss: 0.001936, avg_loss: 0.149215 032910/063150, loss: 0.001917, avg_loss: 0.149205 032915/063150, loss: 0.030596, avg_loss: 0.149185 032920/063150, loss: 0.016542, avg_loss: 0.149166 032925/063150, loss: 0.052967, avg_loss: 0.149150 032930/063150, loss: 0.017940, avg_loss: 0.149130 032935/063150, loss: 0.054746, avg_loss: 0.149111 032940/063150, loss: 0.001472, avg_loss: 0.149093 032945/063150, loss: 0.006528, avg_loss: 0.149071 032950/063150, loss: 0.008536, avg_loss: 0.149051 032955/063150, loss: 0.106602, avg_loss: 0.149036 032960/063150, loss: 0.002405, avg_loss: 0.149015 032965/063150, loss: 0.057453, avg_loss: 0.149004 032970/063150, loss: 0.119037, avg_loss: 0.148987 032975/063150, loss: 0.001901, avg_loss: 0.148975 032980/063150, loss: 0.049795, avg_loss: 0.148958 032985/063150, loss: 0.057811, avg_loss: 0.148943 032990/063150, loss: 0.002982, avg_loss: 0.148922 032995/063150, loss: 0.009201, avg_loss: 0.148915 033000/063150, loss: 0.017153, avg_loss: 0.148894 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 33000/63150: {'accuracy': 0.8577981651376146} 033005/063150, loss: 0.043624, avg_loss: 0.148876 033010/063150, loss: 0.025370, avg_loss: 0.148856 033015/063150, loss: 0.041446, avg_loss: 0.148838 033020/063150, loss: 0.050597, avg_loss: 0.148818 033025/063150, loss: 0.001196, avg_loss: 0.148798 033030/063150, loss: 0.004848, avg_loss: 0.148788 033035/063150, loss: 0.032267, avg_loss: 0.148768 033040/063150, loss: 0.004520, avg_loss: 0.148750 033045/063150, loss: 0.029751, avg_loss: 0.148732 033050/063150, loss: 0.013777, avg_loss: 0.148715 033055/063150, loss: 0.061514, avg_loss: 0.148701 033060/063150, loss: 0.012740, avg_loss: 0.148700 033065/063150, loss: 0.014615, avg_loss: 0.148686 033070/063150, loss: 0.011086, avg_loss: 0.148673 033075/063150, loss: 0.100312, avg_loss: 0.148656 033080/063150, loss: 0.040799, avg_loss: 0.148638 033085/063150, loss: 0.013348, avg_loss: 0.148629 033090/063150, loss: 0.068454, avg_loss: 0.148613 033095/063150, loss: 0.068554, avg_loss: 0.148594 033100/063150, loss: 0.010253, avg_loss: 0.148580 033105/063150, loss: 0.032851, avg_loss: 0.148562 033110/063150, loss: 0.045854, avg_loss: 0.148541 033115/063150, loss: 0.023658, avg_loss: 0.148521 033120/063150, loss: 0.024104, avg_loss: 0.148503 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 33120/63150: {'accuracy': 0.8463302752293578} 033125/063150, loss: 0.129521, avg_loss: 0.148490 033130/063150, loss: 0.004246, avg_loss: 0.148472 033135/063150, loss: 0.005289, avg_loss: 0.148455 033140/063150, loss: 0.003993, avg_loss: 0.148434 033145/063150, loss: 0.039762, avg_loss: 0.148418 033150/063150, loss: 0.043979, avg_loss: 0.148397 033155/063150, loss: 0.001181, avg_loss: 0.148380 033160/063150, loss: 0.168182, avg_loss: 0.148368 033165/063150, loss: 0.029405, avg_loss: 0.148351 033170/063150, loss: 0.008169, avg_loss: 0.148333 033175/063150, loss: 0.083457, avg_loss: 0.148316 033180/063150, loss: 0.002800, avg_loss: 0.148295 033185/063150, loss: 0.001302, avg_loss: 0.148276 033190/063150, loss: 0.016164, avg_loss: 0.148262 033195/063150, loss: 0.010633, avg_loss: 0.148241 033200/063150, loss: 0.004194, avg_loss: 0.148221 033205/063150, loss: 0.006342, avg_loss: 0.148207 033210/063150, loss: 0.060946, avg_loss: 0.148197 033215/063150, loss: 0.021701, avg_loss: 0.148183 033220/063150, loss: 0.091852, avg_loss: 0.148174 033225/063150, loss: 0.028652, avg_loss: 0.148159 033230/063150, loss: 0.006561, avg_loss: 0.148144 033235/063150, loss: 0.036666, avg_loss: 0.148125 033240/063150, loss: 0.005548, avg_loss: 0.148108 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 33240/63150: {'accuracy': 0.8486238532110092} 033245/063150, loss: 0.121300, avg_loss: 0.148095 033250/063150, loss: 0.002188, avg_loss: 0.148078 033255/063150, loss: 0.022848, avg_loss: 0.148059 033260/063150, loss: 0.004626, avg_loss: 0.148040 033265/063150, loss: 0.016482, avg_loss: 0.148022 033270/063150, loss: 0.002042, avg_loss: 0.148006 033275/063150, loss: 0.043554, avg_loss: 0.147990 033280/063150, loss: 0.008069, avg_loss: 0.147970 033285/063150, loss: 0.020968, avg_loss: 0.147955 033290/063150, loss: 0.097583, avg_loss: 0.147939 033295/063150, loss: 0.148115, avg_loss: 0.147933 033300/063150, loss: 0.018813, avg_loss: 0.147913 033305/063150, loss: 0.116478, avg_loss: 0.147898 033310/063150, loss: 0.088976, avg_loss: 0.147882 033315/063150, loss: 0.014793, avg_loss: 0.147863 033320/063150, loss: 0.031540, avg_loss: 0.147848 033325/063150, loss: 0.001784, avg_loss: 0.147831 033330/063150, loss: 0.023705, avg_loss: 0.147810 033335/063150, loss: 0.001648, avg_loss: 0.147792 033340/063150, loss: 0.038400, avg_loss: 0.147777 033345/063150, loss: 0.038439, avg_loss: 0.147758 033350/063150, loss: 0.001341, avg_loss: 0.147745 033355/063150, loss: 0.042025, avg_loss: 0.147730 033360/063150, loss: 0.021579, avg_loss: 0.147716 ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 epoch 15, step 33360/63150: {'accuracy': 0.841743119266055} ***** Running train evaluation ***** Num examples = 67349 Instantaneous batch size per device = 32 Train Dataset Result: {'accuracy': 0.9946992531440705} ***** Running dev evaluation ***** Num examples = 872 Instantaneous batch size per device = 32 Dev Dataset Result: {'accuracy': 0.841743119266055} DEV Best Result: accuracy, 0.8727064220183486 Training time 0:35:24