Upload ./training.log with huggingface_hub
Browse files- training.log +260 -0
 
    	
        training.log
    ADDED
    
    | 
         @@ -0,0 +1,260 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            2023-10-19 02:07:58,307 ----------------------------------------------------------------------------------------------------
         
     | 
| 2 | 
         
            +
            2023-10-19 02:07:58,308 Model: "SequenceTagger(
         
     | 
| 3 | 
         
            +
              (embeddings): TransformerWordEmbeddings(
         
     | 
| 4 | 
         
            +
                (model): BertModel(
         
     | 
| 5 | 
         
            +
                  (embeddings): BertEmbeddings(
         
     | 
| 6 | 
         
            +
                    (word_embeddings): Embedding(31103, 768)
         
     | 
| 7 | 
         
            +
                    (position_embeddings): Embedding(512, 768)
         
     | 
| 8 | 
         
            +
                    (token_type_embeddings): Embedding(2, 768)
         
     | 
| 9 | 
         
            +
                    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
         
     | 
| 10 | 
         
            +
                    (dropout): Dropout(p=0.1, inplace=False)
         
     | 
| 11 | 
         
            +
                  )
         
     | 
| 12 | 
         
            +
                  (encoder): BertEncoder(
         
     | 
| 13 | 
         
            +
                    (layer): ModuleList(
         
     | 
| 14 | 
         
            +
                      (0-11): 12 x BertLayer(
         
     | 
| 15 | 
         
            +
                        (attention): BertAttention(
         
     | 
| 16 | 
         
            +
                          (self): BertSelfAttention(
         
     | 
| 17 | 
         
            +
                            (query): Linear(in_features=768, out_features=768, bias=True)
         
     | 
| 18 | 
         
            +
                            (key): Linear(in_features=768, out_features=768, bias=True)
         
     | 
| 19 | 
         
            +
                            (value): Linear(in_features=768, out_features=768, bias=True)
         
     | 
| 20 | 
         
            +
                            (dropout): Dropout(p=0.1, inplace=False)
         
     | 
| 21 | 
         
            +
                          )
         
     | 
| 22 | 
         
            +
                          (output): BertSelfOutput(
         
     | 
| 23 | 
         
            +
                            (dense): Linear(in_features=768, out_features=768, bias=True)
         
     | 
| 24 | 
         
            +
                            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
         
     | 
| 25 | 
         
            +
                            (dropout): Dropout(p=0.1, inplace=False)
         
     | 
| 26 | 
         
            +
                          )
         
     | 
| 27 | 
         
            +
                        )
         
     | 
| 28 | 
         
            +
                        (intermediate): BertIntermediate(
         
     | 
| 29 | 
         
            +
                          (dense): Linear(in_features=768, out_features=3072, bias=True)
         
     | 
| 30 | 
         
            +
                          (intermediate_act_fn): GELUActivation()
         
     | 
| 31 | 
         
            +
                        )
         
     | 
| 32 | 
         
            +
                        (output): BertOutput(
         
     | 
| 33 | 
         
            +
                          (dense): Linear(in_features=3072, out_features=768, bias=True)
         
     | 
| 34 | 
         
            +
                          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
         
     | 
| 35 | 
         
            +
                          (dropout): Dropout(p=0.1, inplace=False)
         
     | 
| 36 | 
         
            +
                        )
         
     | 
| 37 | 
         
            +
                      )
         
     | 
| 38 | 
         
            +
                    )
         
     | 
| 39 | 
         
            +
                  )
         
     | 
| 40 | 
         
            +
                  (pooler): BertPooler(
         
     | 
| 41 | 
         
            +
                    (dense): Linear(in_features=768, out_features=768, bias=True)
         
     | 
| 42 | 
         
            +
                    (activation): Tanh()
         
     | 
| 43 | 
         
            +
                  )
         
     | 
| 44 | 
         
            +
                )
         
     | 
| 45 | 
         
            +
              )
         
     | 
| 46 | 
         
            +
              (locked_dropout): LockedDropout(p=0.5)
         
     | 
| 47 | 
         
            +
              (linear): Linear(in_features=768, out_features=81, bias=True)
         
     | 
| 48 | 
         
            +
              (loss_function): CrossEntropyLoss()
         
     | 
| 49 | 
         
            +
            )"
         
     | 
| 50 | 
         
            +
            2023-10-19 02:07:58,308 ----------------------------------------------------------------------------------------------------
         
     | 
| 51 | 
         
            +
            2023-10-19 02:07:58,309 Corpus: 6900 train + 1576 dev + 1833 test sentences
         
     | 
| 52 | 
         
            +
            2023-10-19 02:07:58,309 ----------------------------------------------------------------------------------------------------
         
     | 
| 53 | 
         
            +
            2023-10-19 02:07:58,309 Train:  6900 sentences
         
     | 
| 54 | 
         
            +
            2023-10-19 02:07:58,309         (train_with_dev=False, train_with_test=False)
         
     | 
| 55 | 
         
            +
            2023-10-19 02:07:58,309 ----------------------------------------------------------------------------------------------------
         
     | 
| 56 | 
         
            +
            2023-10-19 02:07:58,309 Training Params:
         
     | 
| 57 | 
         
            +
            2023-10-19 02:07:58,309  - learning_rate: "3e-05" 
         
     | 
| 58 | 
         
            +
            2023-10-19 02:07:58,309  - mini_batch_size: "16"
         
     | 
| 59 | 
         
            +
            2023-10-19 02:07:58,309  - max_epochs: "10"
         
     | 
| 60 | 
         
            +
            2023-10-19 02:07:58,309  - shuffle: "True"
         
     | 
| 61 | 
         
            +
            2023-10-19 02:07:58,309 ----------------------------------------------------------------------------------------------------
         
     | 
| 62 | 
         
            +
            2023-10-19 02:07:58,309 Plugins:
         
     | 
| 63 | 
         
            +
            2023-10-19 02:07:58,309  - TensorboardLogger
         
     | 
| 64 | 
         
            +
            2023-10-19 02:07:58,309  - LinearScheduler | warmup_fraction: '0.1'
         
     | 
| 65 | 
         
            +
            2023-10-19 02:07:58,309 ----------------------------------------------------------------------------------------------------
         
     | 
| 66 | 
         
            +
            2023-10-19 02:07:58,309 Final evaluation on model from best epoch (best-model.pt)
         
     | 
| 67 | 
         
            +
            2023-10-19 02:07:58,309  - metric: "('micro avg', 'f1-score')"
         
     | 
| 68 | 
         
            +
            2023-10-19 02:07:58,309 ----------------------------------------------------------------------------------------------------
         
     | 
| 69 | 
         
            +
            2023-10-19 02:07:58,309 Computation:
         
     | 
| 70 | 
         
            +
            2023-10-19 02:07:58,310  - compute on device: cuda:0
         
     | 
| 71 | 
         
            +
            2023-10-19 02:07:58,310  - embedding storage: none
         
     | 
| 72 | 
         
            +
            2023-10-19 02:07:58,310 ----------------------------------------------------------------------------------------------------
         
     | 
| 73 | 
         
            +
            2023-10-19 02:07:58,310 Model training base path: "autotrain-flair-mobie-gbert_base-bs16-e10-lr3e-05-4"
         
     | 
| 74 | 
         
            +
            2023-10-19 02:07:58,310 ----------------------------------------------------------------------------------------------------
         
     | 
| 75 | 
         
            +
            2023-10-19 02:07:58,310 ----------------------------------------------------------------------------------------------------
         
     | 
| 76 | 
         
            +
            2023-10-19 02:07:58,310 Logging anything other than scalars to TensorBoard is currently not supported.
         
     | 
| 77 | 
         
            +
            2023-10-19 02:08:13,517 epoch 1 - iter 43/432 - loss 4.93250462 - time (sec): 15.21 - samples/sec: 429.82 - lr: 0.000003 - momentum: 0.000000
         
     | 
| 78 | 
         
            +
            2023-10-19 02:08:28,839 epoch 1 - iter 86/432 - loss 3.97734066 - time (sec): 30.53 - samples/sec: 411.99 - lr: 0.000006 - momentum: 0.000000
         
     | 
| 79 | 
         
            +
            2023-10-19 02:08:43,873 epoch 1 - iter 129/432 - loss 3.26708172 - time (sec): 45.56 - samples/sec: 408.49 - lr: 0.000009 - momentum: 0.000000
         
     | 
| 80 | 
         
            +
            2023-10-19 02:08:59,143 epoch 1 - iter 172/432 - loss 2.86543349 - time (sec): 60.83 - samples/sec: 410.11 - lr: 0.000012 - momentum: 0.000000
         
     | 
| 81 | 
         
            +
            2023-10-19 02:09:15,576 epoch 1 - iter 215/432 - loss 2.59028422 - time (sec): 77.26 - samples/sec: 401.88 - lr: 0.000015 - momentum: 0.000000
         
     | 
| 82 | 
         
            +
            2023-10-19 02:09:30,082 epoch 1 - iter 258/432 - loss 2.35703794 - time (sec): 91.77 - samples/sec: 406.04 - lr: 0.000018 - momentum: 0.000000
         
     | 
| 83 | 
         
            +
            2023-10-19 02:09:44,936 epoch 1 - iter 301/432 - loss 2.15806965 - time (sec): 106.63 - samples/sec: 406.83 - lr: 0.000021 - momentum: 0.000000
         
     | 
| 84 | 
         
            +
            2023-10-19 02:09:59,763 epoch 1 - iter 344/432 - loss 2.00613180 - time (sec): 121.45 - samples/sec: 408.73 - lr: 0.000024 - momentum: 0.000000
         
     | 
| 85 | 
         
            +
            2023-10-19 02:10:13,958 epoch 1 - iter 387/432 - loss 1.88001586 - time (sec): 135.65 - samples/sec: 409.08 - lr: 0.000027 - momentum: 0.000000
         
     | 
| 86 | 
         
            +
            2023-10-19 02:10:29,196 epoch 1 - iter 430/432 - loss 1.76470516 - time (sec): 150.89 - samples/sec: 408.67 - lr: 0.000030 - momentum: 0.000000
         
     | 
| 87 | 
         
            +
            2023-10-19 02:10:29,817 ----------------------------------------------------------------------------------------------------
         
     | 
| 88 | 
         
            +
            2023-10-19 02:10:29,817 EPOCH 1 done: loss 1.7614 - lr: 0.000030
         
     | 
| 89 | 
         
            +
            2023-10-19 02:10:43,326 DEV : loss 0.5575976967811584 - f1-score (micro avg)  0.6297
         
     | 
| 90 | 
         
            +
            2023-10-19 02:10:43,351 saving best model
         
     | 
| 91 | 
         
            +
            2023-10-19 02:10:43,792 ----------------------------------------------------------------------------------------------------
         
     | 
| 92 | 
         
            +
            2023-10-19 02:10:58,128 epoch 2 - iter 43/432 - loss 0.62560817 - time (sec): 14.33 - samples/sec: 414.75 - lr: 0.000030 - momentum: 0.000000
         
     | 
| 93 | 
         
            +
            2023-10-19 02:11:12,293 epoch 2 - iter 86/432 - loss 0.61003222 - time (sec): 28.50 - samples/sec: 441.84 - lr: 0.000029 - momentum: 0.000000
         
     | 
| 94 | 
         
            +
            2023-10-19 02:11:27,392 epoch 2 - iter 129/432 - loss 0.58076243 - time (sec): 43.60 - samples/sec: 420.07 - lr: 0.000029 - momentum: 0.000000
         
     | 
| 95 | 
         
            +
            2023-10-19 02:11:41,758 epoch 2 - iter 172/432 - loss 0.56106454 - time (sec): 57.96 - samples/sec: 421.61 - lr: 0.000029 - momentum: 0.000000
         
     | 
| 96 | 
         
            +
            2023-10-19 02:11:56,333 epoch 2 - iter 215/432 - loss 0.54986490 - time (sec): 72.54 - samples/sec: 419.66 - lr: 0.000028 - momentum: 0.000000
         
     | 
| 97 | 
         
            +
            2023-10-19 02:12:11,136 epoch 2 - iter 258/432 - loss 0.53748417 - time (sec): 87.34 - samples/sec: 421.39 - lr: 0.000028 - momentum: 0.000000
         
     | 
| 98 | 
         
            +
            2023-10-19 02:12:26,691 epoch 2 - iter 301/432 - loss 0.52065632 - time (sec): 102.90 - samples/sec: 417.10 - lr: 0.000028 - momentum: 0.000000
         
     | 
| 99 | 
         
            +
            2023-10-19 02:12:42,153 epoch 2 - iter 344/432 - loss 0.50823824 - time (sec): 118.36 - samples/sec: 411.50 - lr: 0.000027 - momentum: 0.000000
         
     | 
| 100 | 
         
            +
            2023-10-19 02:12:58,398 epoch 2 - iter 387/432 - loss 0.49509518 - time (sec): 134.60 - samples/sec: 409.60 - lr: 0.000027 - momentum: 0.000000
         
     | 
| 101 | 
         
            +
            2023-10-19 02:13:13,413 epoch 2 - iter 430/432 - loss 0.48286405 - time (sec): 149.62 - samples/sec: 412.11 - lr: 0.000027 - momentum: 0.000000
         
     | 
| 102 | 
         
            +
            2023-10-19 02:13:13,985 ----------------------------------------------------------------------------------------------------
         
     | 
| 103 | 
         
            +
            2023-10-19 02:13:13,986 EPOCH 2 done: loss 0.4828 - lr: 0.000027
         
     | 
| 104 | 
         
            +
            2023-10-19 02:13:27,322 DEV : loss 0.3526449203491211 - f1-score (micro avg)  0.7754
         
     | 
| 105 | 
         
            +
            2023-10-19 02:13:27,346 saving best model
         
     | 
| 106 | 
         
            +
            2023-10-19 02:13:28,590 ----------------------------------------------------------------------------------------------------
         
     | 
| 107 | 
         
            +
            2023-10-19 02:13:43,341 epoch 3 - iter 43/432 - loss 0.31230210 - time (sec): 14.75 - samples/sec: 421.77 - lr: 0.000026 - momentum: 0.000000
         
     | 
| 108 | 
         
            +
            2023-10-19 02:13:57,500 epoch 3 - iter 86/432 - loss 0.30333100 - time (sec): 28.91 - samples/sec: 427.74 - lr: 0.000026 - momentum: 0.000000
         
     | 
| 109 | 
         
            +
            2023-10-19 02:14:12,357 epoch 3 - iter 129/432 - loss 0.29986924 - time (sec): 43.76 - samples/sec: 420.79 - lr: 0.000026 - momentum: 0.000000
         
     | 
| 110 | 
         
            +
            2023-10-19 02:14:27,258 epoch 3 - iter 172/432 - loss 0.30292860 - time (sec): 58.67 - samples/sec: 422.53 - lr: 0.000025 - momentum: 0.000000
         
     | 
| 111 | 
         
            +
            2023-10-19 02:14:42,390 epoch 3 - iter 215/432 - loss 0.30311275 - time (sec): 73.80 - samples/sec: 417.71 - lr: 0.000025 - momentum: 0.000000
         
     | 
| 112 | 
         
            +
            2023-10-19 02:14:57,121 epoch 3 - iter 258/432 - loss 0.30056148 - time (sec): 88.53 - samples/sec: 417.74 - lr: 0.000025 - momentum: 0.000000
         
     | 
| 113 | 
         
            +
            2023-10-19 02:15:12,777 epoch 3 - iter 301/432 - loss 0.30074375 - time (sec): 104.19 - samples/sec: 416.07 - lr: 0.000024 - momentum: 0.000000
         
     | 
| 114 | 
         
            +
            2023-10-19 02:15:27,941 epoch 3 - iter 344/432 - loss 0.30006914 - time (sec): 119.35 - samples/sec: 414.44 - lr: 0.000024 - momentum: 0.000000
         
     | 
| 115 | 
         
            +
            2023-10-19 02:15:43,189 epoch 3 - iter 387/432 - loss 0.29793746 - time (sec): 134.60 - samples/sec: 414.38 - lr: 0.000024 - momentum: 0.000000
         
     | 
| 116 | 
         
            +
            2023-10-19 02:15:57,165 epoch 3 - iter 430/432 - loss 0.29556403 - time (sec): 148.57 - samples/sec: 414.85 - lr: 0.000023 - momentum: 0.000000
         
     | 
| 117 | 
         
            +
            2023-10-19 02:15:57,703 ----------------------------------------------------------------------------------------------------
         
     | 
| 118 | 
         
            +
            2023-10-19 02:15:57,703 EPOCH 3 done: loss 0.2954 - lr: 0.000023
         
     | 
| 119 | 
         
            +
            2023-10-19 02:16:11,087 DEV : loss 0.30149412155151367 - f1-score (micro avg)  0.8069
         
     | 
| 120 | 
         
            +
            2023-10-19 02:16:11,111 saving best model
         
     | 
| 121 | 
         
            +
            2023-10-19 02:16:12,352 ----------------------------------------------------------------------------------------------------
         
     | 
| 122 | 
         
            +
            2023-10-19 02:16:27,084 epoch 4 - iter 43/432 - loss 0.21264161 - time (sec): 14.73 - samples/sec: 411.11 - lr: 0.000023 - momentum: 0.000000
         
     | 
| 123 | 
         
            +
            2023-10-19 02:16:43,109 epoch 4 - iter 86/432 - loss 0.22163872 - time (sec): 30.76 - samples/sec: 394.07 - lr: 0.000023 - momentum: 0.000000
         
     | 
| 124 | 
         
            +
            2023-10-19 02:16:58,411 epoch 4 - iter 129/432 - loss 0.22101676 - time (sec): 46.06 - samples/sec: 396.91 - lr: 0.000022 - momentum: 0.000000
         
     | 
| 125 | 
         
            +
            2023-10-19 02:17:13,941 epoch 4 - iter 172/432 - loss 0.22361025 - time (sec): 61.59 - samples/sec: 395.37 - lr: 0.000022 - momentum: 0.000000
         
     | 
| 126 | 
         
            +
            2023-10-19 02:17:27,914 epoch 4 - iter 215/432 - loss 0.22111072 - time (sec): 75.56 - samples/sec: 402.30 - lr: 0.000022 - momentum: 0.000000
         
     | 
| 127 | 
         
            +
            2023-10-19 02:17:43,302 epoch 4 - iter 258/432 - loss 0.21935857 - time (sec): 90.95 - samples/sec: 397.99 - lr: 0.000021 - momentum: 0.000000
         
     | 
| 128 | 
         
            +
            2023-10-19 02:17:57,934 epoch 4 - iter 301/432 - loss 0.21595980 - time (sec): 105.58 - samples/sec: 403.99 - lr: 0.000021 - momentum: 0.000000
         
     | 
| 129 | 
         
            +
            2023-10-19 02:18:13,452 epoch 4 - iter 344/432 - loss 0.21581270 - time (sec): 121.10 - samples/sec: 408.20 - lr: 0.000021 - momentum: 0.000000
         
     | 
| 130 | 
         
            +
            2023-10-19 02:18:28,710 epoch 4 - iter 387/432 - loss 0.21528790 - time (sec): 136.36 - samples/sec: 406.51 - lr: 0.000020 - momentum: 0.000000
         
     | 
| 131 | 
         
            +
            2023-10-19 02:18:43,163 epoch 4 - iter 430/432 - loss 0.21420583 - time (sec): 150.81 - samples/sec: 408.75 - lr: 0.000020 - momentum: 0.000000
         
     | 
| 132 | 
         
            +
            2023-10-19 02:18:43,749 ----------------------------------------------------------------------------------------------------
         
     | 
| 133 | 
         
            +
            2023-10-19 02:18:43,749 EPOCH 4 done: loss 0.2144 - lr: 0.000020
         
     | 
| 134 | 
         
            +
            2023-10-19 02:18:57,091 DEV : loss 0.3102978467941284 - f1-score (micro avg)  0.8163
         
     | 
| 135 | 
         
            +
            2023-10-19 02:18:57,116 saving best model
         
     | 
| 136 | 
         
            +
            2023-10-19 02:18:58,362 ----------------------------------------------------------------------------------------------------
         
     | 
| 137 | 
         
            +
            2023-10-19 02:19:12,811 epoch 5 - iter 43/432 - loss 0.15083744 - time (sec): 14.45 - samples/sec: 412.38 - lr: 0.000020 - momentum: 0.000000
         
     | 
| 138 | 
         
            +
            2023-10-19 02:19:27,463 epoch 5 - iter 86/432 - loss 0.15320825 - time (sec): 29.10 - samples/sec: 418.60 - lr: 0.000019 - momentum: 0.000000
         
     | 
| 139 | 
         
            +
            2023-10-19 02:19:42,164 epoch 5 - iter 129/432 - loss 0.15857775 - time (sec): 43.80 - samples/sec: 428.40 - lr: 0.000019 - momentum: 0.000000
         
     | 
| 140 | 
         
            +
            2023-10-19 02:19:57,171 epoch 5 - iter 172/432 - loss 0.15560054 - time (sec): 58.81 - samples/sec: 426.51 - lr: 0.000019 - momentum: 0.000000
         
     | 
| 141 | 
         
            +
            2023-10-19 02:20:12,653 epoch 5 - iter 215/432 - loss 0.15299635 - time (sec): 74.29 - samples/sec: 412.79 - lr: 0.000018 - momentum: 0.000000
         
     | 
| 142 | 
         
            +
            2023-10-19 02:20:26,815 epoch 5 - iter 258/432 - loss 0.15415565 - time (sec): 88.45 - samples/sec: 413.63 - lr: 0.000018 - momentum: 0.000000
         
     | 
| 143 | 
         
            +
            2023-10-19 02:20:41,252 epoch 5 - iter 301/432 - loss 0.15470623 - time (sec): 102.89 - samples/sec: 415.99 - lr: 0.000018 - momentum: 0.000000
         
     | 
| 144 | 
         
            +
            2023-10-19 02:20:57,180 epoch 5 - iter 344/432 - loss 0.15738806 - time (sec): 118.82 - samples/sec: 413.47 - lr: 0.000017 - momentum: 0.000000
         
     | 
| 145 | 
         
            +
            2023-10-19 02:21:12,778 epoch 5 - iter 387/432 - loss 0.15861707 - time (sec): 134.41 - samples/sec: 411.86 - lr: 0.000017 - momentum: 0.000000
         
     | 
| 146 | 
         
            +
            2023-10-19 02:21:28,825 epoch 5 - iter 430/432 - loss 0.15871889 - time (sec): 150.46 - samples/sec: 409.68 - lr: 0.000017 - momentum: 0.000000
         
     | 
| 147 | 
         
            +
            2023-10-19 02:21:29,386 ----------------------------------------------------------------------------------------------------
         
     | 
| 148 | 
         
            +
            2023-10-19 02:21:29,387 EPOCH 5 done: loss 0.1591 - lr: 0.000017
         
     | 
| 149 | 
         
            +
            2023-10-19 02:21:42,711 DEV : loss 0.3180293142795563 - f1-score (micro avg)  0.8294
         
     | 
| 150 | 
         
            +
            2023-10-19 02:21:42,736 saving best model
         
     | 
| 151 | 
         
            +
            2023-10-19 02:21:43,978 ----------------------------------------------------------------------------------------------------
         
     | 
| 152 | 
         
            +
            2023-10-19 02:21:58,649 epoch 6 - iter 43/432 - loss 0.11722725 - time (sec): 14.67 - samples/sec: 429.78 - lr: 0.000016 - momentum: 0.000000
         
     | 
| 153 | 
         
            +
            2023-10-19 02:22:13,269 epoch 6 - iter 86/432 - loss 0.11922248 - time (sec): 29.29 - samples/sec: 428.61 - lr: 0.000016 - momentum: 0.000000
         
     | 
| 154 | 
         
            +
            2023-10-19 02:22:29,215 epoch 6 - iter 129/432 - loss 0.11571691 - time (sec): 45.24 - samples/sec: 417.24 - lr: 0.000016 - momentum: 0.000000
         
     | 
| 155 | 
         
            +
            2023-10-19 02:22:44,481 epoch 6 - iter 172/432 - loss 0.11512543 - time (sec): 60.50 - samples/sec: 415.94 - lr: 0.000015 - momentum: 0.000000
         
     | 
| 156 | 
         
            +
            2023-10-19 02:22:59,365 epoch 6 - iter 215/432 - loss 0.11922747 - time (sec): 75.39 - samples/sec: 415.99 - lr: 0.000015 - momentum: 0.000000
         
     | 
| 157 | 
         
            +
            2023-10-19 02:23:13,706 epoch 6 - iter 258/432 - loss 0.12213480 - time (sec): 89.73 - samples/sec: 412.85 - lr: 0.000015 - momentum: 0.000000
         
     | 
| 158 | 
         
            +
            2023-10-19 02:23:28,113 epoch 6 - iter 301/432 - loss 0.12324684 - time (sec): 104.13 - samples/sec: 413.96 - lr: 0.000014 - momentum: 0.000000
         
     | 
| 159 | 
         
            +
            2023-10-19 02:23:42,639 epoch 6 - iter 344/432 - loss 0.12430268 - time (sec): 118.66 - samples/sec: 415.94 - lr: 0.000014 - momentum: 0.000000
         
     | 
| 160 | 
         
            +
            2023-10-19 02:23:57,051 epoch 6 - iter 387/432 - loss 0.12584428 - time (sec): 133.07 - samples/sec: 416.41 - lr: 0.000014 - momentum: 0.000000
         
     | 
| 161 | 
         
            +
            2023-10-19 02:24:11,447 epoch 6 - iter 430/432 - loss 0.12693815 - time (sec): 147.47 - samples/sec: 418.17 - lr: 0.000013 - momentum: 0.000000
         
     | 
| 162 | 
         
            +
            2023-10-19 02:24:12,214 ----------------------------------------------------------------------------------------------------
         
     | 
| 163 | 
         
            +
            2023-10-19 02:24:12,215 EPOCH 6 done: loss 0.1269 - lr: 0.000013
         
     | 
| 164 | 
         
            +
            2023-10-19 02:24:25,523 DEV : loss 0.32838910818099976 - f1-score (micro avg)  0.8206
         
     | 
| 165 | 
         
            +
            2023-10-19 02:24:25,547 ----------------------------------------------------------------------------------------------------
         
     | 
| 166 | 
         
            +
            2023-10-19 02:24:39,692 epoch 7 - iter 43/432 - loss 0.08912504 - time (sec): 14.14 - samples/sec: 441.62 - lr: 0.000013 - momentum: 0.000000
         
     | 
| 167 | 
         
            +
            2023-10-19 02:24:54,333 epoch 7 - iter 86/432 - loss 0.09693013 - time (sec): 28.78 - samples/sec: 423.29 - lr: 0.000013 - momentum: 0.000000
         
     | 
| 168 | 
         
            +
            2023-10-19 02:25:09,997 epoch 7 - iter 129/432 - loss 0.09674099 - time (sec): 44.45 - samples/sec: 416.87 - lr: 0.000012 - momentum: 0.000000
         
     | 
| 169 | 
         
            +
            2023-10-19 02:25:24,036 epoch 7 - iter 172/432 - loss 0.09757105 - time (sec): 58.49 - samples/sec: 418.36 - lr: 0.000012 - momentum: 0.000000
         
     | 
| 170 | 
         
            +
            2023-10-19 02:25:38,207 epoch 7 - iter 215/432 - loss 0.09807207 - time (sec): 72.66 - samples/sec: 416.52 - lr: 0.000012 - momentum: 0.000000
         
     | 
| 171 | 
         
            +
            2023-10-19 02:25:53,354 epoch 7 - iter 258/432 - loss 0.09815033 - time (sec): 87.81 - samples/sec: 414.87 - lr: 0.000011 - momentum: 0.000000
         
     | 
| 172 | 
         
            +
            2023-10-19 02:26:08,690 epoch 7 - iter 301/432 - loss 0.09658140 - time (sec): 103.14 - samples/sec: 415.61 - lr: 0.000011 - momentum: 0.000000
         
     | 
| 173 | 
         
            +
            2023-10-19 02:26:23,350 epoch 7 - iter 344/432 - loss 0.09822029 - time (sec): 117.80 - samples/sec: 414.25 - lr: 0.000011 - momentum: 0.000000
         
     | 
| 174 | 
         
            +
            2023-10-19 02:26:38,076 epoch 7 - iter 387/432 - loss 0.09934983 - time (sec): 132.53 - samples/sec: 417.40 - lr: 0.000010 - momentum: 0.000000
         
     | 
| 175 | 
         
            +
            2023-10-19 02:26:53,511 epoch 7 - iter 430/432 - loss 0.10039982 - time (sec): 147.96 - samples/sec: 416.66 - lr: 0.000010 - momentum: 0.000000
         
     | 
| 176 | 
         
            +
            2023-10-19 02:26:53,991 ----------------------------------------------------------------------------------------------------
         
     | 
| 177 | 
         
            +
            2023-10-19 02:26:53,991 EPOCH 7 done: loss 0.1008 - lr: 0.000010
         
     | 
| 178 | 
         
            +
            2023-10-19 02:27:07,624 DEV : loss 0.34814995527267456 - f1-score (micro avg)  0.832
         
     | 
| 179 | 
         
            +
            2023-10-19 02:27:07,647 saving best model
         
     | 
| 180 | 
         
            +
            2023-10-19 02:27:08,914 ----------------------------------------------------------------------------------------------------
         
     | 
| 181 | 
         
            +
            2023-10-19 02:27:22,666 epoch 8 - iter 43/432 - loss 0.10350894 - time (sec): 13.75 - samples/sec: 470.25 - lr: 0.000010 - momentum: 0.000000
         
     | 
| 182 | 
         
            +
            2023-10-19 02:27:36,004 epoch 8 - iter 86/432 - loss 0.09852409 - time (sec): 27.09 - samples/sec: 476.91 - lr: 0.000009 - momentum: 0.000000
         
     | 
| 183 | 
         
            +
            2023-10-19 02:27:50,049 epoch 8 - iter 129/432 - loss 0.09286219 - time (sec): 41.13 - samples/sec: 465.85 - lr: 0.000009 - momentum: 0.000000
         
     | 
| 184 | 
         
            +
            2023-10-19 02:28:03,617 epoch 8 - iter 172/432 - loss 0.08743720 - time (sec): 54.70 - samples/sec: 455.60 - lr: 0.000009 - momentum: 0.000000
         
     | 
| 185 | 
         
            +
            2023-10-19 02:28:17,277 epoch 8 - iter 215/432 - loss 0.08573511 - time (sec): 68.36 - samples/sec: 459.17 - lr: 0.000008 - momentum: 0.000000
         
     | 
| 186 | 
         
            +
            2023-10-19 02:28:30,969 epoch 8 - iter 258/432 - loss 0.08331380 - time (sec): 82.05 - samples/sec: 462.63 - lr: 0.000008 - momentum: 0.000000
         
     | 
| 187 | 
         
            +
            2023-10-19 02:28:44,354 epoch 8 - iter 301/432 - loss 0.08249316 - time (sec): 95.44 - samples/sec: 457.26 - lr: 0.000008 - momentum: 0.000000
         
     | 
| 188 | 
         
            +
            2023-10-19 02:28:58,899 epoch 8 - iter 344/432 - loss 0.08196643 - time (sec): 109.98 - samples/sec: 448.87 - lr: 0.000007 - momentum: 0.000000
         
     | 
| 189 | 
         
            +
            2023-10-19 02:29:12,883 epoch 8 - iter 387/432 - loss 0.08297009 - time (sec): 123.97 - samples/sec: 447.78 - lr: 0.000007 - momentum: 0.000000
         
     | 
| 190 | 
         
            +
            2023-10-19 02:29:26,795 epoch 8 - iter 430/432 - loss 0.08266864 - time (sec): 137.88 - samples/sec: 447.49 - lr: 0.000007 - momentum: 0.000000
         
     | 
| 191 | 
         
            +
            2023-10-19 02:29:27,277 ----------------------------------------------------------------------------------------------------
         
     | 
| 192 | 
         
            +
            2023-10-19 02:29:27,277 EPOCH 8 done: loss 0.0826 - lr: 0.000007
         
     | 
| 193 | 
         
            +
            2023-10-19 02:29:39,277 DEV : loss 0.34838762879371643 - f1-score (micro avg)  0.8366
         
     | 
| 194 | 
         
            +
            2023-10-19 02:29:39,301 saving best model
         
     | 
| 195 | 
         
            +
            2023-10-19 02:29:40,574 ----------------------------------------------------------------------------------------------------
         
     | 
| 196 | 
         
            +
            2023-10-19 02:29:53,319 epoch 9 - iter 43/432 - loss 0.06186272 - time (sec): 12.74 - samples/sec: 474.43 - lr: 0.000006 - momentum: 0.000000
         
     | 
| 197 | 
         
            +
            2023-10-19 02:30:08,597 epoch 9 - iter 86/432 - loss 0.06634905 - time (sec): 28.02 - samples/sec: 422.46 - lr: 0.000006 - momentum: 0.000000
         
     | 
| 198 | 
         
            +
            2023-10-19 02:30:22,346 epoch 9 - iter 129/432 - loss 0.07167375 - time (sec): 41.77 - samples/sec: 424.58 - lr: 0.000006 - momentum: 0.000000
         
     | 
| 199 | 
         
            +
            2023-10-19 02:30:36,275 epoch 9 - iter 172/432 - loss 0.06969364 - time (sec): 55.70 - samples/sec: 426.04 - lr: 0.000005 - momentum: 0.000000
         
     | 
| 200 | 
         
            +
            2023-10-19 02:30:50,140 epoch 9 - iter 215/432 - loss 0.06782716 - time (sec): 69.56 - samples/sec: 430.13 - lr: 0.000005 - momentum: 0.000000
         
     | 
| 201 | 
         
            +
            2023-10-19 02:31:04,623 epoch 9 - iter 258/432 - loss 0.06830674 - time (sec): 84.05 - samples/sec: 428.69 - lr: 0.000005 - momentum: 0.000000
         
     | 
| 202 | 
         
            +
            2023-10-19 02:31:18,536 epoch 9 - iter 301/432 - loss 0.06808899 - time (sec): 97.96 - samples/sec: 431.85 - lr: 0.000004 - momentum: 0.000000
         
     | 
| 203 | 
         
            +
            2023-10-19 02:31:31,833 epoch 9 - iter 344/432 - loss 0.06544761 - time (sec): 111.26 - samples/sec: 438.17 - lr: 0.000004 - momentum: 0.000000
         
     | 
| 204 | 
         
            +
            2023-10-19 02:31:45,008 epoch 9 - iter 387/432 - loss 0.06628105 - time (sec): 124.43 - samples/sec: 443.94 - lr: 0.000004 - momentum: 0.000000
         
     | 
| 205 | 
         
            +
            2023-10-19 02:31:58,464 epoch 9 - iter 430/432 - loss 0.06698673 - time (sec): 137.89 - samples/sec: 446.90 - lr: 0.000003 - momentum: 0.000000
         
     | 
| 206 | 
         
            +
            2023-10-19 02:31:58,887 ----------------------------------------------------------------------------------------------------
         
     | 
| 207 | 
         
            +
            2023-10-19 02:31:58,887 EPOCH 9 done: loss 0.0669 - lr: 0.000003
         
     | 
| 208 | 
         
            +
            2023-10-19 02:32:10,770 DEV : loss 0.37735414505004883 - f1-score (micro avg)  0.8355
         
     | 
| 209 | 
         
            +
            2023-10-19 02:32:10,795 ----------------------------------------------------------------------------------------------------
         
     | 
| 210 | 
         
            +
            2023-10-19 02:32:24,493 epoch 10 - iter 43/432 - loss 0.06688660 - time (sec): 13.70 - samples/sec: 479.67 - lr: 0.000003 - momentum: 0.000000
         
     | 
| 211 | 
         
            +
            2023-10-19 02:32:38,648 epoch 10 - iter 86/432 - loss 0.06022775 - time (sec): 27.85 - samples/sec: 445.00 - lr: 0.000003 - momentum: 0.000000
         
     | 
| 212 | 
         
            +
            2023-10-19 02:32:52,077 epoch 10 - iter 129/432 - loss 0.05771265 - time (sec): 41.28 - samples/sec: 452.85 - lr: 0.000002 - momentum: 0.000000
         
     | 
| 213 | 
         
            +
            2023-10-19 02:33:05,697 epoch 10 - iter 172/432 - loss 0.05574235 - time (sec): 54.90 - samples/sec: 453.57 - lr: 0.000002 - momentum: 0.000000
         
     | 
| 214 | 
         
            +
            2023-10-19 02:33:19,731 epoch 10 - iter 215/432 - loss 0.05793529 - time (sec): 68.93 - samples/sec: 450.88 - lr: 0.000002 - momentum: 0.000000
         
     | 
| 215 | 
         
            +
            2023-10-19 02:33:32,567 epoch 10 - iter 258/432 - loss 0.05732065 - time (sec): 81.77 - samples/sec: 451.59 - lr: 0.000001 - momentum: 0.000000
         
     | 
| 216 | 
         
            +
            2023-10-19 02:33:45,896 epoch 10 - iter 301/432 - loss 0.05663405 - time (sec): 95.10 - samples/sec: 448.98 - lr: 0.000001 - momentum: 0.000000
         
     | 
| 217 | 
         
            +
            2023-10-19 02:33:59,944 epoch 10 - iter 344/432 - loss 0.05723962 - time (sec): 109.15 - samples/sec: 448.98 - lr: 0.000001 - momentum: 0.000000
         
     | 
| 218 | 
         
            +
            2023-10-19 02:34:13,929 epoch 10 - iter 387/432 - loss 0.05878775 - time (sec): 123.13 - samples/sec: 447.55 - lr: 0.000000 - momentum: 0.000000
         
     | 
| 219 | 
         
            +
            2023-10-19 02:34:27,981 epoch 10 - iter 430/432 - loss 0.05901642 - time (sec): 137.18 - samples/sec: 449.90 - lr: 0.000000 - momentum: 0.000000
         
     | 
| 220 | 
         
            +
            2023-10-19 02:34:28,415 ----------------------------------------------------------------------------------------------------
         
     | 
| 221 | 
         
            +
            2023-10-19 02:34:28,415 EPOCH 10 done: loss 0.0589 - lr: 0.000000
         
     | 
| 222 | 
         
            +
            2023-10-19 02:34:40,654 DEV : loss 0.38429826498031616 - f1-score (micro avg)  0.8381
         
     | 
| 223 | 
         
            +
            2023-10-19 02:34:40,679 saving best model
         
     | 
| 224 | 
         
            +
            2023-10-19 02:34:42,621 ----------------------------------------------------------------------------------------------------
         
     | 
| 225 | 
         
            +
            2023-10-19 02:34:42,623 Loading model from best epoch ...
         
     | 
| 226 | 
         
            +
            2023-10-19 02:34:44,813 SequenceTagger predicts: Dictionary with 81 tags: O, S-location-route, B-location-route, E-location-route, I-location-route, S-location-stop, B-location-stop, E-location-stop, I-location-stop, S-trigger, B-trigger, E-trigger, I-trigger, S-organization-company, B-organization-company, E-organization-company, I-organization-company, S-location-city, B-location-city, E-location-city, I-location-city, S-location, B-location, E-location, I-location, S-event-cause, B-event-cause, E-event-cause, I-event-cause, S-location-street, B-location-street, E-location-street, I-location-street, S-time, B-time, E-time, I-time, S-date, B-date, E-date, I-date, S-number, B-number, E-number, I-number, S-duration, B-duration, E-duration, I-duration, S-organization
         
     | 
| 227 | 
         
            +
            2023-10-19 02:35:01,412 
         
     | 
| 228 | 
         
            +
            Results:
         
     | 
| 229 | 
         
            +
            - F-score (micro) 0.7634
         
     | 
| 230 | 
         
            +
            - F-score (macro) 0.5759
         
     | 
| 231 | 
         
            +
            - Accuracy 0.6618
         
     | 
| 232 | 
         
            +
             
     | 
| 233 | 
         
            +
            By class:
         
     | 
| 234 | 
         
            +
                                  precision    recall  f1-score   support
         
     | 
| 235 | 
         
            +
             
     | 
| 236 | 
         
            +
                         trigger     0.7056    0.6158    0.6577       833
         
     | 
| 237 | 
         
            +
                   location-stop     0.8486    0.8353    0.8419       765
         
     | 
| 238 | 
         
            +
                        location     0.7905    0.8286    0.8091       665
         
     | 
| 239 | 
         
            +
                   location-city     0.8088    0.8746    0.8404       566
         
     | 
| 240 | 
         
            +
                            date     0.8836    0.8477    0.8653       394
         
     | 
| 241 | 
         
            +
                 location-street     0.9315    0.8808    0.9055       386
         
     | 
| 242 | 
         
            +
                            time     0.7889    0.8906    0.8367       256
         
     | 
| 243 | 
         
            +
                  location-route     0.7976    0.6937    0.7420       284
         
     | 
| 244 | 
         
            +
            organization-company     0.7946    0.7063    0.7479       252
         
     | 
| 245 | 
         
            +
                        distance     0.9940    1.0000    0.9970       167
         
     | 
| 246 | 
         
            +
                          number     0.6721    0.8255    0.7410       149
         
     | 
| 247 | 
         
            +
                        duration     0.3455    0.3497    0.3476       163
         
     | 
| 248 | 
         
            +
                     event-cause     0.0000    0.0000    0.0000         0
         
     | 
| 249 | 
         
            +
                   disaster-type     0.9375    0.4348    0.5941        69
         
     | 
| 250 | 
         
            +
                    organization     0.4706    0.5714    0.5161        28
         
     | 
| 251 | 
         
            +
                          person     0.3636    0.8000    0.5000        10
         
     | 
| 252 | 
         
            +
                             set     0.0000    0.0000    0.0000         0
         
     | 
| 253 | 
         
            +
                    org-position     0.0000    0.0000    0.0000         1
         
     | 
| 254 | 
         
            +
                           money     0.0000    0.0000    0.0000         0
         
     | 
| 255 | 
         
            +
             
     | 
| 256 | 
         
            +
                       micro avg     0.7503    0.7771    0.7634      4988
         
     | 
| 257 | 
         
            +
                       macro avg     0.5860    0.5871    0.5759      4988
         
     | 
| 258 | 
         
            +
                    weighted avg     0.7941    0.7771    0.7826      4988
         
     | 
| 259 | 
         
            +
             
     | 
| 260 | 
         
            +
            2023-10-19 02:35:01,413 ----------------------------------------------------------------------------------------------------
         
     |