feat: updated modeling_bert.py to allow MLM-only training
modeling_bert.py (+19 −15)
@@ -494,24 +494,28 @@ class BertForPreTraining(BertPreTrainedModel):
         )
         prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)
 
-        total_loss = None
-        if labels is not None and next_sentence_label is not None:
-            if (
-                self.dense_seq_output and labels is not None
-            ):  # prediction_scores are already flattened
-                masked_lm_loss = self.mlm_loss(
-                    prediction_scores, labels.flatten()[masked_token_idx]
-                )
-            else:
-                masked_lm_loss = self.mlm_loss(
-                    rearrange(prediction_scores, "... v -> (...) v"),
-                    rearrange(labels, "... -> (...)"),
-                )
+        if (
+            self.dense_seq_output and labels is not None
+        ):  # prediction_scores are already flattened
+            masked_lm_loss = self.mlm_loss(
+                prediction_scores, labels.flatten()[masked_token_idx]
+            ).float()
+        elif labels is not None:
+            masked_lm_loss = self.mlm_loss(
+                rearrange(prediction_scores, "... v -> (...) v"),
+                rearrange(labels, "... -> (...)"),
+            ).float()
+        else:
+            masked_lm_loss = 0
+        if next_sentence_label is not None:
             next_sentence_loss = self.nsp_loss(
                 rearrange(seq_relationship_score, "... t -> (...) t"),
                 rearrange(next_sentence_label, "... -> (...)"),
-            )
-            total_loss = masked_lm_loss.float() + next_sentence_loss.float()
+            ).float()
+        else:
+            next_sentence_loss = 0
+
+        total_loss = masked_lm_loss + next_sentence_loss
 
         return BertForPreTrainingOutput(
             loss=total_loss,
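With this change the NSP term is optional: when next_sentence_label is omitted, next_sentence_loss is 0 and the returned loss reduces to the masked-LM loss alone (and when both labels are omitted, loss is 0 rather than None). A minimal sketch of an MLM-only training step under this commit; config, mask_token_id, the batch shape, and the 15% masking rate are illustrative assumptions, not part of the change:

import torch

# Sketch of an MLM-only step. config, model, and mask_token_id are
# assumptions for illustration; only the forward() call below exercises
# the code path added in this commit.
model = BertForPreTraining(config)

input_ids = torch.randint(0, config.vocab_size, (8, 128))
labels = input_ids.clone()
mask = torch.rand(labels.shape) < 0.15  # mask ~15% of positions
labels[~mask] = -100  # assumes the mlm_loss criterion ignores index -100
input_ids[mask] = mask_token_id

# No next_sentence_label is passed, so next_sentence_loss is 0 and
# outputs.loss is the masked-LM loss alone.
outputs = model(input_ids, labels=labels)
outputs.loss.backward()

Note that in the dense_seq_output path, prediction_scores has already been computed only at the masked positions, so the matching targets are gathered with labels.flatten()[masked_token_idx] instead of being rearranged.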