feat: added head_mask
modeling_bert.py (+4 -0)
```diff
@@ -379,12 +379,16 @@ class BertModel(BertPreTrainedModel):
         task_type_ids=None,
         attention_mask=None,
         masked_tokens_mask=None,
+        head_mask=None,
     ):
         """If masked_tokens_mask is not None (i.e. last_layer_subset == True in BertForPreTraining),
         we only want the output for the masked tokens. This means that we only compute the last
         layer output for these tokens.
         masked_tokens_mask: (batch, seqlen), dtype=torch.bool
         """
+        if head_mask is not None:
+            raise NotImplementedError('Masking heads is not supported')
+
         hidden_states = self.embeddings(
             input_ids, position_ids=position_ids, token_type_ids=token_type_ids
         )
```
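For context, here is a minimal, hypothetical sketch of the pattern this change follows: the forward signature gains a `head_mask` argument (presumably so HF-transformers-style callers can pass it without a `TypeError`), but any non-`None` value fails fast because head masking is not implemented in this model. `TinyModel` and its body are stand-ins, not the actual `BertModel`:

```python
import torch
import torch.nn as nn

class TinyModel(nn.Module):
    """Hypothetical stand-in for BertModel, illustrating the head_mask guard."""

    def forward(self, input_ids, head_mask=None):
        # Accept head_mask in the signature for API compatibility,
        # but reject any actual mask, mirroring the commit's guard.
        if head_mask is not None:
            raise NotImplementedError('Masking heads is not supported')
        return input_ids  # placeholder; the real forward runs embeddings + encoder

model = TinyModel()
x = torch.zeros(1, 4, dtype=torch.long)
model(x)  # fine: head_mask defaults to None
try:
    model(x, head_mask=torch.ones(12))  # any non-None mask raises
except NotImplementedError as e:
    print(e)  # -> Masking heads is not supported
```

Raising rather than silently ignoring the argument keeps failures loud: a caller that actually depends on head pruning gets an explicit error instead of subtly wrong outputs.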