unifying the input shape of the text-only branch and the text-image branch
#19
by cardcounter - opened

modeling_internlm_xcomposer2.py CHANGED
@@ -423,6 +423,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
             config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
             (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+            kwargs['samples']['text_input'] should have dimension 1 x bs
         Returns:
         """
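For context, the `-100` convention this docstring describes is the standard PyTorch `ignore_index` behavior. A minimal sketch with toy tensors (not this model's actual forward pass) of how such positions drop out of the loss:

import torch
import torch.nn.functional as F

# Toy logits: batch of 1 sequence, 4 positions, vocabulary of 10.
logits = torch.randn(1, 4, 10)

# Positions labeled -100 (here the first two, e.g. prompt tokens)
# contribute nothing; only the last two positions are supervised.
labels = torch.tensor([[-100, -100, 7, 2]])

loss = F.cross_entropy(
    logits.view(-1, logits.size(-1)),  # (batch * seq, vocab)
    labels.view(-1),                   # (batch * seq,)
    ignore_index=-100,                 # the convention the docstring names
)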
@@ -458,7 +459,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
                 image, text, image_nums)
         else:
             to_regress_tokens, targets = self.text2emb(
-                text, add_special_tokens=True)
+                text[0], add_special_tokens=True)
             to_regress_embeds = self.model.tok_embeddings(
                 to_regress_tokens.input_ids)
             attention_mask = to_regress_tokens.attention_mask
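The functional change is the `text[0]` indexing in the text-only branch: after this PR, both branches expect `kwargs['samples']['text_input']` in the same nested 1 x bs layout, an outer list of length one wrapping the bs raw strings. A minimal sketch of that convention (the prompts and batch size below are placeholders, not values from the PR):

# Hypothetical caller-side sketch; the prompts and bs are made up.
samples = {
    # 1 x bs: one outer list wrapping the bs raw prompts,
    # mirroring what the text-image branch already receives.
    "text_input": [["prompt one", "prompt two", "prompt three"]],
}

text = samples["text_input"]
assert len(text) == 1           # outer dimension is always 1
batch_texts = text[0]           # flat list of bs strings
assert len(batch_texts) == 3    # bs == 3 in this toy example

# The patched branch therefore tokenizes text[0], not text:
#   to_regress_tokens, targets = self.text2emb(text[0], add_special_tokens=True)

Unwrapping inside forward keeps callers uniform: the same nested structure can be passed whether or not images are present.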