unifying the input shape of the text-only branch and the text-image branch
#19
by cardcounter - opened

modeling_internlm_xcomposer2.py CHANGED
@@ -423,6 +423,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
             config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
             (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+            kwargs['samples']['text_input'] should have dimension 1 x bs
         Returns:
         """
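For context, the `-100` convention this docstring describes is the standard PyTorch `ignore_index` behavior. A minimal sketch with toy tensors (not this model's actual forward pass) of how such positions drop out of the loss:

import torch
import torch.nn.functional as F

# Toy logits: batch of 1 sequence, 4 positions, vocabulary of 10.
logits = torch.randn(1, 4, 10)

# Positions labeled -100 (here the first two, e.g. prompt tokens)
# contribute nothing; only the last two positions are supervised.
labels = torch.tensor([[-100, -100, 7, 2]])

loss = F.cross_entropy(
    logits.view(-1, logits.size(-1)),  # (batch * seq, vocab)
    labels.view(-1),                   # (batch * seq,)
    ignore_index=-100,                 # the convention the docstring names
)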
@@ -458,7 +459,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
                 image, text, image_nums)
         else:
             to_regress_tokens, targets = self.text2emb(
-                text, add_special_tokens=True)
+                text[0], add_special_tokens=True)
             to_regress_embeds = self.model.tok_embeddings(
                 to_regress_tokens.input_ids)
             attention_mask = to_regress_tokens.attention_mask
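The functional change is the `text[0]` indexing in the text-only branch: after this PR, both branches expect `kwargs['samples']['text_input']` in the same nested 1 x bs layout, an outer list of length one wrapping the bs raw strings. A minimal sketch of that convention (the prompts and batch size below are placeholders, not values from the PR):

# Hypothetical caller-side sketch; the prompts and bs are made up.
samples = {
    # 1 x bs: one outer list wrapping the bs raw prompts,
    # mirroring what the text-image branch already receives.
    "text_input": [["prompt one", "prompt two", "prompt three"]],
}

text = samples["text_input"]
assert len(text) == 1           # outer dimension is always 1
batch_texts = text[0]           # flat list of bs strings
assert len(batch_texts) == 3    # bs == 3 in this toy example

# The patched branch therefore tokenizes text[0], not text:
#   to_regress_tokens, targets = self.text2emb(text[0], add_special_tokens=True)

Unwrapping inside forward keeps callers uniform: the same nested structure can be passed whether or not images are present.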