Update modeling_phi3.py
Browse files — modeling_phi3.py (+1 −2)
modeling_phi3.py
CHANGED
|
@@ -35,7 +35,6 @@ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS
|
|
| 35 |
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
| 36 |
from transformers.processing_utils import Unpack
|
| 37 |
from transformers.utils import (
|
| 38 |
-
LossKwargs,
|
| 39 |
add_code_sample_docstrings,
|
| 40 |
add_start_docstrings,
|
| 41 |
add_start_docstrings_to_model_forward,
|
|
@@ -817,7 +816,7 @@ class Phi3Model(Phi3PreTrainedModel):
|
|
| 817 |
return causal_mask
|
| 818 |
|
| 819 |
|
| 820 |
-
class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
|
| 821 |
|
| 822 |
|
| 823 |
class Phi3ForCausalLM(Phi3PreTrainedModel, GenerationMixin):
|
|
|
|
| 35 |
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
| 36 |
from transformers.processing_utils import Unpack
|
| 37 |
from transformers.utils import (
|
|
|
|
| 38 |
add_code_sample_docstrings,
|
| 39 |
add_start_docstrings,
|
| 40 |
add_start_docstrings_to_model_forward,
|
|
|
|
| 816 |
return causal_mask
|
| 817 |
|
| 818 |
|
| 819 |
+
class KwargsForCausalLM(FlashAttentionKwargs): ...
|
| 820 |
|
| 821 |
|
| 822 |
class Phi3ForCausalLM(Phi3PreTrainedModel, GenerationMixin):
|