Commit 70c419c
Parent(s): ebc99ef
Update transformers_v4_35_2__modeling_llama.py

transformers_v4_35_2__modeling_llama.py CHANGED
@@ -43,14 +43,11 @@ from transformers.utils import (
 from transformers.utils.import_utils import is_torch_fx_available
 from .transformers_v4_35_2__configuration_llama import LlamaConfig
 
+# Deci: commented out to prevent unnecessary dependency
+# if is_flash_attn_2_available():
+#     from flash_attn import flash_attn_func, flash_attn_varlen_func
+#     from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
 
-if is_flash_attn_2_available():
-    def import_flash_attn():
-        from flash_attn import flash_attn_func, flash_attn_varlen_func
-        from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
-        return flash_attn_func, flash_attn_varlen_func, index_first_axis, pad_input, unpad_input
-
-    flash_attn_func, flash_attn_varlen_func, index_first_axis, pad_input, unpad_input = import_flash_attn()
 
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
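
A minimal sketch (not code from this repository) of the pattern this change points toward: keeping flash-attn as an optional dependency that is imported lazily, inside the code path that actually uses it, so merely importing the modeling module does not require the flash_attn package. The helper name _attention_forward and the scaled-dot-product-attention fallback are illustrative assumptions, not part of this file.

# Sketch: flash-attn stays optional because the import happens only inside
# the branch that uses it, never at module import time.
import torch
from transformers.utils import is_flash_attn_2_available


def _attention_forward(query, key, value, causal=True):
    # query/key/value: (batch, seq_len, num_heads, head_dim), fp16/bf16 on CUDA.
    if is_flash_attn_2_available():
        # Lazy import: environments without flash_attn never execute this line.
        from flash_attn import flash_attn_func

        return flash_attn_func(query, key, value, causal=causal)

    # Fallback: PyTorch's built-in scaled dot-product attention, which expects
    # (batch, num_heads, seq_len, head_dim) layout, hence the transposes.
    attn = torch.nn.functional.scaled_dot_product_attention(
        query.transpose(1, 2),
        key.transpose(1, 2),
        value.transpose(1, 2),
        is_causal=causal,
    )
    return attn.transpose(1, 2)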