Niharmahesh
/

Qwen_2.5_finetuned_using_cambrian_recipe

Safetensors

Model card Files Files and versions

xet

Community

Niharmahesh committed on Mar 28

Commit

b87a05f

verified ·

1 Parent(s): faa3867

Update stage_1/model_setup.py

Browse files

Files changed (1) hide show

stage_1/model_setup.py +14 -17

stage_1/model_setup.py CHANGED Viewed

@@ -31,50 +31,47 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MIN_PIXELS = 256 * 28 * 28
 MAX_PIXELS = 256 * 28 * 28
 def setup_model():
     """
-    Stage 2 configuration: Unfreeze LLM + connector while keeping vision encoder frozen
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     print(f"Using device: {device}")
     # Initialize model
     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-        MODEL_ID,
         torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2",
         device_map="auto" if torch.cuda.is_available() else None
     )
-    # Freeze entire model first
     # Freeze entire model first
     for param in model.parameters():
         param.requires_grad = False
-    # 1. Unfreeze vision merger (connector)
     for name, param in model.visual.named_parameters():
         if "merger" in name:
-            param.requires_grad = True
-    # 2. Unfreeze LLM (model + lm_head) WITHOUT affecting visual.merger
-    for name, param in model.named_parameters():
-        if any(k in name for k in ("model", "lm_head")):
-            param.requires_grad = True  # Only modifies LLM params
-    # 3. Training modes (rotary_emb auto-included)
     model.visual.merger.train()
-    model.model.train()
-    model.lm_head.train()
-    # Print trainable parameters
-    print("\n✅ Stage 2 Trainable Parameters:")
     for name, param in model.named_parameters():
         if param.requires_grad:
             print(f"- {name}")
     print("\nModule training states:")
     for name, module in model.named_modules():
         state = "train" if module.training else "eval"
         print(f"{name}: {state}")
     return model

 MIN_PIXELS = 256 * 28 * 28
 MAX_PIXELS = 256 * 28 * 28
+# Stage 1 model setup: freeze the whole model, then train only the vision merger
 def setup_model():
     """
+    Initialize and configure the Qwen2.5 VL model with selective parameter freezing.
+    Only the vision merger layers will be trainable, while the rest of the model will be frozen.
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     print(f"Using device: {device}")
     # Initialize model
     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        "Qwen/Qwen2.5-VL-3B-Instruct",
         torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2",
         device_map="auto" if torch.cuda.is_available() else None
     )
     # Freeze entire model first
     for param in model.parameters():
         param.requires_grad = False
+    # Unfreeze only vision merger layers
     for name, param in model.visual.named_parameters():
         if "merger" in name:
+            param.requires_grad = True  # Enable training for these parameters
+    # Force the merger to train mode
     model.visual.merger.train()
+    # Print trainable parameter names
+    print("\n✅ Verified trainable parameters:")
     for name, param in model.named_parameters():
         if param.requires_grad:
             print(f"- {name}")
+    # Print out the training state of all modules:
     print("\nModule training states:")
     for name, module in model.named_modules():
         state = "train" if module.training else "eval"
         print(f"{name}: {state}")
     return model