Rename README (2).md to README.md
README (2).md → README.md (RENAMED, +35 -11)
@@ -5,13 +5,13 @@ library_name: transformers
 ---
 CURRENTLY IN TRAINING :)
 
-Currently, only the LLM section of this model
+Currently, only the LLM and classification sections of this model are fully ready.
 ```py
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 
 # Load model and tokenizer
-model_name = "
+model_name = "HelpingAI/hai3.1-checkpoint-0002"
 
 # Set device to CUDA if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -49,32 +49,56 @@ model.generate(
 ```
 Classification section under training
 ```py
+import os
+import json
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-
-
+# Path to saved model (change if needed)
+ckpt = "HelpingAI/hai3.1-checkpoint-0002"  # or the HF name like HelpingAI/hai3.1-checkpoint-0001
+device = "cpu"
+
+print("Device:", device)
+model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True)
+model.to(device).eval()
 
-model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True).to(device).eval()
 tok = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
 if tok.pad_token is None:
     tok.pad_token = tok.eos_token
 
+# Optional: try to load id2label from saved metadata
+id2label = None
+meta_path = os.path.join(ckpt, "label_map.json")
+if os.path.exists(meta_path):
+    try:
+        with open(meta_path, "r") as f:
+            meta = json.load(f)
+        id2label = meta.get("id2label")
+        print("Loaded id2label from", meta_path)
+    except Exception as e:
+        print("Failed to read label_map.json:", e)
+
+# Fallback id2label (only used if no metadata)
+if id2label is None:
+    id2label = ["HARMFUL_SEXUAL","HARMFUL_HATE","HARMFUL_VIOLENCE","HARMFUL_HARASSMENT","HARMFUL_LANGUAGE","HARMFUL_MISINFORMATION","SAFE"]
+
 text = "I am thrilled about my new job!"
 enc = tok([text], padding=True, truncation=True, max_length=2048, return_tensors="pt")
 enc = {k: v.to(device) for k, v in enc.items()}
 
 with torch.no_grad():
     out = model(input_ids=enc["input_ids"], attention_mask=enc.get("attention_mask"), output_hidden_states=True, return_dict=True, use_cache=False)
-last = out.hidden_states[-1]
-
-
+last = out.hidden_states[-1]  # [B, T, H]
+# Compute last-token index using attention_mask if available
+if enc.get("attention_mask") is not None:
+    idx = (enc["attention_mask"].sum(dim=1) - 1).clamp(min=0)
+    pooled = last[torch.arange(last.size(0)), idx]
+else:
+    pooled = last[:, -1, :]
 logits = model.structured_lm_head(pooled)
-pred_id = logits.argmax(dim=-1).item()
+pred_id = int(logits.argmax(dim=-1).item())
 
 print("Predicted class id:", pred_id)
-# Map id -> label using your dataset's label list, e.g.:
-id2label = ["sadness","joy","love","anger","fear","surprise"]  # dair-ai/emotion
 print("Predicted label:", id2label[pred_id] if pred_id < len(id2label) else "unknown")
 ```
 
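The diff skips the middle of the LLM example (README lines 18-48), which, per the second hunk's context line, ends in a `model.generate(` call. Below is a minimal sketch of how that call is commonly wired up with the imports shown above; the chat-template message format, the sampling settings, and `trust_remote_code=True` (borrowed from the classification snippet) are assumptions, not the README's elided code:

```py
# Hypothetical completion of the LLM example; the actual README lines 18-48
# are not shown in this diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_name = "HelpingAI/hai3.1-checkpoint-0002"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Assumes the tokenizer ships a chat template; plain tokenizer(text, return_tensors="pt") works otherwise.
messages = [{"role": "user", "content": "Hello! What can you do?"}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(device)

# Stream tokens to stdout as they are generated.
streamer = TextStreamer(tokenizer, skip_prompt=True)
model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.7, streamer=streamer)
```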
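In the classification snippet, `label_map.json` is only picked up when `ckpt` points at a local directory; with a Hub model ID the `os.path.exists` check simply fails and the fallback list is used. The file's schema is not shown in the diff, and the loader only reads an `id2label` key, so a file like the one written below should satisfy it. The directory name and the reuse of the fallback label order are assumptions:

```py
# Write a minimal label_map.json that the loader above can read.
# The real metadata file for this checkpoint may contain more fields.
import json
import os

ckpt_dir = "./hai3.1-checkpoint-0002"  # hypothetical local save directory
os.makedirs(ckpt_dir, exist_ok=True)

label_map = {
    "id2label": [
        "HARMFUL_SEXUAL", "HARMFUL_HATE", "HARMFUL_VIOLENCE",
        "HARMFUL_HARASSMENT", "HARMFUL_LANGUAGE",
        "HARMFUL_MISINFORMATION", "SAFE",
    ]
}
with open(os.path.join(ckpt_dir, "label_map.json"), "w") as f:
    json.dump(label_map, f, indent=2)
```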
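The classification snippet reports only the argmax of the `structured_lm_head` logits. If a confidence score is also wanted, a softmax over the same logits provides one; this assumes the head emits one logit per class, ordered as in `id2label`:

```py
# Helper to turn class logits into a (label, probability) pair.
import torch
import torch.nn.functional as F

def top_label(logits: torch.Tensor, id2label: list[str]) -> tuple[str, float]:
    # logits: [batch, num_classes]; uses the first item in the batch.
    probs = F.softmax(logits, dim=-1)
    conf, pred = probs.max(dim=-1)
    i = int(pred[0])
    return (id2label[i] if i < len(id2label) else "unknown", float(conf[0]))

# Usage with the variables from the classification example:
# label, p = top_label(logits, id2label)
# print("Predicted label:", label, "confidence:", round(p, 3))
```

This selects the same class as `logits.argmax(dim=-1)` in the snippet, just with a probability attached.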