AbstractPhil committed on
Commit
0a86595
·
verified ·
1 Parent(s): cd72730

Update best model - Epoch 0 - Acc: 0.0892

Browse files
Files changed (3) hide show
  1. README.md +15 -8
  2. config.json +6 -6
  3. model.safetensors +2 -2
README.md CHANGED
@@ -18,8 +18,8 @@ for vision tasks. Trained on CIFAR-10.
18
 
19
  - **Architecture**: Vision Transformer with fractal positional encoding
20
  - **Dataset**: CIFAR-100 (100 classes)
21
- - **Embedding Dimension**: 512
22
- - **Depth**: 4 layers
23
  - **Patch Size**: 4x4
24
  - **PE Levels**: 12
25
  - **Simplex Dimension**: 5-simplex
@@ -27,8 +27,8 @@ for vision tasks. Trained on CIFAR-10.
27
  ## Training
28
 
29
  - **Dataset**: CIFAR-100
30
- - **Epochs**: 8
31
- - **Best Accuracy**: 0.2937
32
  - **Batch Size**: 512
33
  - **Learning Rate**: 0.001
34
 
@@ -44,17 +44,24 @@ for vision tasks. Trained on CIFAR-10.
44
  ```python
45
  from geovocab2.train.model.vit_beatrix import SimplifiedGeometricClassifier
46
  from safetensors.torch import load_file
 
 
 
 
 
 
 
47
 
48
  # Load model
49
  model = SimplifiedGeometricClassifier(
50
  num_classes=100, # CIFAR-100
51
  img_size=32,
52
- embed_dim=512,
53
- depth=4
54
  )
55
 
56
- # Load weights (renamed from model_best.safetensors to model.safetensors in Hub)
57
- state_dict = load_file("model.safetensors")
58
  model.load_state_dict(state_dict)
59
  model.eval()
60
 
 
18
 
19
  - **Architecture**: Vision Transformer with fractal positional encoding
20
  - **Dataset**: CIFAR-100 (100 classes)
21
+ - **Embedding Dimension**: 256
22
+ - **Depth**: 12 layers
23
  - **Patch Size**: 4x4
24
  - **PE Levels**: 12
25
  - **Simplex Dimension**: 5-simplex
 
27
  ## Training
28
 
29
  - **Dataset**: CIFAR-100
30
+ - **Epochs**: 0
31
+ - **Best Accuracy**: 0.0892
32
  - **Batch Size**: 512
33
  - **Learning Rate**: 0.001
34
 
 
44
  ```python
45
  from geovocab2.train.model.vit_beatrix import SimplifiedGeometricClassifier
46
  from safetensors.torch import load_file
47
+ from huggingface_hub import hf_hub_download
48
+
49
+ # Download weights from Hub
50
+ weights_path = hf_hub_download(
51
+ repo_id="AbstractPhil/vit-beatrix",
52
+ filename="weights/beatrix-cifar100/20251007_181653/model.safetensors"
53
+ )
54
 
55
  # Load model
56
  model = SimplifiedGeometricClassifier(
57
  num_classes=100, # CIFAR-100
58
  img_size=32,
59
+ embed_dim=256,
60
+ depth=12
61
  )
62
 
63
+ # Load weights
64
+ state_dict = load_file(weights_path)
65
  model.load_state_dict(state_dict)
66
  model.eval()
67
 
config.json CHANGED
@@ -2,9 +2,9 @@
2
  "num_classes": 100,
3
  "img_size": 32,
4
  "patch_size": 4,
5
- "embed_dim": 512,
6
  "k_simplex": 5,
7
- "depth": 4,
8
  "num_heads": 8,
9
  "mlp_ratio": 4.0,
10
  "dropout": 0.0,
@@ -13,10 +13,10 @@
13
  "pe_smooth_tau": 0.25,
14
  "simplex_feature_weight": 0.2,
15
  "batch_size": 512,
16
- "num_epochs": 50,
17
  "learning_rate": 0.001,
18
  "weight_decay": 0.005,
19
- "warmup_epochs": 10,
20
  "task_loss_weight": 1.0,
21
  "flow_loss_weight": 0.5,
22
  "coherence_loss_weight": 0.3,
@@ -34,10 +34,10 @@
34
  "timestamp_dirs": true,
35
  "push_to_hub": true,
36
  "hub_model_id": "AbstractPhil/vit-beatrix",
 
37
  "hub_upload_best_only": true,
38
  "use_tensorboard": true,
39
  "log_dir": "./logs",
40
  "log_every": 50,
41
- "train_baseline": false,
42
- "hub_token": null
43
  }
 
2
  "num_classes": 100,
3
  "img_size": 32,
4
  "patch_size": 4,
5
+ "embed_dim": 256,
6
  "k_simplex": 5,
7
+ "depth": 12,
8
  "num_heads": 8,
9
  "mlp_ratio": 4.0,
10
  "dropout": 0.0,
 
13
  "pe_smooth_tau": 0.25,
14
  "simplex_feature_weight": 0.2,
15
  "batch_size": 512,
16
+ "num_epochs": 10,
17
  "learning_rate": 0.001,
18
  "weight_decay": 0.005,
19
+ "warmup_epochs": 2,
20
  "task_loss_weight": 1.0,
21
  "flow_loss_weight": 0.5,
22
  "coherence_loss_weight": 0.3,
 
34
  "timestamp_dirs": true,
35
  "push_to_hub": true,
36
  "hub_model_id": "AbstractPhil/vit-beatrix",
37
+ "hub_model_name": "beatrix-cifar100",
38
  "hub_upload_best_only": true,
39
  "use_tensorboard": true,
40
  "log_dir": "./logs",
41
  "log_every": 50,
42
+ "train_baseline": false
 
43
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d014abe00fd634e8607d5421a9834b4dc7aae0b905e55e4491cd5612a052284b
3
- size 50831604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f25f11e2851e52c4ee631b191044282ff7e23236f4b6a7fed2388d32f045a29
3
+ size 38117196