Add new models (#17)
Browse files- Add new models (217c82c37c43f78656b9b13478fbd8b89a3f6310)
Co-authored-by: Ivan Arcuschin <[email protected]>
- 67/edges.pkl +3 -0
- 67/ll_model.pth +3 -0
- 67/ll_model_cfg.pkl +3 -0
- 67/meta.json +1 -0
- 71/edges.pkl +3 -0
- 71/ll_model.pth +3 -0
- 71/ll_model_cfg.pkl +3 -0
- 71/meta.json +1 -0
- 91/edges.pkl +3 -0
- 91/ll_model.pth +3 -0
- 91/ll_model_cfg.pkl +3 -0
- 91/meta.json +1 -0
- benchmark_cases_metadata.csv +3 -0
- benchmark_cases_metadata.parquet +2 -2
- benchmark_metadata.json +375 -0
67/edges.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
|
| 3 |
+
size 667
|
67/ll_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c03fe8705b1b01ab8f9020df80f3f06bd0d37780ec97f1de35ed385148ea79ba
|
| 3 |
+
size 75626
|
67/ll_model_cfg.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ede2a8d814262f05c481f13b50315814be1ec23294e84ce9fdc0c86574f7a87c
|
| 3 |
+
size 1100
|
67/meta.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.7, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-0.7-iit-0.7-b-0.4", "wandb_name": "case-67-seed-92-s-0.7-b-0.4-iit-0.7", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
71/edges.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
|
| 3 |
+
size 667
|
71/ll_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c34ead7aa3822ede0aa18af62f496f7482f392f73e509a8aacd564f13bc4dde3
|
| 3 |
+
size 77454
|
71/ll_model_cfg.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87f7b3480ffb7944e2fc076320ff630bde55792609c0c56104352f379f593ad9
|
| 3 |
+
size 1100
|
71/meta.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-71-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
91/edges.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
| 3 |
+
size 113
|
91/ll_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb2901707d8b6cac3afc14343af4defb5eeb00887f8b3bc9d77437be16cd0364
|
| 3 |
+
size 14734
|
91/ll_model_cfg.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55939fede379285f7ea82d2f609634380a17b39d053cb68be0ae278d1f34cb52
|
| 3 |
+
size 1093
|
91/meta.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-91-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
benchmark_cases_metadata.csv
CHANGED
|
@@ -8,6 +8,7 @@ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,tr
|
|
| 8 |
79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 9 |
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 10 |
82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
|
| 11 |
31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 12 |
72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 13 |
104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
@@ -32,6 +33,7 @@ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect O
|
|
| 32 |
101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 33 |
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
|
| 34 |
65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
|
| 35 |
14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 36 |
95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 37 |
84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
@@ -49,6 +51,7 @@ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi
|
|
| 49 |
122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 50 |
85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 51 |
2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
|
| 52 |
44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 53 |
113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 54 |
77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
|
| 8 |
79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 9 |
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 10 |
82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 11 |
+
71,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71,Divide each element by the length of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.08251369970070348,True,False,standard,False,60,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 12 |
31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 13 |
72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 14 |
104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
|
| 33 |
101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 34 |
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
|
| 35 |
65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 36 |
+
91,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91,Set all values below a threshold to 0,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.14368424162141993,True,False,standard,False,9,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 37 |
14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 38 |
95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 39 |
84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
|
| 51 |
122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 52 |
85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 53 |
2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 54 |
+
67,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67,Multiply each element of the sequence by the length of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0917662935482247,True,False,standard,False,42,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.7,0.4,0.7,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 55 |
44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 56 |
113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
| 57 |
77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
benchmark_cases_metadata.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:085f02859e3d07ce9c964936ce063397f84ac7a80e01429a9da0efabd2aa2dea
|
| 3 |
+
size 76400
|
benchmark_metadata.json
CHANGED
|
@@ -1051,6 +1051,131 @@
|
|
| 1051 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
|
| 1052 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
|
| 1053 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1054 |
{
|
| 1055 |
"case_id": "31",
|
| 1056 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
|
|
@@ -3991,6 +4116,131 @@
|
|
| 3991 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
|
| 3992 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
|
| 3993 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3994 |
{
|
| 3995 |
"case_id": "14",
|
| 3996 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
|
|
@@ -6181,6 +6431,131 @@
|
|
| 6181 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
|
| 6182 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
|
| 6183 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6184 |
{
|
| 6185 |
"case_id": "44",
|
| 6186 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
|
|
|
|
| 1051 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
|
| 1052 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
|
| 1053 |
},
|
| 1054 |
+
{
|
| 1055 |
+
"case_id": "71",
|
| 1056 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71",
|
| 1057 |
+
"task_description": "Divide each element by the length of the sequence",
|
| 1058 |
+
"vocab": [
|
| 1059 |
+
0,
|
| 1060 |
+
1,
|
| 1061 |
+
2,
|
| 1062 |
+
3,
|
| 1063 |
+
4,
|
| 1064 |
+
5,
|
| 1065 |
+
6,
|
| 1066 |
+
7,
|
| 1067 |
+
8,
|
| 1068 |
+
9,
|
| 1069 |
+
10
|
| 1070 |
+
],
|
| 1071 |
+
"max_seq_len": 10,
|
| 1072 |
+
"min_seq_len": 4,
|
| 1073 |
+
"files": [
|
| 1074 |
+
{
|
| 1075 |
+
"file_name": "edges.pkl",
|
| 1076 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
|
| 1077 |
+
},
|
| 1078 |
+
{
|
| 1079 |
+
"file_name": "ll_model.pth",
|
| 1080 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth"
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"file_name": "ll_model_cfg.pkl",
|
| 1084 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl"
|
| 1085 |
+
},
|
| 1086 |
+
{
|
| 1087 |
+
"file_name": "meta.json",
|
| 1088 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json"
|
| 1089 |
+
}
|
| 1090 |
+
],
|
| 1091 |
+
"transformer_cfg": {
|
| 1092 |
+
"n_layers": 2,
|
| 1093 |
+
"d_model": 24,
|
| 1094 |
+
"n_ctx": 10,
|
| 1095 |
+
"d_head": 6,
|
| 1096 |
+
"model_name": "custom",
|
| 1097 |
+
"n_heads": 4,
|
| 1098 |
+
"d_mlp": 96,
|
| 1099 |
+
"act_fn": "gelu",
|
| 1100 |
+
"d_vocab": 13,
|
| 1101 |
+
"eps": 1e-05,
|
| 1102 |
+
"use_attn_result": true,
|
| 1103 |
+
"use_attn_scale": true,
|
| 1104 |
+
"use_split_qkv_input": true,
|
| 1105 |
+
"use_hook_mlp_in": true,
|
| 1106 |
+
"use_attn_in": false,
|
| 1107 |
+
"use_local_attn": false,
|
| 1108 |
+
"original_architecture": null,
|
| 1109 |
+
"from_checkpoint": false,
|
| 1110 |
+
"checkpoint_index": null,
|
| 1111 |
+
"checkpoint_label_type": null,
|
| 1112 |
+
"checkpoint_value": null,
|
| 1113 |
+
"tokenizer_name": null,
|
| 1114 |
+
"window_size": null,
|
| 1115 |
+
"attn_types": null,
|
| 1116 |
+
"init_mode": "gpt2",
|
| 1117 |
+
"normalization_type": null,
|
| 1118 |
+
"n_devices": 1,
|
| 1119 |
+
"attention_dir": "bidirectional",
|
| 1120 |
+
"attn_only": false,
|
| 1121 |
+
"seed": 0,
|
| 1122 |
+
"initializer_range": 0.08251369970070348,
|
| 1123 |
+
"init_weights": true,
|
| 1124 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 1125 |
+
"positional_embedding_type": "standard",
|
| 1126 |
+
"final_rms": false,
|
| 1127 |
+
"d_vocab_out": 60,
|
| 1128 |
+
"parallel_attn_mlp": false,
|
| 1129 |
+
"rotary_dim": null,
|
| 1130 |
+
"n_params": 13824,
|
| 1131 |
+
"use_hook_tokens": false,
|
| 1132 |
+
"gated_mlp": false,
|
| 1133 |
+
"default_prepend_bos": true,
|
| 1134 |
+
"dtype": "torch.float32",
|
| 1135 |
+
"tokenizer_prepends_bos": null,
|
| 1136 |
+
"n_key_value_heads": null,
|
| 1137 |
+
"post_embedding_ln": false,
|
| 1138 |
+
"rotary_base": 10000,
|
| 1139 |
+
"trust_remote_code": false,
|
| 1140 |
+
"rotary_adjacent_pairs": false,
|
| 1141 |
+
"load_in_4bit": false,
|
| 1142 |
+
"num_experts": null,
|
| 1143 |
+
"experts_per_token": null
|
| 1144 |
+
},
|
| 1145 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl",
|
| 1146 |
+
"training_args": {
|
| 1147 |
+
"output_dir": "/circuits-benchmark/results",
|
| 1148 |
+
"atol": 0.05,
|
| 1149 |
+
"lr": 0.001,
|
| 1150 |
+
"use_single_loss": true,
|
| 1151 |
+
"iit_weight": 0.4,
|
| 1152 |
+
"behavior_weight": 0.4,
|
| 1153 |
+
"strict_weight": 1.0,
|
| 1154 |
+
"epochs": 3000,
|
| 1155 |
+
"early_stop_accuracy_threshold": 99.9,
|
| 1156 |
+
"act_fn": "gelu",
|
| 1157 |
+
"use_wandb": true,
|
| 1158 |
+
"save_model_to_wandb": true,
|
| 1159 |
+
"clip_grad_norm": 0.1,
|
| 1160 |
+
"lr_scheduler": "linear",
|
| 1161 |
+
"model_pair": "strict",
|
| 1162 |
+
"same_size": false,
|
| 1163 |
+
"seed": 92,
|
| 1164 |
+
"batch_size": 256,
|
| 1165 |
+
"include_mlp": false,
|
| 1166 |
+
"detach_while_caching": true,
|
| 1167 |
+
"scheduler_val_metric": [
|
| 1168 |
+
"val/accuracy",
|
| 1169 |
+
"val/IIA",
|
| 1170 |
+
"val/strict_accuracy"
|
| 1171 |
+
],
|
| 1172 |
+
"siit_sampling": "sample_all",
|
| 1173 |
+
"val_iia_sampling": "all"
|
| 1174 |
+
},
|
| 1175 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json",
|
| 1176 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth",
|
| 1177 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
|
| 1178 |
+
},
|
| 1179 |
{
|
| 1180 |
"case_id": "31",
|
| 1181 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
|
|
|
|
| 4116 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
|
| 4117 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
|
| 4118 |
},
|
| 4119 |
+
{
|
| 4120 |
+
"case_id": "91",
|
| 4121 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91",
|
| 4122 |
+
"task_description": "Set all values below a threshold to 0",
|
| 4123 |
+
"vocab": [
|
| 4124 |
+
0,
|
| 4125 |
+
1,
|
| 4126 |
+
2,
|
| 4127 |
+
3,
|
| 4128 |
+
4,
|
| 4129 |
+
5,
|
| 4130 |
+
6,
|
| 4131 |
+
7,
|
| 4132 |
+
8,
|
| 4133 |
+
9,
|
| 4134 |
+
10
|
| 4135 |
+
],
|
| 4136 |
+
"max_seq_len": 10,
|
| 4137 |
+
"min_seq_len": 4,
|
| 4138 |
+
"files": [
|
| 4139 |
+
{
|
| 4140 |
+
"file_name": "edges.pkl",
|
| 4141 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
|
| 4142 |
+
},
|
| 4143 |
+
{
|
| 4144 |
+
"file_name": "ll_model.pth",
|
| 4145 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth"
|
| 4146 |
+
},
|
| 4147 |
+
{
|
| 4148 |
+
"file_name": "ll_model_cfg.pkl",
|
| 4149 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl"
|
| 4150 |
+
},
|
| 4151 |
+
{
|
| 4152 |
+
"file_name": "meta.json",
|
| 4153 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json"
|
| 4154 |
+
}
|
| 4155 |
+
],
|
| 4156 |
+
"transformer_cfg": {
|
| 4157 |
+
"n_layers": 2,
|
| 4158 |
+
"d_model": 4,
|
| 4159 |
+
"n_ctx": 10,
|
| 4160 |
+
"d_head": 1,
|
| 4161 |
+
"model_name": "custom",
|
| 4162 |
+
"n_heads": 4,
|
| 4163 |
+
"d_mlp": 16,
|
| 4164 |
+
"act_fn": "gelu",
|
| 4165 |
+
"d_vocab": 13,
|
| 4166 |
+
"eps": 1e-05,
|
| 4167 |
+
"use_attn_result": true,
|
| 4168 |
+
"use_attn_scale": true,
|
| 4169 |
+
"use_split_qkv_input": true,
|
| 4170 |
+
"use_hook_mlp_in": true,
|
| 4171 |
+
"use_attn_in": false,
|
| 4172 |
+
"use_local_attn": false,
|
| 4173 |
+
"original_architecture": null,
|
| 4174 |
+
"from_checkpoint": false,
|
| 4175 |
+
"checkpoint_index": null,
|
| 4176 |
+
"checkpoint_label_type": null,
|
| 4177 |
+
"checkpoint_value": null,
|
| 4178 |
+
"tokenizer_name": null,
|
| 4179 |
+
"window_size": null,
|
| 4180 |
+
"attn_types": null,
|
| 4181 |
+
"init_mode": "gpt2",
|
| 4182 |
+
"normalization_type": null,
|
| 4183 |
+
"n_devices": 1,
|
| 4184 |
+
"attention_dir": "causal",
|
| 4185 |
+
"attn_only": false,
|
| 4186 |
+
"seed": 0,
|
| 4187 |
+
"initializer_range": 0.14368424162141993,
|
| 4188 |
+
"init_weights": true,
|
| 4189 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 4190 |
+
"positional_embedding_type": "standard",
|
| 4191 |
+
"final_rms": false,
|
| 4192 |
+
"d_vocab_out": 9,
|
| 4193 |
+
"parallel_attn_mlp": false,
|
| 4194 |
+
"rotary_dim": null,
|
| 4195 |
+
"n_params": 384,
|
| 4196 |
+
"use_hook_tokens": false,
|
| 4197 |
+
"gated_mlp": false,
|
| 4198 |
+
"default_prepend_bos": true,
|
| 4199 |
+
"dtype": "torch.float32",
|
| 4200 |
+
"tokenizer_prepends_bos": null,
|
| 4201 |
+
"n_key_value_heads": null,
|
| 4202 |
+
"post_embedding_ln": false,
|
| 4203 |
+
"rotary_base": 10000,
|
| 4204 |
+
"trust_remote_code": false,
|
| 4205 |
+
"rotary_adjacent_pairs": false,
|
| 4206 |
+
"load_in_4bit": false,
|
| 4207 |
+
"num_experts": null,
|
| 4208 |
+
"experts_per_token": null
|
| 4209 |
+
},
|
| 4210 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl",
|
| 4211 |
+
"training_args": {
|
| 4212 |
+
"output_dir": "/circuits-benchmark/results",
|
| 4213 |
+
"atol": 0.05,
|
| 4214 |
+
"lr": 0.001,
|
| 4215 |
+
"use_single_loss": true,
|
| 4216 |
+
"iit_weight": 0.4,
|
| 4217 |
+
"behavior_weight": 0.4,
|
| 4218 |
+
"strict_weight": 1.0,
|
| 4219 |
+
"epochs": 3000,
|
| 4220 |
+
"early_stop_accuracy_threshold": 99.9,
|
| 4221 |
+
"act_fn": "gelu",
|
| 4222 |
+
"use_wandb": true,
|
| 4223 |
+
"save_model_to_wandb": true,
|
| 4224 |
+
"clip_grad_norm": 0.1,
|
| 4225 |
+
"lr_scheduler": "linear",
|
| 4226 |
+
"model_pair": "strict",
|
| 4227 |
+
"same_size": false,
|
| 4228 |
+
"seed": 92,
|
| 4229 |
+
"batch_size": 256,
|
| 4230 |
+
"include_mlp": false,
|
| 4231 |
+
"detach_while_caching": true,
|
| 4232 |
+
"scheduler_val_metric": [
|
| 4233 |
+
"val/accuracy",
|
| 4234 |
+
"val/IIA",
|
| 4235 |
+
"val/strict_accuracy"
|
| 4236 |
+
],
|
| 4237 |
+
"siit_sampling": "sample_all",
|
| 4238 |
+
"val_iia_sampling": "all"
|
| 4239 |
+
},
|
| 4240 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json",
|
| 4241 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth",
|
| 4242 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
|
| 4243 |
+
},
|
| 4244 |
{
|
| 4245 |
"case_id": "14",
|
| 4246 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
|
|
|
|
| 6431 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
|
| 6432 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
|
| 6433 |
},
|
| 6434 |
+
{
|
| 6435 |
+
"case_id": "67",
|
| 6436 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67",
|
| 6437 |
+
"task_description": "Multiply each element of the sequence by the length of the sequence.",
|
| 6438 |
+
"vocab": [
|
| 6439 |
+
0,
|
| 6440 |
+
1,
|
| 6441 |
+
2,
|
| 6442 |
+
3,
|
| 6443 |
+
4,
|
| 6444 |
+
5,
|
| 6445 |
+
6,
|
| 6446 |
+
7,
|
| 6447 |
+
8,
|
| 6448 |
+
9,
|
| 6449 |
+
10
|
| 6450 |
+
],
|
| 6451 |
+
"max_seq_len": 10,
|
| 6452 |
+
"min_seq_len": 4,
|
| 6453 |
+
"files": [
|
| 6454 |
+
{
|
| 6455 |
+
"file_name": "edges.pkl",
|
| 6456 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
|
| 6457 |
+
},
|
| 6458 |
+
{
|
| 6459 |
+
"file_name": "ll_model.pth",
|
| 6460 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth"
|
| 6461 |
+
},
|
| 6462 |
+
{
|
| 6463 |
+
"file_name": "ll_model_cfg.pkl",
|
| 6464 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl"
|
| 6465 |
+
},
|
| 6466 |
+
{
|
| 6467 |
+
"file_name": "meta.json",
|
| 6468 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json"
|
| 6469 |
+
}
|
| 6470 |
+
],
|
| 6471 |
+
"transformer_cfg": {
|
| 6472 |
+
"n_layers": 2,
|
| 6473 |
+
"d_model": 24,
|
| 6474 |
+
"n_ctx": 10,
|
| 6475 |
+
"d_head": 6,
|
| 6476 |
+
"model_name": "custom",
|
| 6477 |
+
"n_heads": 4,
|
| 6478 |
+
"d_mlp": 96,
|
| 6479 |
+
"act_fn": "gelu",
|
| 6480 |
+
"d_vocab": 13,
|
| 6481 |
+
"eps": 1e-05,
|
| 6482 |
+
"use_attn_result": true,
|
| 6483 |
+
"use_attn_scale": true,
|
| 6484 |
+
"use_split_qkv_input": true,
|
| 6485 |
+
"use_hook_mlp_in": true,
|
| 6486 |
+
"use_attn_in": false,
|
| 6487 |
+
"use_local_attn": false,
|
| 6488 |
+
"original_architecture": null,
|
| 6489 |
+
"from_checkpoint": false,
|
| 6490 |
+
"checkpoint_index": null,
|
| 6491 |
+
"checkpoint_label_type": null,
|
| 6492 |
+
"checkpoint_value": null,
|
| 6493 |
+
"tokenizer_name": null,
|
| 6494 |
+
"window_size": null,
|
| 6495 |
+
"attn_types": null,
|
| 6496 |
+
"init_mode": "gpt2",
|
| 6497 |
+
"normalization_type": null,
|
| 6498 |
+
"n_devices": 1,
|
| 6499 |
+
"attention_dir": "bidirectional",
|
| 6500 |
+
"attn_only": false,
|
| 6501 |
+
"seed": 0,
|
| 6502 |
+
"initializer_range": 0.0917662935482247,
|
| 6503 |
+
"init_weights": true,
|
| 6504 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 6505 |
+
"positional_embedding_type": "standard",
|
| 6506 |
+
"final_rms": false,
|
| 6507 |
+
"d_vocab_out": 42,
|
| 6508 |
+
"parallel_attn_mlp": false,
|
| 6509 |
+
"rotary_dim": null,
|
| 6510 |
+
"n_params": 13824,
|
| 6511 |
+
"use_hook_tokens": false,
|
| 6512 |
+
"gated_mlp": false,
|
| 6513 |
+
"default_prepend_bos": true,
|
| 6514 |
+
"dtype": "torch.float32",
|
| 6515 |
+
"tokenizer_prepends_bos": null,
|
| 6516 |
+
"n_key_value_heads": null,
|
| 6517 |
+
"post_embedding_ln": false,
|
| 6518 |
+
"rotary_base": 10000,
|
| 6519 |
+
"trust_remote_code": false,
|
| 6520 |
+
"rotary_adjacent_pairs": false,
|
| 6521 |
+
"load_in_4bit": false,
|
| 6522 |
+
"num_experts": null,
|
| 6523 |
+
"experts_per_token": null
|
| 6524 |
+
},
|
| 6525 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl",
|
| 6526 |
+
"training_args": {
|
| 6527 |
+
"output_dir": "/circuits-benchmark/results",
|
| 6528 |
+
"atol": 0.05,
|
| 6529 |
+
"lr": 0.001,
|
| 6530 |
+
"use_single_loss": true,
|
| 6531 |
+
"iit_weight": 0.7,
|
| 6532 |
+
"behavior_weight": 0.4,
|
| 6533 |
+
"strict_weight": 0.7,
|
| 6534 |
+
"epochs": 3000,
|
| 6535 |
+
"early_stop_accuracy_threshold": 99.9,
|
| 6536 |
+
"act_fn": "gelu",
|
| 6537 |
+
"use_wandb": true,
|
| 6538 |
+
"save_model_to_wandb": true,
|
| 6539 |
+
"clip_grad_norm": 0.1,
|
| 6540 |
+
"lr_scheduler": "linear",
|
| 6541 |
+
"model_pair": "strict",
|
| 6542 |
+
"same_size": false,
|
| 6543 |
+
"seed": 92,
|
| 6544 |
+
"batch_size": 256,
|
| 6545 |
+
"include_mlp": false,
|
| 6546 |
+
"detach_while_caching": true,
|
| 6547 |
+
"scheduler_val_metric": [
|
| 6548 |
+
"val/accuracy",
|
| 6549 |
+
"val/IIA",
|
| 6550 |
+
"val/strict_accuracy"
|
| 6551 |
+
],
|
| 6552 |
+
"siit_sampling": "sample_all",
|
| 6553 |
+
"val_iia_sampling": "all"
|
| 6554 |
+
},
|
| 6555 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json",
|
| 6556 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth",
|
| 6557 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
|
| 6558 |
+
},
|
| 6559 |
{
|
| 6560 |
"case_id": "44",
|
| 6561 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
|