Add new models (#17)

Browse files

- Add new models (217c82c37c43f78656b9b13478fbd8b89a3f6310)

Co-authored-by: Ivan Arcuschin <[email protected]>

Files changed (15) hide show

67/edges.pkl +3 -0
67/ll_model.pth +3 -0
67/ll_model_cfg.pkl +3 -0
67/meta.json +1 -0
71/edges.pkl +3 -0
71/ll_model.pth +3 -0
71/ll_model_cfg.pkl +3 -0
71/meta.json +1 -0
91/edges.pkl +3 -0
91/ll_model.pth +3 -0
91/ll_model_cfg.pkl +3 -0
91/meta.json +1 -0
benchmark_cases_metadata.csv +3 -0
benchmark_cases_metadata.parquet +2 -2
benchmark_metadata.json +375 -0

67/edges.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
+size 667

67/ll_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c03fe8705b1b01ab8f9020df80f3f06bd0d37780ec97f1de35ed385148ea79ba
+size 75626

67/ll_model_cfg.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ede2a8d814262f05c481f13b50315814be1ec23294e84ce9fdc0c86574f7a87c
+size 1100

67/meta.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.7, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-0.7-iit-0.7-b-0.4", "wandb_name": "case-67-seed-92-s-0.7-b-0.4-iit-0.7", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}

71/edges.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
+size 667

71/ll_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c34ead7aa3822ede0aa18af62f496f7482f392f73e509a8aacd564f13bc4dde3
+size 77454

71/ll_model_cfg.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87f7b3480ffb7944e2fc076320ff630bde55792609c0c56104352f379f593ad9
+size 1100

71/meta.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-71-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}

91/edges.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113

91/ll_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb2901707d8b6cac3afc14343af4defb5eeb00887f8b3bc9d77437be16cd0364
+size 14734

91/ll_model_cfg.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55939fede379285f7ea82d2f609634380a17b39d053cb68be0ae278d1f34cb52
+size 1093

91/meta.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-91-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}

benchmark_cases_metadata.csv CHANGED Viewed

@@ -8,6 +8,7 @@ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,tr
 79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
@@ -32,6 +33,7 @@ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect O
 101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
 65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
@@ -49,6 +51,7 @@ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi
 122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,

 79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+71,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71,Divide each element by the length of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.08251369970070348,True,False,standard,False,60,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
 65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+91,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91,Set all values below a threshold to 0,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.14368424162141993,True,False,standard,False,9,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+67,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67,Multiply each element of the sequence by the length of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0917662935482247,True,False,standard,False,42,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.7,0.4,0.7,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,

benchmark_cases_metadata.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:049d63f22c918edb98d89c9eb69f091016c4780b033e5559523abd7e50188238
-size 76100

 version https://git-lfs.github.com/spec/v1
+oid sha256:085f02859e3d07ce9c964936ce063397f84ac7a80e01429a9da0efabd2aa2dea
+size 76400

benchmark_metadata.json CHANGED Viewed

@@ -1051,6 +1051,131 @@
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
     },
     {
       "case_id": "31",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
@@ -3991,6 +4116,131 @@
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
     },
     {
       "case_id": "14",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
@@ -6181,6 +6431,131 @@
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
     },
     {
       "case_id": "44",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",

       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
     },
+    {
+      "case_id": "71",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71",
+      "task_description": "Divide each element by the length of the sequence",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 96,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.08251369970070348,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 60,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 13824,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 0.4,
+        "behavior_weight": 0.4,
+        "strict_weight": 1.0,
+        "epochs": 3000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 92,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
+    },
     {
       "case_id": "31",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
     },
+    {
+      "case_id": "91",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91",
+      "task_description": "Set all values below a threshold to 0",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.14368424162141993,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 9,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 0.4,
+        "behavior_weight": 0.4,
+        "strict_weight": 1.0,
+        "epochs": 3000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 92,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
+    },
     {
       "case_id": "14",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
     },
+    {
+      "case_id": "67",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67",
+      "task_description": "Multiply each element of the sequence by the length of the sequence.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 96,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.0917662935482247,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 42,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 13824,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 0.7,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 3000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 92,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
+    },
     {
       "case_id": "44",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",