GY2233 committed on
Commit
7d5d5ba
·
verified ·
1 Parent(s): 46f874b

Upload R2R router config

Browse files
Files changed (1) hide show
  1. config.json +97 -0
config.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model_type": "HiddenStatesTokenLMHeadLogitsClassifier",
4
+ "init_args": {
5
+ "hidden_states_size": 2048,
6
+ "logits_size": 100,
7
+ "hidden_dims": [
8
+ 1024,
9
+ 1024,
10
+ 1024,
11
+ 1024,
12
+ 1024,
13
+ 1024
14
+ ],
15
+ "expansion_factor": 4,
16
+ "dropout_rate": 0.1,
17
+ "use_position_embedding": false,
18
+ "freeze_lm_head": true,
19
+ "normalize_input": false,
20
+ "pretrained_model_name": "Qwen/Qwen3-1.7B"
21
+ },
22
+ "model_specific_args": {},
23
+ "input_type": [
24
+ "hidden_states",
25
+ "token",
26
+ "logits"
27
+ ],
28
+ "output_type": "binary"
29
+ },
30
+ "data": {
31
+ "train": {
32
+ "path": [
33
+ "local:output_qwen3_1_7b/query_dataset_train/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
34
+ ],
35
+ "type": "divergent",
36
+ "input_prefix": "small_"
37
+ },
38
+ "test": {
39
+ "path": [
40
+ "local:output_qwen3_1_7b/query_dataset_validation/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
41
+ ],
42
+ "type": "divergent",
43
+ "input_prefix": "small_"
44
+ }
45
+ },
46
+ "training": {
47
+ "optimizer": {
48
+ "lr": 5e-05,
49
+ "weight_decay": 0.0005
50
+ },
51
+ "params": {
52
+ "num_epochs": 50,
53
+ "batch_size": 1024,
54
+ "patience": 10,
55
+ "device": "cuda"
56
+ },
57
+ "loss": {
58
+ "type": "BCEWithLogitsLoss",
59
+ "recall_factor": 1.0
60
+ },
61
+ "validation": {
62
+ "valid_freq": 2
63
+ },
64
+ "dtype": "float32"
65
+ },
66
+ "optimizing": {
67
+ "type": "threshold",
68
+ "min_recall": 0.95
69
+ },
70
+ "output": {
71
+ "output_dir": "resource/default_router_qwen3_1_7b.pt",
72
+ "checkpoint_dir": "output/checkpoint_qwen3_1_7b",
73
+ "model_name": null
74
+ },
75
+ "result": {
76
+ "model_path": "resource/default_router_qwen3_1_7b.pt/classifier_20250721_183847.pt",
77
+ "results": {
78
+ "threshold": 0.36636363636363634,
79
+ "best_epoch": 7,
80
+ "best_val_loss": 0.7067983349265603,
81
+ "final_metrics": {
82
+ "accuracy": 0.7341007184884746,
83
+ "precision": 0.17696475971794748,
84
+ "recall": 0.951471505652902,
85
+ "f1": 0.29842523063720233,
86
+ "positive_rate": 0.3195670204357768
87
+ },
88
+ "pre_opt_metrics": {
89
+ "accuracy": 0.6010652523999477,
90
+ "precision": 0.5730381077349015,
91
+ "recall": 0.9129135702481148,
92
+ "f1": 0.7041066981808973,
93
+ "positive_rate": 0.8283077168204037
94
+ }
95
+ }
96
+ }
97
+ }