{
  "compression": [
    {
      "algorithm": "movement_sparsity",
      "ignored_scopes": [
        "{re}.*NNCFEmbedding.*",
        "{re}.*LayerNorm.*",
        "{re}.*pooler.*",
        "{re}.*classifier.*"
      ],
      "params": {
        "enable_structured_masking": true,
        "importance_regularization_factor": 0.05,
        "warmup_end_epoch": 2,
        "warmup_start_epoch": 1
      },
      "sparse_structure_by_scopes": [
        {
          "mode": "block",
          "sparse_factors": [
            32,
            32
          ],
          "target_scopes": "{re}.*BertAttention.*"
        },
        {
          "axis": 0,
          "mode": "per_dim",
          "target_scopes": "{re}.*BertIntermediate.*"
        },
        {
          "axis": 1,
          "mode": "per_dim",
          "target_scopes": "{re}.*BertOutput.*"
        }
      ]
    },
    {
      "algorithm": "quantization",
      "export_to_onnx_standard_ops": false,
      "ignored_scopes": [
        "{re}.*__add___[0-1]",
        "{re}.*layer_norm_0",
        "{re}.*matmul_1",
        "{re}.*__truediv__*"
      ],
      "initializer": {
        "batchnorm_adaptation": {
          "num_bn_adaptation_samples": 200
        },
        "range": {
          "num_init_samples": 32,
          "params": {
            "max_percentile": 99.99,
            "min_percentile": 0.01
          },
          "type": "percentile"
        }
      },
      "overflow_fix": "disable",
      "preset": "mixed",
      "scope_overrides": {
        "activations": {
          "{re}.*matmul_0": {
            "mode": "symmetric"
          }
        }
      }
    }
  ],
  "input_info": [
    {
      "keyword": "input_ids",
      "sample_size": [
        32,
        128
      ],
      "type": "long"
    },
    {
      "keyword": "token_type_ids",
      "sample_size": [
        32,
        128
      ],
      "type": "long"
    },
    {
      "keyword": "attention_mask",
      "sample_size": [
        32,
        128
      ],
      "type": "long"
    }
  ],
  "log_dir": "jpqd-bert-base-ft-sst2",
  "optimum_version": "1.6.4",
  "save_onnx_model": false,
  "transformers_version": "4.26.1"
}