| { | |
| "batcher": null, | |
| "cacher": null, | |
| "compiler": null, | |
| "factorizer": null, | |
| "pruner": null, | |
| "quantizer": "llm_int8", | |
| "llm_int8_compute_dtype": "bfloat16", | |
| "llm_int8_double_quant": false, | |
| "llm_int8_enable_fp32_cpu_offload": false, | |
| "llm_int8_has_fp16_weight": false, | |
| "llm_int8_quant_type": "fp4", | |
| "llm_int8_threshold": 6.0, | |
| "llm_int8_weight_bits": 8, | |
| "batch_size": 1, | |
| "device": "cuda", | |
| "save_fns": [], | |
| "load_fns": [ | |
| "transformers", | |
| "transformers", | |
| "transformers" | |
| ], | |
| "reapply_after_load": { | |
| "factorizer": null, | |
| "pruner": null, | |
| "quantizer": null, | |
| "cacher": null, | |
| "compiler": null, | |
| "batcher": null | |
| }, | |
| "api_key": null | |
| } |