# Viewer metadata (not part of the recipe): original file size 396 Bytes
# Recipe with a single stage (quant_stage) that configures two modifiers:
# SmoothQuantModifier first, then GPTQModifier.
# NOTE(review): the modifier order is kept exactly as written; presumably the
# consumer applies them in this order -- confirm before reordering.
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.885
      # Each entry is a two-item list: a list of layer-name patterns, then a
      # single layer-name pattern.
      # NOTE(review): the 're:' prefix presumably marks a regular expression
      # for the consumer -- confirm against its documentation.
      mappings:
        - - ['re:.*qkv_proj']
          - 're:.*input_layernorm'
        - - ['re:.*gate_up_proj']
          - 're:.*post_attention_layernorm'
    GPTQModifier:
      # Scheme and layer selection.
      scheme: W8A8
      targets: Linear
      ignore:
        - lm_head
      observer: mse
      # Algorithm settings.
      sequential_update: true
      dampening_frac: 0.01