merge_method: della_linear
base_model: migtissera/Tess-3-Llama-3.1-70B
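# Rough glossary for mergekit's DELLA parameters (paraphrased from memory of
# the upstream docs; check mergekit's documentation for the authoritative
# definitions): `density` is the fraction of delta weights kept, `epsilon`
# is the magnitude-based spread applied to per-parameter drop probabilities
# around that base rate, and `lambda` scales the merged deltas before they
# are added back onto the base model.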
models:
  - model: Sao10K/L3-70B-Euryale-v2.1
    parameters:
      weight:
        - filter: q_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: k_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: v_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: o_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: input_layernorm
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: up_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: gate_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: down_proj
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - filter: post_attention_layernorm
          value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
        - value: 0
      density: 0.5
      epsilon: 0.1
      lambda: 1.0
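  # Note: mergekit interprets a list value as a layer gradient, interpolating
  # the anchor points across the model's layer stack (80 hidden layers for a
  # Llama-3.1-70B). The 21-point ramps above therefore blend Euryale into the
  # middle layers only, leaving the first and last few layers as pure Tess-3;
  # the trailing bare `value: 0` is the fallback for tensors no filter matches.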
  - model: migtissera/Tess-3-Llama-3.1-70B
    parameters:
      weight: 1.0
      density:
        - filter: q_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: k_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: v_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: o_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: input_layernorm
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: up_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: gate_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: down_proj
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - filter: post_attention_layernorm
          value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
        - value: 0.5
      epsilon:
        - filter: q_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: k_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: v_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: o_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: input_layernorm
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: up_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: gate_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: down_proj
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - filter: post_attention_layernorm
          value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
        - value: 0.1
      lambda: 1.0
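  # The density and epsilon ramps mirror Euryale's weight gradient: in the
  # outer layers, where Euryale contributes nothing, Tess-3 stays fully dense
  # (density 1, epsilon 0); in the middle layers its deltas are pruned at
  # density 0.5 with a drop-probability spread that peaks at 0.1 mid-stack.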
dtype: bfloat16
out_dtype: bfloat16
parameters:
  int8_mask: true
  normalize: true
  rescale: true
  filter_wise: false
chat_template: auto
tokenizer:
  source: union
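# To run the merge (a minimal sketch, assuming this file is saved as
# config.yaml and mergekit is installed, e.g. via `pip install mergekit`):
#
#   mergekit-yaml config.yaml ./merged-model --cuda --lazy-unpickle
#
# --cuda and --lazy-unpickle are optional (GPU compute and lower peak memory
# during loading); the output directory name is arbitrary.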