```yaml
base_model: huggyllama/llama-13b
base_model_config: huggyllama/llama-13b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_in_4bit: false
gptq: false
strict: false
push_dataset_to_hub: winglian
hf_use_auth_token: true
datasets:
  - path: winglian/evals
    data_files:
      - hf/ARC-Challenge.jsonl
      - hf/ARC-Easy.jsonl
      - hf/riddle_sense.jsonl
      - hf/piqa.jsonl
    type: explainchoice:chat
  - path: winglian/evals
    data_files:
      - hf/gsm8k.jsonl
      - hf/winogrande.jsonl
    type: alpaca_chat.load_qa
  - path: winglian/evals
    data_files:
      - custom/n_task.jsonl
      - custom/misconceptions.jsonl
      - custom/context_insensitivity.jsonl
    type: alpaca_chat
  - path: camel-ai/math
    type: alpaca_chat.load_camel_ai
  - path: camel-ai/biology
    type: alpaca_chat.load_camel_ai
  - path: camel-ai/physics
    type: alpaca_chat.load_camel_ai
  - path: camel-ai/chemistry
    type: alpaca_chat.load_camel_ai
  - path: winglian/evals
    data_files:
      - custom/in_context_qa.jsonl
    type: context_qa
  - path: winglian/evals
    data_files:
      - custom/in_context_qa.jsonl
    type: context_qa.load_404
  - path: winglian/evals
    data_files:
      - custom/jokes_explained_500up.jsonl
    type: sharegpt_jokes
  - path: winglian/evals
    data_files:
      - custom/classify-self-chat.sharegpt.jsonl
      - custom/coding-self-chat.sharegpt.jsonl
      - custom/prose-gpt4.sharegpt.jsonl
      - custom/prose-rewrite-gpt4.sharegpt.jsonl
    type: sharegpt_simple.load_role
  - path: winglian/evals
    data_files:
      - openai/tldr.jsonl
    type: summarizetldr:chat
  - path: winglian/evals
    data_files:
      - hellaswag/hellaswag.jsonl
    type: explainchoice:chat
  - path: metaeval/ScienceQA_text_only
    type: concisechoice:chat
  - path: teknium/GPT4-LLM-Cleaned
    type: alpaca_chat
  - path: teknium/GPTeacher-General-Instruct
    data_files: gpt4-instruct-similarity-0.6-dataset.json
    type: gpteacher:chat
  - path: QingyiSi/Alpaca-CoT
    data_files:
      - Chain-of-Thought/formatted_cot_data/aqua_train.json
      - Chain-of-Thought/formatted_cot_data/creak_train.json
      - Chain-of-Thought/formatted_cot_data/ecqa_train.json
      - Chain-of-Thought/formatted_cot_data/esnli_train.json
      - Chain-of-Thought/formatted_cot_data/qasc_train.json
      - Chain-of-Thought/formatted_cot_data/qed_train.json
      - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
      - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
      - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
    type: alpaca_chat
  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
    type: alpaca_chat
  - path: ehartford/wizard_vicuna_70k_unfiltered
    type: sharegpt:chat
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r:
lora_alpha:
lora_dropout:
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
wandb_project: minotaur-13b
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./minotaur-13b
gradient_accumulation_steps: 1
micro_batch_size: 12
num_epochs: 3
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.00003
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
tf32: true
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 38
save_steps: 56
load_best_model_at_end: false
debug:
deepspeed:
weight_decay: 0.001
fsdp:
  - full_shard
  - auto_wrap
fsdp_config:
  fsdp_offload_params: true
  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
```
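The config above mixes a large number of dataset sources, each paired with a prompt-format handler under `type`. A minimal sketch for listing that mixture before a run, assuming the YAML is saved locally as `minotaur-13b.yml` (filename hypothetical) and PyYAML is installed:

```python
import yaml

# Load the training config shown above; the filename is an assumption.
with open("minotaur-13b.yml") as f:
    cfg = yaml.safe_load(f)

# Print each dataset source alongside the prompt-format handler it uses.
for ds in cfg["datasets"]:
    print(f"{ds['path']:45s} {ds['type']}")
```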