# Launches train.py for the run named by RUN_NAME, passing
# output/${RUN_NAME} as the output directory.
export RUN_NAME=single_latent

# TODO: update to not use the current tokenizer; use the gpt2 one instead.

# Timing notes recorded for different batch sizes:
#   200 batch size, 128 sequence len: ? (breaks)
#   100 batch size, 128 sequence len: 252:38:58
#    10 batch size, 128 sequence len: 281:32:53
# Got ~12 hours to train, want 3 saves, so one save every 4 hours.
# NOTE(review): --save_steps below is presumably meant to match that
# cadence -- confirm against the measured step rate.
# NOTE(review): "INVALID.txt" looks like a placeholder for --train_file;
# point it at the real training file before running.
#
# The flag list is kept free of comments: a backslash continuation must be
# the very last character on its line, and the final argument carries no
# trailing backslash so the command ends where the list ends.
./venv/bin/python train.py \
  --t5_model_name_or_path="t5-base" \
  --output_dir="output/${RUN_NAME}" \
  --overwrite_output_dir \
  --do_train \
  --n_latent_tokens 1 \
  --latent_token_size 32 \
  --save_steps="2000" \
  --block_size="128" \
  --per_device_train_batch_size="100" \
  --train_file="INVALID.txt" \
  --num_train_epochs="1"