task: llm-orpo base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct project_name: autotrain-smallm2-orpo log: tensorboard backend: local data: path: argilla/distilabel-capybara-dpo-7k-binarized train_split: train valid_split: null chat_template: chatml column_mapping: text_column: chosen rejected_text_column: rejected prompt_text_column: prompt params: block_size: 1024 model_max_length: 2048 max_prompt_length: 512 epochs: 3 batch_size: 2 lr: 3e-5 peft: true quantization: int4 target_modules: all-linear padding: right optimizer: adamw_torch scheduler: linear gradient_accumulation: 4 mixed_precision: fp16 hub: username: ${HF_USERNAME} token: ${HF_TOKEN} push_to_hub: false