LLM Training API

Complete API reference for LLM training.

LLMTrainingParams

The main configuration class for LLM training.
from autotrain.trainers.clm.params import LLMTrainingParams

Basic Parameters

params = LLMTrainingParams(
    # Core parameters (always specify these)
    model="google/gemma-3-270m",       # Default: "google/gemma-3-270m"
    data_path="./data.jsonl",          # Default: "data"
    project_name="my-model",           # Default: "project-name"

    # Data splits
    train_split="train",               # Default: "train"
    valid_split=None,                  # Default: None
    max_samples=None,                  # Default: None (use all)
)
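
For the default text_column, data_path is expected to point at a dataset with one text field per record. As an illustration (the exact layout depends on the trainer and your dataset), a minimal data.jsonl could look like:

{"text": "Question: What is 2 + 2? Answer: 4."}
{"text": "Question: What is the capital of France? Answer: Paris."}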

Trainer Selection

params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",

    trainer="sft",  # Default: "default" (plain causal-LM pretraining). Options: default, sft, dpo, orpo, reward
)

Training Hyperparameters

params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",

    # Core hyperparameters (showing defaults)
    epochs=1,           # Default: 1
    batch_size=2,       # Default: 2
    lr=3e-5,            # Default: 3e-5
    warmup_ratio=0.1,   # Default: 0.1
    gradient_accumulation=4,  # Default: 4
    weight_decay=0.0,   # Default: 0.0
    max_grad_norm=1.0,  # Default: 1.0

    # Precision
    mixed_precision=None,  # Default: None (options: bf16, fp16, None)

    # Optimization
    optimizer="adamw_torch",  # Default: adamw_torch
    scheduler="linear",       # Default: linear
    seed=42,                  # Default: 42
)
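
Note that batch_size and gradient_accumulation multiply: with the defaults above, each optimizer step sees an effective batch of 8 examples per device.

# Effective per-device batch per optimizer step, using the defaults above
effective_batch = 2 * 4  # batch_size * gradient_accumulation = 8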

PEFT/LoRA Configuration

params = LLMTrainingParams(
    model="meta-llama/Llama-3.2-1B",
    data_path="./data.jsonl",
    project_name="my-model",

    # Enable LoRA (default: False)
    peft=True,
    lora_r=16,           # Default: 16
    lora_alpha=32,       # Default: 32
    lora_dropout=0.05,   # Default: 0.05
    target_modules="all-linear",  # Default: all-linear

    # Quantization (optional)
    quantization="int4",  # Options: int4, int8, or None (default: None)

    # Merge the LoRA adapter into the base model after training (default: True)
    merge_adapter=True,
)
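
As a rule of thumb when tuning lora_r and lora_alpha: in the standard LoRA formulation the adapter update is scaled by lora_alpha / lora_r, so the defaults above apply a scaling factor of 2.

# Standard LoRA update: W' = W + (lora_alpha / lora_r) * (B @ A)
scaling = 32 / 16  # = 2.0 with the defaults above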

Data Processing

params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",

    # Text processing
    text_column="text",
    block_size=-1,            # Default: -1 (model default)
    model_max_length=2048,    # Default: 2048
    add_eos_token=True,       # Default: True
    padding="right",          # Default: "right"

    # Chat format
    chat_template=None,       # Auto-detect or specify
    apply_chat_template=True, # Default: True

    # Efficiency
    packing=None,             # Default: None (set True to enable)
    use_flash_attention_2=False,  # Default: False
    attn_implementation=None,     # Default: None
)
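
When a chat template is applied, conversational data is commonly stored as role/content message lists. A typical JSONL record might look like the following (illustrative; the column name "messages" is an assumption and depends on your dataset):

{"messages": [{"role": "user", "content": "What is LoRA?"}, {"role": "assistant", "content": "A parameter-efficient fine-tuning method."}]}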

DPO Parameters

params = LLMTrainingParams(
    model="meta-llama/Llama-3.2-1B",
    data_path="./preferences.jsonl",
    project_name="my-model",

    trainer="dpo",

    # DPO-specific
    dpo_beta=0.1,              # Default: 0.1
    max_prompt_length=128,     # Default: 128
    max_completion_length=None, # Default: None

    # Reference model (optional)
    model_ref=None,  # Default: None (falls back to the base model as reference)

    # Data columns (required for DPO)
    prompt_text_column="prompt",
    text_column="chosen",
    rejected_text_column="rejected",
)
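
Given the column mapping above, each DPO record pairs one prompt with a preferred and a rejected completion, e.g.:

{"prompt": "Explain overfitting in one sentence.", "chosen": "Overfitting is when a model memorizes training data and fails to generalize.", "rejected": "Overfitting means the model is very accurate."}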

ORPO Parameters

params = LLMTrainingParams(
    model="google/gemma-2-2b",
    data_path="./preferences.jsonl",
    project_name="my-model",

    trainer="orpo",

    # ORPO-specific
    dpo_beta=0.1,              # Default: 0.1
    max_prompt_length=128,     # Default: 128
    max_completion_length=None, # Default: None

    # Data columns (required for ORPO)
    prompt_text_column="prompt",
    text_column="chosen",
    rejected_text_column="rejected",
)
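
ORPO consumes the same prompt/chosen/rejected layout as DPO, but it folds the preference objective into the supervised loss, so no reference model is needed.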

Knowledge Distillation

params = LLMTrainingParams(
    model="google/gemma-3-270m",           # Student
    teacher_model="google/gemma-2-2b",     # Teacher
    data_path="./prompts.jsonl",
    project_name="distilled-model",

    use_distillation=True,
    distill_temperature=3.0,   # Default: 3.0
    distill_alpha=0.7,         # Default: 0.7
    distill_max_teacher_length=512,  # Default: 512
)
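
These parameters follow the usual knowledge-distillation recipe: a temperature-softened KL term against the teacher, blended with the regular language-modeling loss via distill_alpha. A minimal sketch of that standard formulation (not necessarily AutoTrain's exact implementation):

import torch.nn.functional as F

def distill_loss(student_logits, teacher_logits, labels, T=3.0, alpha=0.7):
    # Temperature-softened KL between student and teacher (Hinton-style KD);
    # the T*T factor keeps gradient magnitudes comparable across temperatures
    kd = F.kl_div(
        F.log_softmax(student_logits / T, dim=-1),
        F.softmax(teacher_logits / T, dim=-1),
        reduction="batchmean",
    ) * (T * T)
    # Regular cross-entropy against the ground-truth tokens
    ce = F.cross_entropy(student_logits.view(-1, student_logits.size(-1)), labels.view(-1))
    return alpha * kd + (1 - alpha) * ce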

Logging & Saving

params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",

    # Logging
    log="wandb",  # wandb, tensorboard, or None (default: wandb)
    logging_steps=-1,     # Default: -1 (auto)
    wandb_visualizer=True,  # Terminal visualizer
    wandb_token=None,       # W&B API token (optional)

    # Checkpointing
    save_strategy="steps",  # steps or epoch (default: epoch)
    save_steps=500,
    save_total_limit=1,   # Default: 1
    eval_strategy="steps",  # Evaluation cadence; requires valid_split to be set
)

Hub Integration

params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",

    # Push to Hub
    push_to_hub=True,
    username="your-username",
    token="hf_...",
)
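
Avoid hard-coding the token in source files; reading it from an environment variable works just as well (HF_TOKEN is a conventional name, not a requirement):

import os

token = os.environ["HF_TOKEN"]  # keeps the secret out of version control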

Running Training

from autotrain.project import AutoTrainProject

# Create and run project
project = AutoTrainProject(
    params=params,
    backend="local",
    process=True
)

job_id = project.create()

Complete Example

from autotrain.trainers.clm.params import LLMTrainingParams
from autotrain.project import AutoTrainProject

# Full configuration
params = LLMTrainingParams(
    # Model
    model="meta-llama/Llama-3.2-1B",
    project_name="llama-production",

    # Data
    data_path="./conversations.jsonl",
    train_split="train",
    valid_split="validation",
    text_column="text",
    block_size=2048,

    # Training
    trainer="sft",
    epochs=3,
    batch_size=2,
    gradient_accumulation=8,
    lr=2e-5,
    warmup_ratio=0.1,
    mixed_precision="bf16",

    # LoRA
    peft=True,
    lora_r=32,
    lora_alpha=64,
    lora_dropout=0.05,

    # Optimization
    use_flash_attention_2=True,
    packing=True,
    auto_find_batch_size=True,
    unsloth=False,  # Set True to use Unsloth for faster training

    # Distribution (for multi-GPU)
    distributed_backend=None,  # None for auto (DDP), or "deepspeed"

    # Logging
    log="wandb",
    logging_steps=-1,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=1,

    # Hub
    push_to_hub=True,
    username="my-username",
    token="hf_...",
)

# Run training
project = AutoTrainProject(
    params=params,
    backend="local",
    process=True
)
job_id = project.create()
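
Once the push completes, the result loads like any other Hub checkpoint. A minimal inference sketch with transformers, assuming the model was pushed as my-username/llama-production:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "my-username/llama-production"  # assumed from username + project_name above
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

inputs = tokenizer("The three laws of robotics are", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))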

Next Steps