Skip to main content

Pipeline Automation

Build automated training pipelines for production workflows.

Simple Pipeline

Sequential Training

#!/bin/bash
# pipeline.sh — sequential training pipeline:
# prepare data -> train -> evaluate -> upload to the Hugging Face Hub.
#
# Required env: HF_USERNAME — Hub namespace to upload the model under.

set -euo pipefail  # Exit on error, unset variable, or pipeline failure

# Fail fast if the Hub namespace is not configured (used in Step 4).
: "${HF_USERNAME:?HF_USERNAME must be set to your Hugging Face username}"

# Step 1: Prepare data
echo "Step 1: Data preparation..."
python prepare_data.py

# Step 2: Train model
echo "Step 2: Training..."
aitraining llm --train \
  --model google/gemma-3-270m \
  --data-path ./processed_data \
  --project-name production-model

# Step 3: Evaluate
echo "Step 3: Evaluation..."
python evaluate_model.py --model-path ./production-model

# Step 4: Push to Hub (use huggingface-cli)
echo "Step 4: Deploying..."
huggingface-cli upload "$HF_USERNAME/production-model" ./production-model

echo "Pipeline complete!"

Python Pipeline

Using Python

import subprocess
import json
from pathlib import Path

def run_training(config):
    """Launch a training run via the aitraining CLI.

    Args:
        config: Path to the config file passed to ``--config``.

    Returns:
        The captured stdout of the training process.

    Raises:
        RuntimeError: If the process exits with a non-zero status.
    """
    proc = subprocess.run(
        ["aitraining", "--config", config],
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return proc.stdout
    raise RuntimeError(f"Training failed: {proc.stderr}")

def get_training_metrics(model_path):
    """Read key training metrics from a run's ``trainer_state.json``.

    Args:
        model_path: Directory containing the trainer output.

    Returns:
        A dict with ``best_metric``, ``global_step`` and ``epoch`` (values
        may be None if absent from the state file), or None when the state
        file does not exist.
    """
    state_path = Path(model_path) / "trainer_state.json"
    if not state_path.exists():
        return None
    state = json.loads(state_path.read_text())
    return {key: state.get(key) for key in ("best_metric", "global_step", "epoch")}

def main():
    """Run the training pipeline and enforce the quality gate.

    Returns:
        True if training produced metrics and the quality gate passed,
        False otherwise.
    """
    # Train
    print("Training model...")
    run_training("config.yaml")

    # Get metrics from trainer state
    print("Checking results...")
    metrics = get_training_metrics("./output")
    if metrics is None:
        # trainer_state.json was not written — treat as a failed run
        # instead of crashing with AttributeError on metrics.get() below.
        print("No trainer_state.json found in ./output!")
        return False
    print(f"Best metric: {metrics.get('best_metric')}")

    # Check quality gate (best_metric is typically eval_loss).
    # A missing or None best_metric also fails the gate — comparing None
    # against a float would raise TypeError.
    best = metrics.get("best_metric")
    if best is None or best > 0.5:
        print("Quality gate failed!")
        return False

    print("Pipeline passed!")
    return True

if __name__ == "__main__":
    # Propagate the pipeline result as the process exit code so shell
    # callers and CI can detect quality-gate failures (0 = pass, 1 = fail).
    raise SystemExit(0 if main() else 1)

Multi-Stage Pipeline

Training → Distillation → Evaluation

#!/bin/bash
# full_pipeline.sh — train a teacher model, distill it into a smaller
# student model, then compare the two.

set -euo pipefail  # abort on errors, unset variables, and pipeline failures

# Stage 1: Train teacher
echo "=== Stage 1: Training teacher model ==="
aitraining llm --train \
  --model google/gemma-2-2b \
  --data-path ./data \
  --project-name teacher-model \
  --epochs 3

# Stage 2: Distill to student (teacher weights come from Stage 1's output)
echo "=== Stage 2: Knowledge distillation ==="
aitraining llm --train \
  --model google/gemma-3-270m \
  --teacher-model ./teacher-model \
  --data-path ./data \
  --project-name student-model \
  --use-distillation \
  --distill-temperature 2.0 \
  --epochs 5

# Stage 3: Evaluate both
echo "=== Stage 3: Evaluation ==="
python compare_models.py \
  --teacher ./teacher-model \
  --student ./student-model

echo "Pipeline complete!"

Conditional Pipelines

With Quality Gates

#!/bin/bash
# quality_pipeline.sh — train a model and gate deployment on eval quality.

set -euo pipefail  # fail fast on errors, unset variables, pipeline failures

#######################################
# Train with the given config, then compare the best metric from
# output/trainer_state.json against a threshold.
# Arguments:
#   $1 - path to the training config file
#   $2 - maximum acceptable best_metric (typically eval_loss)
# Returns:
#   0 if the metric is within the threshold, 1 otherwise
#######################################
train_and_check() {
  local config=$1
  local threshold=$2
  local eval_loss  # was a global leak in the original; keep it function-local

  # Train
  aitraining --config "$config"

  # Get best metric from trainer_state.json (999 sentinel fails the gate
  # when the key is missing).
  eval_loss=$(python -c "
import json
with open('output/trainer_state.json') as f:
    state = json.load(f)
    print(state.get('best_metric', 999))
")

  # Compare with awk instead of bc: bc is often absent on minimal systems,
  # awk is guaranteed by POSIX. exit !(cond) -> status 0 when loss > max.
  if awk -v loss="$eval_loss" -v max="$threshold" 'BEGIN { exit !(loss > max) }'; then
    echo "Quality gate failed: $eval_loss > $threshold"
    return 1
  fi

  return 0
}

# Run the training with the quality gate; stop the pipeline unless it passes.
if ! train_and_check "config.yaml" 0.5; then
  echo "Pipeline stopped due to quality gate failure"
  exit 1
fi

echo "Proceeding to deployment..."
# Deploy steps here

Scheduled Training

Cron Job

# Add to crontab (crontab -e)
# Run daily at 2 AM
0 2 * * * /path/to/training_pipeline.sh >> /var/log/training.log 2>&1

With Data Updates

#!/bin/bash
# scheduled_training.sh — retrain only when new data has arrived.
# Intended to be invoked from cron.

set -euo pipefail

readonly MARKER=./last_training_marker

# Check for new data. On the very first run the marker file does not exist
# yet, so treat every file in ./incoming as new instead of letting
# `find -newer` fail on the missing reference file.
if [[ -e "$MARKER" ]]; then
  NEW_DATA=$(find ./incoming -newer "$MARKER" -type f | wc -l)
else
  NEW_DATA=$(find ./incoming -type f | wc -l)
fi

if [ "$NEW_DATA" -gt 0 ]; then
  echo "Found $NEW_DATA new files, starting training..."

  # Merge new data
  python merge_data.py

  # Train
  aitraining --config production.yaml

  # Update marker only after a successful run (set -e aborts before this
  # line if merging or training failed).
  touch "$MARKER"
else
  echo "No new data, skipping training"
fi

CI/CD Integration

GitHub Actions

# .github/workflows/train.yml
# Retrains the model automatically when training data or configs change.
name: Train Model

on:
  push:
    branches: [main]
    # Only trigger when data or configuration changes — not on every commit.
    paths:
      - 'data/**'
      - 'configs/**'

jobs:
  train:
    # NOTE(review): GitHub-hosted ubuntu runners have no GPU — confirm the
    # production config trains acceptably on CPU, or use a self-hosted runner.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install aitraining

      - name: Train model
        env:
          # Store these as repository secrets (Settings -> Secrets and
          # variables -> Actions); never commit tokens to the repository.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
        run: aitraining --config configs/production.yaml

Next Steps