Pipeline Automation
Build automated training pipelines for production workflows.
Simple Pipeline
Sequential Training
Copy
#!/bin/bash
# pipeline.sh — sequential training pipeline:
# prepare data -> train -> evaluate -> push to the Hugging Face Hub.
#
# Required env: HF_USERNAME — Hub account the model is uploaded to.
set -euo pipefail  # exit on error, unset variables, and pipeline failures

# Fail fast if HF_USERNAME is missing instead of uploading to "/production-model".
: "${HF_USERNAME:?HF_USERNAME must be set}"

# Step 1: Prepare data
echo "Step 1: Data preparation..."
python prepare_data.py

# Step 2: Train model
echo "Step 2: Training..."
aitraining llm --train \
  --model google/gemma-3-270m \
  --data-path ./processed_data \
  --project-name production-model

# Step 3: Evaluate
echo "Step 3: Evaluation..."
python evaluate_model.py --model-path ./production-model

# Step 4: Push to Hub (use huggingface-cli)
echo "Step 4: Deploying..."
huggingface-cli upload "$HF_USERNAME/production-model" ./production-model

echo "Pipeline complete!"
Python Pipeline
Using Python
Copy
import subprocess
import json
from pathlib import Path
def run_training(config):
    """Launch a training run via the aitraining CLI.

    Args:
        config: Path to the config file passed to ``--config``.

    Returns:
        The captured stdout of the training process.

    Raises:
        RuntimeError: If the process exits with a non-zero status.
    """
    proc = subprocess.run(
        ["aitraining", "--config", config],
        capture_output=True,
        text=True,
    )
    if proc.returncode:
        raise RuntimeError(f"Training failed: {proc.stderr}")
    return proc.stdout
def get_training_metrics(model_path):
    """Read key metrics from ``trainer_state.json`` under ``model_path``.

    Args:
        model_path: Directory expected to contain ``trainer_state.json``.

    Returns:
        A dict with ``best_metric``, ``global_step`` and ``epoch``
        (missing keys map to ``None``), or ``None`` when the trainer
        state file does not exist.
    """
    state_path = Path(model_path) / "trainer_state.json"
    if not state_path.exists():
        return None
    state = json.loads(state_path.read_text())
    return {key: state.get(key) for key in ("best_metric", "global_step", "epoch")}
def main():
    """Run the pipeline: train, then enforce a quality gate on the best metric.

    Returns:
        True when training ran and the quality gate passed, False otherwise.
    """
    # Train
    print("Training model...")
    run_training("config.yaml")

    # Get metrics from trainer state
    print("Checking results...")
    metrics = get_training_metrics("./output")
    if metrics is None:
        # get_training_metrics returns None when trainer_state.json is
        # missing; the original code crashed here with AttributeError.
        print("No trainer_state.json found - cannot verify training!")
        return False
    print(f"Best metric: {metrics.get('best_metric')}")

    # Check quality gate (best_metric is typically eval_loss).
    # A missing or null best_metric fails the gate instead of raising
    # TypeError on None > 0.5.
    best_metric = metrics.get("best_metric")
    if best_metric is None or best_metric > 0.5:
        print("Quality gate failed!")
        return False

    print("Pipeline passed!")
    return True


if __name__ == "__main__":
    import sys

    # Propagate the gate result as the process exit code so shell callers
    # (set -e, CI jobs) can react to failures; previously it was always 0.
    sys.exit(0 if main() else 1)
Multi-Stage Pipeline
Training → Distillation → Evaluation
Copy
#!/bin/bash
# full_pipeline.sh — multi-stage pipeline:
# train a teacher model, distill it into a student, then compare both.
set -euo pipefail  # also catch unset variables and mid-pipeline failures

# Stage 1: Train teacher
echo "=== Stage 1: Training teacher model ==="
aitraining llm --train \
  --model google/gemma-2-2b \
  --data-path ./data \
  --project-name teacher-model \
  --epochs 3

# Stage 2: Distill to student
echo "=== Stage 2: Knowledge distillation ==="
aitraining llm --train \
  --model google/gemma-3-270m \
  --teacher-model ./teacher-model \
  --data-path ./data \
  --project-name student-model \
  --use-distillation \
  --distill-temperature 2.0 \
  --epochs 5

# Stage 3: Evaluate both
echo "=== Stage 3: Evaluation ==="
python compare_models.py \
  --teacher ./teacher-model \
  --student ./student-model

echo "Pipeline complete!"
Conditional Pipelines
With Quality Gates
Copy
#!/bin/bash
# quality_pipeline.sh — train, then gate deployment on the best eval metric.
set -euo pipefail

#######################################
# Train with a config and enforce a quality gate.
# Arguments:
#   $1 - path to the training config file
#   $2 - maximum acceptable best_metric (typically eval_loss)
# Returns: 0 if training succeeded and the metric passes, 1 otherwise.
#######################################
train_and_check() {
  local config=$1
  local threshold=$2
  local eval_loss

  # Train; abort the gate if training itself fails (set -e is suspended
  # inside an `if` condition, so check explicitly).
  aitraining --config "$config" || return 1

  # Get best metric from trainer_state.json (999 = metric missing -> fail)
  eval_loss=$(python -c "
import json
with open('output/trainer_state.json') as f:
    state = json.load(f)
print(state.get('best_metric', 999))
") || return 1

  # Compare floats with awk — bc may not be installed on minimal images.
  if awk -v loss="$eval_loss" -v limit="$threshold" 'BEGIN { exit !(loss > limit) }'; then
    echo "Quality gate failed: $eval_loss > $threshold"
    return 1
  fi
  return 0
}

# Run with quality gate
if train_and_check "config.yaml" 0.5; then
  echo "Proceeding to deployment..."
  # Deploy steps here
else
  echo "Pipeline stopped due to quality gate failure"
  exit 1
fi
Scheduled Training
Cron Job
Copy
# Add to crontab (crontab -e)
# Run daily at 2 AM
# NOTE(review): cron runs with a minimal environment (no login PATH, no
# HF_TOKEN/WANDB_API_KEY) — export required variables at the top of the
# crontab or inside the script itself.
0 2 * * * /path/to/training_pipeline.sh >> /var/log/training.log 2>&1
With Data Updates
Copy
#!/bin/bash
# scheduled_training.sh — retrain only when new files arrived in ./incoming.
set -euo pipefail

MARKER=./last_training_marker

# First run: no marker exists yet, so `find -newer` would error out.
# Create one dated at the epoch so every existing file counts as new.
[ -f "$MARKER" ] || touch -t 197001010000 "$MARKER"

# Check for new data (files modified after the last training run)
NEW_DATA=$(find ./incoming -newer "$MARKER" -type f | wc -l)

if [ "$NEW_DATA" -gt 0 ]; then
  echo "Found $NEW_DATA new files, starting training..."
  # Merge new data
  python merge_data.py
  # Train
  aitraining --config production.yaml
  # Update marker only after a successful run (set -e guarantees we got here)
  touch "$MARKER"
else
  echo "No new data, skipping training"
fi
CI/CD Integration
GitHub Actions
Copy
# .github/workflows/train.yml
# Retrain the model whenever training data or configs change on main.
name: Train Model

on:
  push:
    branches: [main]
    paths:
      - 'data/**'
      - 'configs/**'

jobs:
  train:
    runs-on: ubuntu-latest
    steps:
      # v3 of checkout/setup-python run on the deprecated Node 16 runtime;
      # use the current major versions.
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install aitraining

      - name: Train model
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
        run: aitraining --config configs/production.yaml