πŸ”βŒ˜K

Start typing to search docs.

Cost Management Runbook

1.0.0

Monitor Supabase costs and optimize shared infra.

Cost Management Runbook

This runbook provides comprehensive procedures for monitoring, optimizing, and controlling infrastructure costs across AWS and GCP deployments.

🎯 Overview

This runbook covers:

  • Cost monitoring and alerting
  • Budget management
  • Cost optimization strategies
  • Regular cost reviews
  • Emergency cost controls

💰 Cost Monitoring

AWS Cost Monitoring

Daily Cost Check

#!/bin/bash
# aws-daily-cost-check.sh
#
# Prints yesterday's AWS cost per service (services above 0.01) and the
# month-to-date total, using the Cost Explorer API.
# Requires: AWS CLI with Cost Explorer access, GNU date (for `date -d`).

set -euo pipefail

echo "💰 AWS Daily Cost Check"
echo "======================"

# Cost Explorer treats End as exclusive, so [yesterday, today) is one full day.
YESTERDAY=$(date -d "yesterday" +%Y-%m-%d)
TODAY=$(date +%Y-%m-%d)

echo "📊 Costs for $YESTERDAY:"

# Amount is returned as a string. JMESPath only orders numbers, so it has to
# be converted with to_number() or the filter silently matches nothing.
aws ce get-cost-and-usage \
  --time-period "Start=${YESTERDAY},End=${TODAY}" \
  --granularity DAILY \
  --metrics BlendedCost \
  --group-by Type=DIMENSION,Key=SERVICE \
  --query 'ResultsByTime[0].Groups[?to_number(Metrics.BlendedCost.Amount) > `0.01`].[Keys[0],Metrics.BlendedCost.Amount]' \
  --output table

# Get month-to-date costs
MONTH_START=$(date +%Y-%m-01)

echo ""
echo "📈 Month-to-date costs:"

if [[ "$MONTH_START" == "$TODAY" ]]; then
    # On the 1st, Start would equal End and the API rejects the request.
    echo "0 (no completed days in the current month yet)"
else
    aws ce get-cost-and-usage \
      --time-period "Start=${MONTH_START},End=${TODAY}" \
      --granularity MONTHLY \
      --metrics BlendedCost \
      --query 'ResultsByTime[0].Total.BlendedCost.Amount' \
      --output text
fi

Weekly Cost Analysis

#!/bin/bash
# aws-weekly-cost-analysis.sh
#
# Prints per-day service costs above 1.00 and per-day resource costs above
# 5.00 for the last 7 days, using the Cost Explorer API.
# Requires: AWS CLI with Cost Explorer access, GNU date (for `date -d`).
# Optional env: RESOURCE_SERVICE - service whose resources are listed
#               (default: "Amazon Elastic Compute Cloud - Compute").

set -euo pipefail

echo "📊 AWS Weekly Cost Analysis"
echo "=========================="

# End is exclusive, so [7 days ago, today) covers exactly seven days.
WEEK_AGO=$(date -d "7 days ago" +%Y-%m-%d)
TODAY=$(date +%Y-%m-%d)

# Cost by service.
# Groups[] flattens the per-day lists into one list of rows; to_number() is
# needed because Amount is a string and string/number comparison never matches.
echo "💸 Top services by cost (last 7 days):"
service_table=$(aws ce get-cost-and-usage \
  --time-period "Start=${WEEK_AGO},End=${TODAY}" \
  --granularity DAILY \
  --metrics BlendedCost \
  --group-by Type=DIMENSION,Key=SERVICE \
  --query 'ResultsByTime[].Groups[] | [?to_number(Metrics.BlendedCost.Amount) > `1.00`].[Keys[0],Metrics.BlendedCost.Amount]' \
  --output table)
# Output is captured first so that head closing the pipe early cannot make the
# aws command look like a failure.
head -n 20 <<<"$service_table"

# Cost by resource.
# Grouping by RESOURCE_ID is only supported by GetCostAndUsageWithResources,
# which requires resource-level data to be enabled in Cost Explorer and a
# filter on the service being inspected.
RESOURCE_SERVICE=${RESOURCE_SERVICE:-Amazon Elastic Compute Cloud - Compute}

echo ""
echo "🏷️ Top resources by cost (last 7 days):"
resource_table=$(aws ce get-cost-and-usage-with-resources \
  --time-period "Start=${WEEK_AGO},End=${TODAY}" \
  --granularity DAILY \
  --metrics BlendedCost \
  --filter "{\"Dimensions\":{\"Key\":\"SERVICE\",\"Values\":[\"${RESOURCE_SERVICE}\"]}}" \
  --group-by Type=DIMENSION,Key=RESOURCE_ID \
  --query 'ResultsByTime[].Groups[] | [?to_number(Metrics.BlendedCost.Amount) > `5.00`].[Keys[0],Metrics.BlendedCost.Amount]' \
  --output table)
head -n 10 <<<"$resource_table"

GCP Cost Monitoring

Daily Cost Check

#!/bin/bash
# gcp-daily-cost-check.sh
#
# Prints the ten most expensive GCP services for yesterday from the BigQuery
# billing export of the active gcloud project.
# Requires: gcloud, bq, GNU date, and a billing export in the dataset
#           `<project>.billing` (tables named gcp_billing_export_v1_*).

set -euo pipefail

echo "💰 GCP Daily Cost Check"
echo "======================"

PROJECT_ID=$(gcloud config get-value project 2>/dev/null)
if [[ -z "$PROJECT_ID" ]]; then
    # Without a project the table reference below would be malformed.
    echo "No active gcloud project; run 'gcloud config set project <id>'." >&2
    exit 1
fi
YESTERDAY=$(date -d "yesterday" +%Y-%m-%d)

echo "📊 Costs for $YESTERDAY (Project: $PROJECT_ID):"

# Get billing data (requires billing export to BigQuery).
# The 0.01 threshold is applied to the per-service total (HAVING); filtering
# rows in WHERE would discard the many small line items that make up a
# service's cost. GROUP BY 1 avoids ambiguity between the `service` alias and
# the `service` struct column.
bq query --use_legacy_sql=false \
"SELECT
  service.description AS service,
  ROUND(SUM(cost), 2) AS cost_usd
FROM \`${PROJECT_ID}.billing.gcp_billing_export_v1_*\`
WHERE DATE(usage_start_time) = '${YESTERDAY}'
GROUP BY 1
HAVING SUM(cost) > 0.01
ORDER BY cost_usd DESC
LIMIT 10"

Weekly Cost Analysis

#!/bin/bash
# gcp-weekly-cost-analysis.sh
#
# Prints the fifteen most expensive GCP services over the last 7 days from the
# BigQuery billing export of the active gcloud project.
# Requires: gcloud, bq, GNU date, and a billing export in the dataset
#           `<project>.billing` (tables named gcp_billing_export_v1_*).

set -euo pipefail

echo "📊 GCP Weekly Cost Analysis"
echo "=========================="

PROJECT_ID=$(gcloud config get-value project 2>/dev/null)
if [[ -z "$PROJECT_ID" ]]; then
    # Without a project the table reference below would be malformed.
    echo "No active gcloud project; run 'gcloud config set project <id>'." >&2
    exit 1
fi
WEEK_AGO=$(date -d "7 days ago" +%Y-%m-%d)
TODAY=$(date +%Y-%m-%d)

echo "💸 Top services by cost (last 7 days):"

# The window is half-open [WEEK_AGO, TODAY): an inclusive BETWEEN would cover
# eight calendar days, including the still-incomplete current day.
# GROUP BY 1 avoids ambiguity between the `service` alias and the `service`
# struct column.
bq query --use_legacy_sql=false \
"SELECT
  service.description AS service,
  ROUND(SUM(cost), 2) AS cost_usd,
  COUNT(*) AS usage_records
FROM \`${PROJECT_ID}.billing.gcp_billing_export_v1_*\`
WHERE DATE(usage_start_time) >= '${WEEK_AGO}'
  AND DATE(usage_start_time) < '${TODAY}'
  AND cost > 0
GROUP BY 1
ORDER BY cost_usd DESC
LIMIT 15"

📊 Budget Management

AWS Budget Setup

#!/bin/bash
# setup-aws-budgets.sh
#
# Creates two monthly AWS cost budgets, each scoped to resources carrying the
# user tag Environment=<environment>:
#   app-factory-development-monthly  (200 USD,  Environment=development)
#   app-factory-production-monthly   (1000 USD, Environment=production)
# The budget specs are written to dev-budget.json / prod-budget.json in the
# current directory. The script stops at the first failure (for example when a
# budget with the same name already exists).
# Requires: AWS CLI with Budgets and STS access.

set -euo pipefail

echo "💰 Setting up AWS Budgets"
echo "========================"

ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

#######################################
# Writes a budget spec and creates the budget from it.
# Globals:   ACCOUNT_ID (read)
# Arguments: $1 - spec file to write
#            $2 - budget name
#            $3 - monthly limit in USD
#            $4 - value of the Environment tag to filter on
# Returns:   status of `aws budgets create-budget`
#######################################
create_monthly_budget() {
    local spec_file=$1 budget_name=$2 amount=$3 environment=$4

    # Tag filters use the TagKeyValue key with the form "user:<key>$<value>".
    # The dollar sign is escaped so the heredoc emits it literally.
    cat > "$spec_file" << EOF
{
  "BudgetName": "${budget_name}",
  "BudgetLimit": {
    "Amount": "${amount}",
    "Unit": "USD"
  },
  "TimeUnit": "MONTHLY",
  "TimePeriod": {
    "Start": "$(date +%Y-%m-01)",
    "End": "2030-12-31"
  },
  "CostFilters": {
    "TagKeyValue": ["user:Environment\$${environment}"]
  },
  "BudgetType": "COST"
}
EOF

    aws budgets create-budget \
      --account-id "$ACCOUNT_ID" \
      --budget "file://${spec_file}"
}

# Monthly budget for development environment
create_monthly_budget dev-budget.json app-factory-development-monthly 200 development

# Production budget
create_monthly_budget prod-budget.json app-factory-production-monthly 1000 production

# Only reached when both budgets were created (set -e aborts otherwise).
echo "✅ AWS budgets created successfully"

GCP Budget Setup

#!/bin/bash
# setup-gcp-budgets.sh
#
# Creates two monthly GCP budgets on the billing account of the active gcloud
# project, each filtered to that project and an `environment` label:
#   "App Factory Development Monthly"  (200 USD,  environment=development)
#   "App Factory Production Monthly"   (1000 USD, environment=production)
# Each budget alerts at 50%, 90% and 100% of current spend.
# Requires: gcloud with billing budget permissions.

set -euo pipefail

echo "💰 Setting up GCP Budgets"
echo "========================"

PROJECT_ID=$(gcloud config get-value project 2>/dev/null)
if [[ -z "$PROJECT_ID" ]]; then
    echo "No active gcloud project; run 'gcloud config set project <id>'." >&2
    exit 1
fi

# billingAccountName has the form "billingAccounts/<id>"; keep only the id.
billing_account_name=$(gcloud billing projects describe "$PROJECT_ID" \
  --format="value(billingAccountName)")
BILLING_ACCOUNT=${billing_account_name##*/}
if [[ -z "$BILLING_ACCOUNT" ]]; then
    echo "Project $PROJECT_ID has no billing account attached." >&2
    exit 1
fi

#######################################
# Creates one monthly budget.
# Globals:   BILLING_ACCOUNT, PROJECT_ID (read)
# Arguments: $1 - display name
#            $2 - budget amount, e.g. 200USD
#            $3 - value of the `environment` label to filter on
# Returns:   status of `gcloud billing budgets create`
#######################################
create_monthly_budget() {
    local display_name=$1 amount=$2 environment=$3

    # threshold-rule percent is a fraction of the budget (0.5 = 50%), so the
    # values 50/90/100 would have meant 5000%/9000%/10000%.
    gcloud billing budgets create \
      --billing-account="$BILLING_ACCOUNT" \
      --display-name="$display_name" \
      --budget-amount="$amount" \
      --threshold-rule=percent=0.5,basis=current-spend \
      --threshold-rule=percent=0.9,basis=current-spend \
      --threshold-rule=percent=1.0,basis=current-spend \
      --filter-projects="$PROJECT_ID" \
      --filter-labels="environment=${environment}"
}

# Development environment budget
create_monthly_budget "App Factory Development Monthly" 200USD development

# Production environment budget
create_monthly_budget "App Factory Production Monthly" 1000USD production

# Only reached when both budgets were created (set -e aborts otherwise).
echo "✅ GCP budgets created successfully"

🔧 Cost Optimization

AWS Cost Optimization

RDS Optimization

#!/bin/bash
# optimize-aws-rds.sh
#
# Lists available RDS instances and prints each one's mean daily CPU
# utilization over the last 7 days, to help spot oversized instances.
# Requires: AWS CLI with RDS and CloudWatch read access, GNU date.

#######################################
# Averages the numbers read from stdin.
# Values may be separated by tabs, spaces or newlines (the AWS CLI text output
# places all datapoints of a list on a single tab-separated line); tokens that
# are not numbers are ignored.
# Outputs: "Average: N.NN%" on stdout, or nothing when no number was read.
#######################################
summarize_cpu_average() {
    awk '
        {
            for (i = 1; i <= NF; i++) {
                if ($i ~ /^-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?$/) {
                    sum += $i
                    count++
                }
            }
        }
        END { if (count > 0) printf "Average: %.2f%%\n", sum / count }
    '
}

echo "🗄️ AWS RDS Cost Optimization"
echo "============================"

# Find underutilized RDS instances
echo "📊 Checking RDS utilization..."

aws rds describe-db-instances \
  --query 'DBInstances[?DBInstanceStatus==`available`].[DBInstanceIdentifier,DBInstanceClass,Engine,AllocatedStorage]' \
  --output table

# The reporting window is the same for every instance, so compute it once.
START_TIME=$(date -d "7 days ago" --iso-8601)
END_TIME=$(date --iso-8601)

if ! instance_ids=$(aws rds describe-db-instances \
      --query 'DBInstances[?DBInstanceStatus==`available`].DBInstanceIdentifier' \
      --output text); then
    echo "Could not list RDS instances." >&2
    instance_ids=""
fi

# Get CloudWatch metrics for CPU utilization.
# Word splitting is intended here: the identifiers arrive tab-separated.
for instance in $instance_ids; do
    echo "📈 CPU utilization for $instance (last 7 days):"

    aws cloudwatch get-metric-statistics \
      --namespace AWS/RDS \
      --metric-name CPUUtilization \
      --dimensions "Name=DBInstanceIdentifier,Value=${instance}" \
      --start-time "$START_TIME" \
      --end-time "$END_TIME" \
      --period 86400 \
      --statistics Average \
      --query 'Datapoints[].Average' \
      --output text | summarize_cpu_average
done

S3 Optimization

#!/bin/bash
# optimize-aws-s3.sh
#
# Reports S3 buckets larger than 1,000,000,000 bytes and shows which buckets
# have a lifecycle configuration.
# Note: sizes come from a recursive listing, which is slow on large buckets.
# Requires: AWS CLI with S3 read access.

#######################################
# Formats a byte count as gigabytes (1024^3 bytes) with two decimals.
# Arguments: $1 - size in bytes
# Outputs:   e.g. "1.50 GB" on stdout (no trailing newline)
#######################################
bytes_to_gb() {
    awk -v bytes="$1" 'BEGIN { printf "%.2f GB", bytes / 1024 / 1024 / 1024 }'
}

echo "🪣 AWS S3 Cost Optimization"
echo "=========================="

# The text output puts every bucket name on one tab-separated line, so it is
# split into one name per line before being read.
bucket_names=$(aws s3api list-buckets --query 'Buckets[].Name' --output text | tr '\t' '\n')

# Find large S3 buckets
echo "📊 S3 bucket sizes:"
while IFS= read -r bucket; do
    # Skip blank lines and the "None" placeholder printed for an empty list.
    if [[ -z "$bucket" || "$bucket" == "None" ]]; then
        continue
    fi
    size=$(aws s3 ls "s3://${bucket}" --recursive --summarize \
      | grep "Total Size" | awk '{print $3}')
    # Guard against empty or non-numeric output before comparing.
    if [[ "$size" =~ ^[0-9]+$ ]] && (( size > 1000000000 )); then  # > 1GB
        echo "$bucket: $(bytes_to_gb "$size")"
    fi
done <<<"$bucket_names"

# Check lifecycle policies
echo ""
echo "🔄 Checking lifecycle policies:"
while IFS= read -r bucket; do
    if [[ -z "$bucket" || "$bucket" == "None" ]]; then
        continue
    fi
    # The call fails when the bucket has no lifecycle configuration; any other
    # failure (e.g. access denied) is reported the same way.
    if aws s3api get-bucket-lifecycle-configuration --bucket "$bucket" >/dev/null 2>&1; then
        echo "✅ $bucket has lifecycle policy"
    else
        echo "❌ $bucket missing lifecycle policy"
    fi
done <<<"$bucket_names"

GCP Cost Optimization

Cloud SQL Optimization

#!/bin/bash
# optimize-gcp-cloudsql.sh
#
# Lists Cloud SQL instances and prints the ten most recent log entries for
# each, as a rough activity indicator. It does not read CPU metrics; those
# are only available through the Cloud Monitoring API.
# Requires: gcloud with Cloud SQL and Cloud Logging read access.

echo "🗄️ GCP Cloud SQL Cost Optimization"
echo "=================================="

PROJECT_ID=$(gcloud config get-value project 2>/dev/null)
if [[ -z "$PROJECT_ID" ]]; then
    echo "No active gcloud project; run 'gcloud config set project <id>'." >&2
    exit 1
fi

# List Cloud SQL instances
echo "📊 Cloud SQL instances:"
gcloud sql instances list --format="table(name,tier,region,status)"

# Show recent log activity per instance.
# Word splitting is intended: instance names arrive one per line.
for instance in $(gcloud sql instances list --format="value(name)"); do
    echo ""
    echo "📈 Checking utilization for $instance..."

    # Cloud SQL log entries use the resource type "cloudsql_database" and
    # identify the instance as "<project>:<instance>".
    recent_logs=$(gcloud logging read \
      "resource.type=cloudsql_database AND resource.labels.database_id=\"${PROJECT_ID}:${instance}\"" \
      --limit=10 \
      --format="value(timestamp,jsonPayload.message)" 2>/dev/null)

    # An empty result still exits 0, so test the output rather than the status.
    if [[ -n "$recent_logs" ]]; then
        printf '%s\n' "$recent_logs"
    else
        echo "No recent logs found"
    fi
done

Cloud Storage Optimization

#!/bin/bash
# optimize-gcp-storage.sh
#
# Prints storage class and total size (bytes) for every Cloud Storage bucket
# and shows which buckets have a lifecycle configuration.
# Requires: gsutil with read access to the buckets.

#######################################
# Decides whether the text printed by `gsutil lifecycle get` describes a real
# policy. gsutil exits 0 and prints "<bucket> has no lifecycle configuration."
# when none is set, so the exit status alone cannot be used.
# Arguments: $1 - output of `gsutil lifecycle get`
# Returns:   0 when a policy is present, 1 otherwise
#######################################
lifecycle_output_has_policy() {
    local output=$1
    [[ -n "$output" && "$output" != *"has no lifecycle configuration"* ]]
}

echo "🪣 GCP Cloud Storage Cost Optimization"
echo "====================================="

# One bucket URL per line, e.g. gs://my-bucket/
bucket_urls=$(gsutil ls 2>/dev/null)

# List storage buckets with sizes.
# Bucket metadata (`ls -L -b`) carries the storage class but no size; the size
# comes from `du -s`, which sums all objects in the bucket.
echo "📊 Storage bucket analysis:"
while IFS= read -r bucket; do
    [[ -n "$bucket" ]] || continue
    class=$(gsutil ls -L -b "$bucket" 2>/dev/null </dev/null \
      | awk '/Storage class/ {print $3; exit}')
    size=$(gsutil du -s "$bucket" 2>/dev/null </dev/null | awk '{print $1; exit}')
    echo "$bucket ${class:-unknown} ${size:-unknown}"
done <<<"$bucket_urls"

# Check lifecycle policies
echo ""
echo "🔄 Checking lifecycle policies:"
while IFS= read -r bucket; do
    [[ -n "$bucket" ]] || continue
    lifecycle=$(gsutil lifecycle get "$bucket" 2>/dev/null </dev/null)
    if lifecycle_output_has_policy "$lifecycle"; then
        echo "✅ $bucket has lifecycle policy"
    else
        echo "❌ $bucket missing lifecycle policy"
    fi
done <<<"$bucket_urls"

🚨 Emergency Cost Controls

AWS Emergency Shutdown

#!/bin/bash
# aws-emergency-shutdown.sh
#
# Emergency cost control: after an interactive confirmation, stops every
# available RDS instance and every running EC2 instance tagged
# Environment=development. Stops are best-effort: a failure on one resource
# does not prevent the others from being stopped, but the script exits
# non-zero if any stop failed.
# Requires: AWS CLI with RDS and EC2 stop permissions.

echo "🚨 AWS Emergency Cost Control"
echo "============================"

read -p "⚠️ This will shut down non-essential resources. Continue? (y/N) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "❌ Emergency shutdown cancelled"
    exit 1
fi

failures=0

# Stop non-production RDS instances.
# describe-db-instances exposes tags as TagList; the `|| []` fallback keeps
# contains() from failing on instances that have no tags at all.
echo "🛑 Stopping development RDS instances..."
if ! rds_ids=$(aws rds describe-db-instances \
      --query 'DBInstances[?DBInstanceStatus==`available` && contains(TagList[?Key==`Environment`].Value || `[]`, `development`)].DBInstanceIdentifier' \
      --output text); then
    echo "Could not list RDS instances." >&2
    failures=$((failures + 1))
    rds_ids=""
fi

# Word splitting is intended: text output is one tab-separated line.
for instance in $rds_ids; do
    # "None" is what the text output prints for an empty result.
    [[ "$instance" == "None" ]] && continue
    echo "Stopping $instance..."
    if ! aws rds stop-db-instance --db-instance-identifier "$instance"; then
        echo "Failed to stop RDS instance $instance" >&2
        failures=$((failures + 1))
    fi
done

# Stop non-production EC2 instances
echo "🛑 Stopping development EC2 instances..."
if ! ec2_ids=$(aws ec2 describe-instances \
      --filters "Name=tag:Environment,Values=development" "Name=instance-state-name,Values=running" \
      --query 'Reservations[].Instances[].InstanceId' \
      --output text); then
    echo "Could not list EC2 instances." >&2
    failures=$((failures + 1))
    ec2_ids=""
fi

for instance in $ec2_ids; do
    [[ "$instance" == "None" ]] && continue
    echo "Stopping $instance..."
    if ! aws ec2 stop-instances --instance-ids "$instance"; then
        echo "Failed to stop EC2 instance $instance" >&2
        failures=$((failures + 1))
    fi
done

if (( failures > 0 )); then
    echo "❌ Emergency shutdown finished with $failures error(s)" >&2
    exit 1
fi

echo "✅ Emergency shutdown complete"

GCP Emergency Shutdown

#!/bin/bash
# gcp-emergency-shutdown.sh
#
# Emergency cost control: after an interactive confirmation, stops every Cloud
# SQL instance and every running Compute Engine instance labelled
# environment=development. Stops are best-effort: a failure on one resource
# does not prevent the others from being stopped, but the script exits
# non-zero if any stop failed.
# Requires: gcloud with Cloud SQL and Compute Engine admin permissions.

echo "🚨 GCP Emergency Cost Control"
echo "============================"

read -p "⚠️ This will shut down non-essential resources. Continue? (y/N) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "❌ Emergency shutdown cancelled"
    exit 1
fi

failures=0

# Stop development Cloud SQL instances.
# Cloud SQL stores user labels under settings.userLabels, not labels.
echo "🛑 Stopping development Cloud SQL instances..."
while IFS= read -r instance; do
    [[ -n "$instance" ]] || continue
    echo "Stopping $instance..."
    # --quiet and </dev/null keep gcloud from prompting or consuming the list
    # of instances that this loop is reading.
    if ! gcloud sql instances patch "$instance" --activation-policy=NEVER --quiet </dev/null; then
        echo "Failed to stop Cloud SQL instance $instance" >&2
        failures=$((failures + 1))
    fi
done < <(gcloud sql instances list \
           --filter="settings.userLabels.environment=development" \
           --format="value(name)")

# Stop development Compute Engine instances.
# zone.basename() yields the short zone name instead of the full resource URL.
echo "🛑 Stopping development Compute Engine instances..."
while read -r instance zone; do
    [[ -n "$instance" ]] || continue
    echo "Stopping $instance in $zone..."
    if ! gcloud compute instances stop "$instance" --zone="$zone" --quiet </dev/null; then
        echo "Failed to stop Compute Engine instance $instance" >&2
        failures=$((failures + 1))
    fi
done < <(gcloud compute instances list \
           --filter="labels.environment=development AND status=RUNNING" \
           --format="value(name,zone.basename())")

if (( failures > 0 )); then
    echo "❌ Emergency shutdown finished with $failures error(s)" >&2
    exit 1
fi

echo "✅ Emergency shutdown complete"

📈 Cost Reporting

Monthly Cost Report

#!/bin/bash
# monthly-cost-report.sh
#
# Prints the current month's cost and, for AWS, the previous month's cost and
# the percentage change between them.
# Env:      PROVIDER - "aws" or "gcp"; any other value skips the provider
#                      section with a warning.
# Requires: GNU date; AWS CLI (aws) or gcloud + bq (gcp).

#######################################
# Prints the month (YYYY-MM) preceding the month of the given date.
# Anchors on the 1st because "last month" relative to the 29th-31st can land
# back in the current month when the previous month is shorter.
# Arguments: $1 - date as YYYY-MM-DD
#######################################
previous_month() {
    date -d "${1%-*}-01 -1 month" +%Y-%m
}

#######################################
# Prints the percentage change from a previous to a current amount.
# Arguments: $1 - current amount, $2 - previous amount
# Outputs:   change with two decimals, or "n/a" when the previous amount is 0
#            (the change is undefined and would divide by zero)
#######################################
percent_change() {
    awk -v cur="$1" -v prev="$2" 'BEGIN {
        if (prev + 0 == 0) { print "n/a"; exit }
        printf "%.2f", (cur - prev) / prev * 100
    }'
}

# True when $1 looks like a plain decimal number.
is_number() {
    [[ "$1" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]
}

echo "📊 Monthly Infrastructure Cost Report"
echo "===================================="

TODAY=$(date +%Y-%m-%d)
MONTH=${TODAY%-*}
LAST_MONTH=$(previous_month "$TODAY")
PROVIDER=${PROVIDER:-}

echo "Report for: $MONTH"
echo "Comparison with: $LAST_MONTH"
echo ""

if [ "$PROVIDER" = "aws" ]; then
    # AWS monthly report
    echo "💰 AWS Costs:"

    # Current month. On the 1st, Start would equal End and the API rejects the
    # request, so report zero instead.
    if [ "$TODAY" = "${MONTH}-01" ]; then
        CURRENT_COST=0
    else
        CURRENT_COST=$(aws ce get-cost-and-usage \
          --time-period "Start=${MONTH}-01,End=${TODAY}" \
          --granularity MONTHLY \
          --metrics BlendedCost \
          --query 'ResultsByTime[0].Total.BlendedCost.Amount' \
          --output text)
    fi

    echo "Current month (${MONTH}): \$${CURRENT_COST}"

    # Previous month
    LAST_MONTH_COST=$(aws ce get-cost-and-usage \
      --time-period "Start=${LAST_MONTH}-01,End=${MONTH}-01" \
      --granularity MONTHLY \
      --metrics BlendedCost \
      --query 'ResultsByTime[0].Total.BlendedCost.Amount' \
      --output text)

    echo "Previous month (${LAST_MONTH}): \$${LAST_MONTH_COST}"

    # Calculate change only when both amounts are usable numbers.
    if is_number "$CURRENT_COST" && is_number "$LAST_MONTH_COST"; then
        CHANGE=$(percent_change "$CURRENT_COST" "$LAST_MONTH_COST")
        if [ "$CHANGE" = "n/a" ]; then
            echo "Change: n/a (no spend in previous month)"
        else
            echo "Change: ${CHANGE}%"
        fi
    fi

elif [ "$PROVIDER" = "gcp" ]; then
    # GCP monthly report
    echo "💰 GCP Costs:"

    PROJECT_ID=$(gcloud config get-value project 2>/dev/null)

    # Current month costs (requires BigQuery billing export)
    bq query --use_legacy_sql=false \
    "SELECT
      ROUND(SUM(cost), 2) AS current_month_cost
    FROM \`${PROJECT_ID}.billing.gcp_billing_export_v1_*\`
    WHERE DATE(usage_start_time) >= DATE '${MONTH}-01'
      AND DATE(usage_start_time) < DATE_ADD(DATE '${MONTH}-01', INTERVAL 1 MONTH)"
else
    echo "PROVIDER must be set to 'aws' or 'gcp' (got '${PROVIDER}'); skipping provider costs." >&2
fi

echo ""
echo "📋 Cost Breakdown by Environment:"

# Environment-specific costs would go here
echo "Development: [Calculate from tags/labels]"
echo "Production: [Calculate from tags/labels]"

Cost Optimization Recommendations

#!/bin/bash
# cost-optimization-recommendations.sh
#
# Prints provider-generated cost recommendations followed by a fixed list of
# general recommendations.
# Env:      PROVIDER         - "aws" or "gcp"; any other value skips the
#                              provider section with a warning.
#           RECOMMENDER_ZONE - (gcp) zone to query, default us-central1-a.
# Requires: AWS CLI with Cost Explorer access (aws) or gcloud (gcp).

echo "💡 Cost Optimization Recommendations"
echo "===================================="

PROVIDER=${PROVIDER:-}

if [ "$PROVIDER" = "aws" ]; then
    echo "🔍 AWS Recommendations:"

    # Rightsizing recommendations: instances Cost Explorer suggests
    # terminating. The service name is "AmazonEC2", the enum value is
    # upper-case, and the instance details sit under CurrentInstance.
    aws ce get-rightsizing-recommendation \
      --service AmazonEC2 \
      --query 'RightsizingRecommendations[?RightsizingType==`TERMINATE`].[CurrentInstance.ResourceId,CurrentInstance.ResourceDetails.EC2ResourceDetails.InstanceType,RightsizingType]' \
      --output table \
      || echo "Could not fetch rightsizing recommendations." >&2

    # Reserved Instance recommendations. RecommendationDetails is a list, so
    # it is flattened before the columns are selected.
    aws ce get-reservation-purchase-recommendation \
      --service "Amazon Elastic Compute Cloud - Compute" \
      --query 'Recommendations[].RecommendationDetails[].[InstanceDetails.EC2InstanceDetails.InstanceType,RecommendedNumberOfInstancesToPurchase]' \
      --output table \
      || echo "Could not fetch reservation recommendations." >&2

elif [ "$PROVIDER" = "gcp" ]; then
    echo "🔍 GCP Recommendations:"

    # Machine-type recommendations are published per zone, so the location
    # must be a zone rather than a region.
    gcloud recommender recommendations list \
      --project="$(gcloud config get-value project 2>/dev/null)" \
      --recommender=google.compute.instance.MachineTypeRecommender \
      --location="${RECOMMENDER_ZONE:-us-central1-a}" \
      --format="table(name,description,primaryImpact.costProjection.cost.units)" \
      || echo "Could not fetch GCP recommendations." >&2
else
    echo "PROVIDER must be set to 'aws' or 'gcp' (got '${PROVIDER}'); skipping provider recommendations." >&2
fi

echo ""
echo "📊 General Recommendations:"
echo "1. Review unused resources monthly"
echo "2. Implement auto-scaling policies"
echo "3. Use spot/preemptible instances for non-critical workloads"
echo "4. Set up lifecycle policies for storage"
echo "5. Monitor and optimize database performance"

Last updated: $(date) Version: 1.0.0