#!/bin/bash
# QAUP zero-downtime update script.
#
# Orchestrates Docker/Nginx based updates of the QAUP stack:
#   full          complete flow (pre-check, DB migration, rolling update, verify)
#   rolling       rolling update of the application container
#   blue-green    blue/green deployment of the application container
#   db-migration  run the SQL files found in $PROJECT_ROOT/sql/migrations
#   verify        post-update verification
#   backup SVC    snapshot the running qaup-SVC container as an image
#   restore SVC   restart qaup-SVC from the last recorded snapshot
#
# NOTE: `set -e` is NOT effective inside functions that are called from a
# condition (`if ! func; then ...`), so every critical command below is
# checked explicitly instead of relying on it.

set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Log directory, resolved to an absolute path once at startup. Several
# functions `cd` into $PROJECT_ROOT; a relative "./logs" would silently move
# (or break) the log file after that point.
LOG_DIR="$PWD/logs"

# Nginx site config inside the qaup-nginx container whose upstream is rewritten.
NGINX_CONF="/etc/nginx/conf.d/default.conf"

# Colored output
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m'

#######################################
# Print a message wrapped in a color escape sequence.
# Arguments: $1 - color escape sequence, $2 - message text
# Outputs:   the colored message on stdout
#######################################
print_message() {
  local color=$1
  local message=$2
  printf '%s%s%s\n' "$color" "$message" "$NC"
}

#######################################
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
#######################################
get_timestamp() {
  date '+%Y-%m-%d %H:%M:%S'
}

#######################################
# Append a timestamped message to $LOG_DIR/update.log and echo it in blue.
# Globals:   LOG_DIR (read), BLUE (read)
# Arguments: $1 - message text
#######################################
log_update() {
  local message=$1
  local timestamp
  timestamp=$(get_timestamp)
  printf '[%s] %s\n' "$timestamp" "$message" >> "$LOG_DIR/update.log"
  print_message "$BLUE" "[$timestamp] $message"
}

#######################################
# Test whether a container with exactly the given name is running.
# An exact match is required so that e.g. "qaup-app-new" is not mistaken
# for "qaup-app".
# Arguments: $1 - container name
# Returns:   0 if running, 1 otherwise (including when `docker ps` fails)
#######################################
container_running() {
  local name=$1
  local names
  names=$(docker ps --format '{{.Names}}') || return 1
  grep -Fxq -- "$name" <<<"$names"
}

#######################################
# Rewrite the Nginx upstream host and reload Nginx in the qaup-nginx container.
# The reload only happens when the rewrite succeeded.
# Globals:   NGINX_CONF (read)
# Arguments: $1 - upstream container name currently in the config
#            $2 - upstream container name to switch to
# Returns:   exit status of `docker exec`
#######################################
switch_nginx_upstream() {
  local from=$1
  local to=$2
  docker exec qaup-nginx sh -c \
    "sed -i 's/${from}:8080/${to}:8080/g' ${NGINX_CONF} && nginx -s reload"
}

#######################################
# Poll a service until it reports healthy.
# Arguments: $1 - service: app | nginx | postgres | redis
#            $2 - max attempts, 2 seconds apart (default 30)
#            $3 - host port used for the "app" probe (default 8080)
# Returns:   0 as soon as a probe succeeds, 1 when all attempts failed
#            (an unknown service name never succeeds)
#######################################
check_service_health() {
  local service=$1
  local max_attempts=${2:-30}
  local app_port=${3:-8080}
  local attempt=0

  while [ "$attempt" -lt "$max_attempts" ]; do
    case "$service" in
      app)
        if curl -f -s "http://localhost:${app_port}/actuator/health" &>/dev/null; then
          return 0
        fi
        ;;
      nginx)
        if curl -f -s http://localhost/health &>/dev/null; then
          return 0
        fi
        ;;
      postgres)
        if docker exec qaup-postgres pg_isready -h localhost -p 5432 -U postgres &>/dev/null; then
          return 0
        fi
        ;;
      redis)
        if docker exec qaup-redis redis-cli ping &>/dev/null; then
          return 0
        fi
        ;;
    esac

    sleep 2
    attempt=$((attempt + 1))
  done

  return 1
}

#######################################
# Wait for a service to become healthy, logging the outcome.
# Arguments: $1 - service name (see check_service_health)
#            $2 - timeout in seconds (default 60); probes run every 2 seconds
#            $3 - host port used for the "app" probe (default 8080)
# Returns:   0 when ready, 1 on timeout
#######################################
wait_for_service() {
  local service=$1
  local timeout=${2:-60}
  local app_port=${3:-8080}

  log_update "等待 $service 服务就绪..."

  if check_service_health "$service" $((timeout / 2)) "$app_port"; then
    log_update "$service 服务已就绪"
    return 0
  else
    log_update "$service 服务启动超时"
    return 1
  fi
}

#######################################
# Snapshot the running qaup-<service> container as an image and record the
# image name in $LOG_DIR/<service>_backup.txt.
# Arguments: $1 - service name
# Returns:   0 on success, 1 if the container is not running or the
#            snapshot could not be created
#######################################
create_service_backup() {
  local service=$1
  local backup_name
  backup_name="${service}_backup_$(date +%Y%m%d_%H%M%S)"

  log_update "创建 $service 服务备份: $backup_name"

  if ! container_running "qaup-$service"; then
    log_update "警告: $service 容器未运行,跳过备份"
    return 1
  fi

  # Export the currently running container as an image.
  if ! docker commit "qaup-$service" "$backup_name"; then
    log_update "错误: $service 服务备份创建失败"
    return 1
  fi

  log_update "$service 服务备份已创建: $backup_name"
  printf '%s\n' "$backup_name" > "$LOG_DIR/${service}_backup.txt"
  return 0
}

#######################################
# Replace the qaup-<service> container with one started from the image
# recorded by create_service_backup.
# Arguments: $1 - service name
# Returns:   0 when the restored service became healthy, 1 otherwise
#
# NOTE(review): the container is started with the network only - no published
# ports, env file or volumes. For "app" the health probe targets
# localhost:8080, which this container does not publish - confirm whether the
# original run options should be replicated here.
#######################################
restore_service_backup() {
  local service=$1
  local backup_file="$LOG_DIR/${service}_backup.txt"
  local backup_name

  if [ ! -f "$backup_file" ]; then
    log_update "错误: 未找到 $service 服务备份信息"
    return 1
  fi

  backup_name=$(<"$backup_file")
  if [ -z "$backup_name" ]; then
    log_update "错误: 未找到 $service 服务备份信息"
    return 1
  fi

  log_update "恢复 $service 服务备份: $backup_name"

  # Stop the current container (best effort: it may already be gone).
  docker stop "qaup-$service" || true
  docker rm "qaup-$service" || true

  # Start a container from the backup image.
  if ! docker run -d --name "qaup-$service" \
    --network qaup_qaup-network \
    "$backup_name"; then
    log_update "$service 服务备份恢复失败"
    return 1
  fi

  if wait_for_service "$service"; then
    log_update "$service 服务备份恢复成功"
    return 0
  else
    log_update "$service 服务备份恢复失败"
    return 1
  fi
}

#######################################
# Best-effort removal of a container by name.
# Arguments: $1 - container name
#######################################
remove_container() {
  local name=$1
  docker stop "$name" || true
  docker rm "$name" || true
}

#######################################
# Rolling update of the application container:
# build -> start new container on host port 8081 -> health check ->
# switch Nginx -> verify -> retire old container -> rename new one.
# Returns: 0 on success, 1 on failure (the new container is cleaned up and
#          Nginx is pointed back at qaup-app)
#
# NOTE(review): after the rename the live container keeps host port 8081, so
# later probes of localhost:8080 (check_service_health "app") and the next
# rolling update's `-p 8081:8080` are affected - confirm the intended port
# strategy.
#######################################
rolling_update_app() {
  log_update "开始应用服务滚动更新..."

  # 1. Create a backup. A missing backup is a warning (logged by the callee),
  #    not a reason to abort the update.
  create_service_backup "app" || true

  # 2. Build the new image.
  log_update "构建新的应用镜像..."
  cd "$PROJECT_ROOT" || return 1
  if ! docker build -t qaup-app:new .; then
    log_update "应用镜像构建失败,取消更新"
    return 1
  fi

  # 3. Start the new container on a temporary host port.
  log_update "启动新应用容器..."
  if ! docker run -d --name qaup-app-new \
    --network qaup_qaup-network \
    -p 8081:8080 \
    --env-file .env \
    -v qaup_app_logs:/app/logs \
    -v qaup_app_uploads:/app/uploadPath \
    qaup-app:new; then
    log_update "新应用容器启动失败,回滚..."
    docker rm qaup-app-new &>/dev/null || true
    return 1
  fi

  # 4. Wait for the NEW container (published on 8081, not the old one on 8080).
  if ! check_service_health "app" 30 8081; then
    log_update "新应用容器启动失败,回滚..."
    remove_container qaup-app-new
    return 1
  fi

  # 5. Point Nginx at the new container.
  log_update "更新负载均衡配置..."
  if ! switch_nginx_upstream "qaup-app" "qaup-app-new"; then
    log_update "负载均衡配置更新失败,回滚..."
    switch_nginx_upstream "qaup-app-new" "qaup-app" || true
    remove_container qaup-app-new
    return 1
  fi

  # 6. Verify the new service through Nginx.
  sleep 10
  if curl -f -s http://localhost/prod-api/actuator/health &>/dev/null; then
    log_update "新应用服务验证成功"

    # 7. Stop the old container.
    log_update "停止旧应用容器..."
    remove_container qaup-app

    # 8. Rename the new container.
    if ! docker rename qaup-app-new qaup-app; then
      log_update "重命名新应用容器失败"
      return 1
    fi

    # The name qaup-app-new no longer exists after the rename; point the
    # config back at qaup-app so future reloads and updates keep working.
    if ! switch_nginx_upstream "qaup-app-new" "qaup-app"; then
      log_update "负载均衡配置更新失败"
      return 1
    fi

    # 9. Update image tags.
    docker tag qaup-app:new qaup-app:latest || true
    docker rmi qaup-app:new || true

    log_update "应用服务滚动更新完成"
    return 0
  else
    log_update "新应用服务验证失败,回滚..."

    # Roll back the Nginx config.
    switch_nginx_upstream "qaup-app-new" "qaup-app" || true

    # Clean up the new container.
    remove_container qaup-app-new
    return 1
  fi
}

#######################################
# Blue/green deployment of the application container.
# The new environment is started as qaup-app-<color>, traffic is switched via
# Nginx, and on success the old container is removed and the new one renamed
# to qaup-app.
# Returns: 0 on success, 1 on failure
#######################################
blue_green_deployment() {
  log_update "开始蓝绿部署..."

  local current_env="blue"
  local new_env="green"

  # Detect the current environment.
  if container_running "qaup-app-green"; then
    current_env="green"
    new_env="blue"
  fi

  # A successful deployment renames the live container to "qaup-app", so the
  # colored name usually does not exist; fall back to the plain name.
  local current_container="qaup-app-$current_env"
  if ! container_running "$current_container"; then
    current_container="qaup-app"
  fi
  local new_container="qaup-app-$new_env"

  log_update "当前环境: $current_env, 目标环境: $new_env"

  # 1. Build the new image.
  log_update "构建新镜像..."
  cd "$PROJECT_ROOT" || return 1
  if ! docker build -t "qaup-app:$new_env" .; then
    log_update "$new_env 环境启动失败"
    return 1
  fi

  # 2. Start the new environment.
  log_update "启动 $new_env 环境..."
  if ! docker run -d --name "$new_container" \
    --network qaup_qaup-network \
    --env-file .env \
    -v qaup_app_logs:/app/logs \
    -v qaup_app_uploads:/app/uploadPath \
    "qaup-app:$new_env"; then
    log_update "$new_env 环境启动失败"
    docker rm "$new_container" &>/dev/null || true
    return 1
  fi

  # 3. Wait for the new environment.
  # NOTE(review): the new container publishes no host port, so this probe of
  # localhost:8080 cannot reach it; the real check is step 5 (through Nginx).
  # The running-state test at least catches an immediate crash.
  if ! wait_for_service "app" || ! container_running "$new_container"; then
    log_update "$new_env 环境启动失败"
    remove_container "$new_container"
    return 1
  fi

  # 4. Switch traffic.
  log_update "切换流量到 $new_env 环境..."
  if ! switch_nginx_upstream "$current_container" "$new_container"; then
    log_update "$new_env 环境验证失败,回滚..."
    switch_nginx_upstream "$new_container" "$current_container" || true
    remove_container "$new_container"
    return 1
  fi

  # 5. Verify the new environment through Nginx.
  sleep 10
  if curl -f -s http://localhost/prod-api/actuator/health &>/dev/null; then
    log_update "$new_env 环境验证成功"

    # 6. Stop the old environment.
    log_update "停止 $current_env 环境..."
    remove_container "$current_container"

    # 7. Rename the new environment's container and keep Nginx in sync with
    #    the new name.
    if ! docker rename "$new_container" qaup-app; then
      log_update "重命名 $new_env 环境容器失败"
      return 1
    fi
    if ! switch_nginx_upstream "$new_container" "qaup-app"; then
      log_update "负载均衡配置更新失败"
      return 1
    fi

    log_update "蓝绿部署完成"
    return 0
  else
    log_update "$new_env 环境验证失败,回滚..."

    # Roll traffic back.
    switch_nginx_upstream "$new_container" "$current_container" || true

    # Clean up the failed environment.
    remove_container "$new_container"
    return 1
  fi
}

#######################################
# Back up the database, then run every *.sql file in
# $PROJECT_ROOT/sql/migrations (in glob order) via psql inside qaup-postgres.
# The files are addressed as /migrations/<name> inside the container
# (presumably a bind mount of the same directory - verify against the
# compose/run configuration).
# Returns: 0 on success or when no migration directory exists,
#          1 if the backup or any migration fails
#######################################
database_online_migration() {
  log_update "开始数据库在线迁移..."

  # 1. Create a database backup; never migrate without one.
  log_update "创建数据库备份..."
  if ! docker exec qaup-postgres /backup-db.sh; then
    log_update "数据库备份失败,停止迁移"
    return 1
  fi

  # 2. Locate the migration scripts.
  local migration_dir="$PROJECT_ROOT/sql/migrations"
  if [ ! -d "$migration_dir" ]; then
    log_update "未找到数据库迁移脚本目录: $migration_dir"
    return 0
  fi

  # 3. Run the migration scripts.
  local migration_file filename
  for migration_file in "$migration_dir"/*.sql; do
    # Skips the literal pattern when the glob matched nothing.
    [ -f "$migration_file" ] || continue

    filename=$(basename "$migration_file")
    log_update "执行迁移脚本: $filename"

    if docker exec qaup-postgres psql -U postgres -d qaup -f "/migrations/$filename"; then
      log_update "迁移脚本 $filename 执行成功"
    else
      log_update "迁移脚本 $filename 执行失败,停止迁移"
      return 1
    fi
  done

  log_update "数据库在线迁移完成"
  return 0
}

#######################################
# Run the five health checks (containers, app, frontend, database, Redis)
# and log a summary.
# Returns: 0 if every check passed, 1 otherwise
#######################################
post_update_verification() {
  log_update "开始更新后验证..."

  local checks_passed=0
  local total_checks=5
  local running_names

  # 1. At least one qaup-* container is running.
  log_update "检查容器状态..."
  running_names=$(docker ps --format '{{.Names}}') || running_names=""
  if grep -q "qaup-" <<<"$running_names"; then
    checks_passed=$((checks_passed + 1))
    log_update "✓ 容器状态检查通过"
  else
    log_update "✗ 容器状态检查失败"
  fi

  # 2. Application health.
  log_update "检查应用健康状态..."
  if check_service_health "app"; then
    checks_passed=$((checks_passed + 1))
    log_update "✓ 应用健康检查通过"
  else
    log_update "✗ 应用健康检查失败"
  fi

  # 3. Frontend reachability.
  log_update "检查前端访问..."
  if curl -f -s http://localhost/health &>/dev/null; then
    checks_passed=$((checks_passed + 1))
    log_update "✓ 前端访问检查通过"
  else
    log_update "✗ 前端访问检查失败"
  fi

  # 4. Database connectivity.
  log_update "检查数据库连接..."
  if check_service_health "postgres"; then
    checks_passed=$((checks_passed + 1))
    log_update "✓ 数据库连接检查通过"
  else
    log_update "✗ 数据库连接检查失败"
  fi

  # 5. Redis connectivity.
  log_update "检查 Redis 连接..."
  if check_service_health "redis"; then
    checks_passed=$((checks_passed + 1))
    log_update "✓ Redis 连接检查通过"
  else
    log_update "✗ Redis 连接检查失败"
  fi

  # Summary.
  log_update "验证完成: $checks_passed/$total_checks 项检查通过"

  if [ "$checks_passed" -eq "$total_checks" ]; then
    log_update "✓ 所有验证检查通过,更新成功"
    return 0
  else
    log_update "✗ 部分验证检查失败,建议检查系统状态"
    return 1
  fi
}

#######################################
# Complete zero-downtime update: pre-check, DB migration, rolling app update
# and post-update verification.
# Returns: 0 on success, 1 as soon as any stage fails
#######################################
full_zero_downtime_update() {
  log_update "========================================="
  log_update "开始零停机更新流程"
  log_update "========================================="

  # 1. Pre-check (reuses the post-update verification).
  log_update "执行更新前检查..."
  if ! post_update_verification; then
    log_update "更新前检查失败,取消更新"
    return 1
  fi

  # 2. Database migration.
  if ! database_online_migration; then
    log_update "数据库迁移失败,取消更新"
    return 1
  fi

  # 3. Application update.
  if ! rolling_update_app; then
    log_update "应用更新失败,尝试回滚..."
    # The overall result is a failure regardless of how the restore goes.
    restore_service_backup "app" || true
    return 1
  fi

  # 4. Post-update verification.
  if ! post_update_verification; then
    log_update "更新后验证失败,建议检查系统状态"
    return 1
  fi

  log_update "========================================="
  log_update "零停机更新完成"
  log_update "========================================="

  return 0
}

#######################################
# Print usage information.
#######################################
show_help() {
  cat <<EOF
QAUP 零停机更新脚本

用法: $0 [命令]

命令:
 full 完整的零停机更新流程
 rolling 滚动更新应用服务
 blue-green 蓝绿部署
 db-migration 数据库在线迁移
 verify 更新后验证
 backup 创建服务备份
 restore 恢复服务备份

示例:
 $0 full # 执行完整更新流程
 $0 rolling # 仅滚动更新应用
 $0 backup app # 备份应用服务
 $0 restore app # 恢复应用服务
EOF
}

#######################################
# Entry point: dispatch the sub-command.
# Arguments: $1 - command, $2 - service name (backup/restore only)
# Returns:   status of the selected command; 1 for usage errors
#######################################
main() {
  # Make sure the log directory exists.
  mkdir -p "$LOG_DIR"

  if [ $# -eq 0 ]; then
    show_help
    return 0
  fi

  local command=$1
  shift

  case "$command" in
    full)
      full_zero_downtime_update
      ;;
    rolling)
      rolling_update_app
      ;;
    blue-green)
      blue_green_deployment
      ;;
    db-migration)
      database_online_migration
      ;;
    verify)
      post_update_verification
      ;;
    backup)
      if [ $# -eq 0 ]; then
        print_message "$RED" "请指定要备份的服务名称"
        return 1
      fi
      create_service_backup "$1"
      ;;
    restore)
      if [ $# -eq 0 ]; then
        print_message "$RED" "请指定要恢复的服务名称"
        return 1
      fi
      restore_service_backup "$1"
      ;;
    help|--help|-h)
      show_help
      ;;
    *)
      print_message "$RED" "未知命令: $command"
      show_help
      return 1
      ;;
  esac
}

main "$@"