QAUP_Management/deploy/docker/zero-downtime-update.sh

501 lines
14 KiB
Bash
Executable File

#!/bin/bash
# QAUP zero-downtime update script.

# Abort on the first unhandled command failure.
set -e

# Absolute directory containing this script, and the project root two
# levels above it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Colour escape sequences. They are stored as literal backslash escapes and
# only turned into control characters when printed by print_message.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Print a message wrapped in a colour escape and followed by the colour reset.
# Arguments:
#   $1 - colour escape sequence (for example $RED)
#   $2 - message text
# Outputs:
#   one line on stdout; backslash escapes in the colour, the message and
#   the reset sequence are all interpreted
print_message() {
  local tint=$1
  local text=$2
  printf '%b\n' "${tint}${text}${NC}"
}
# Write the current local time to stdout as "YYYY-MM-DD HH:MM:SS".
get_timestamp() {
  date '+%F %T'
}
# Record one update-log entry: append it to ./logs/update.log and echo it
# to the terminal in blue.
# Arguments:
#   $1 - message text
# Outputs:
#   "[timestamp] message" on stdout (coloured) and in ./logs/update.log;
#   a warning on stderr if the log file cannot be written
# Returns:
#   the status of print_message; a failure to write the log file is
#   reported but never propagated, so a logging problem cannot abort a
#   deployment that is already half-way through
log_update() {
  local message=$1
  local log_dir="./logs"
  local timestamp
  timestamp=$(get_timestamp)
  local line="[$timestamp] $message"

  # The path is relative to the current directory, which can differ from
  # the one main prepared (some callers cd elsewhere before logging), so
  # make sure the directory exists every time.
  if ! { mkdir -p "$log_dir" && printf '%s\n' "$line" >> "$log_dir/update.log"; } 2>/dev/null; then
    printf '警告: 无法写入日志文件 %s\n' "$log_dir/update.log" >&2
  fi

  print_message "$BLUE" "$line"
}
# Poll a service until one probe succeeds or the attempt budget runs out.
# Arguments:
#   $1 - service name: app, nginx, postgres or redis
#   $2 - maximum number of probes (default 30); probes are 2 seconds apart
# Returns:
#   0 as soon as a probe succeeds; 1 if every probe fails or the service
#   name is not one of the four above
check_service_health() {
  local service=$1
  local max_attempts=${2:-30}
  local attempt=0

  # An unrecognised name has no probe, so polling it could never succeed;
  # fail at once instead of sleeping through the whole budget.
  case "$service" in
    app | nginx | postgres | redis) ;;
    *)
      printf '未知服务: %s\n' "$service" >&2
      return 1
      ;;
  esac

  while [ "$attempt" -lt "$max_attempts" ]; do
    case "$service" in
      app)
        # --max-time keeps a stalled request from blocking the loop.
        if curl -f -s --max-time 5 http://localhost:8080/actuator/health &>/dev/null; then
          return 0
        fi
        ;;
      nginx)
        if curl -f -s --max-time 5 http://localhost/health &>/dev/null; then
          return 0
        fi
        ;;
      postgres)
        if docker exec qaup-postgres pg_isready -h localhost -p 5432 -U postgres &>/dev/null; then
          return 0
        fi
        ;;
      redis)
        if docker exec qaup-redis redis-cli ping &>/dev/null; then
          return 0
        fi
        ;;
    esac

    attempt=$((attempt + 1))
    # Pause only when another probe will follow.
    if [ "$attempt" -lt "$max_attempts" ]; then
      sleep 2
    fi
  done

  return 1
}
# Block until a service passes its health probe, logging the outcome.
# Arguments:
#   $1 - service name understood by check_service_health
#   $2 - timeout in seconds (default 60); half this number is passed to
#        check_service_health as its attempt count
# Returns:
#   0 when the service became ready, 1 when the wait ran out
wait_for_service() {
  local service=$1
  local timeout=${2:-60}

  log_update "等待 $service 服务就绪..."

  if ! check_service_health "$service" "$((timeout / 2))"; then
    log_update "$service 服务启动超时"
    return 1
  fi

  log_update "$service 服务已就绪"
  return 0
}
# Snapshot a running service container as an image so it can be restored.
# Arguments:
#   $1 - service name; the container is expected to be named qaup-<service>
# Outputs:
#   progress via log_update; the image name is written to
#   ./logs/<service>_backup.txt for restore_service_backup to read
# Returns:
#   0 when the image was committed and recorded; 1 when the container is
#   not running, the commit fails, or the record cannot be written
create_service_backup() {
  local service=$1
  local container="qaup-$service"
  local backup_name
  backup_name="${service}_backup_$(date +%Y%m%d_%H%M%S)"

  log_update "创建 $service 服务备份: $backup_name"

  # Match the whole name: a substring test would accept e.g. qaup-app-new
  # when asked about qaup-app.
  if ! docker ps --format "{{.Names}}" | grep -Fxq -- "$container"; then
    log_update "警告: $service 容器未运行,跳过备份"
    return 1
  fi

  # Only record the backup once the image really exists; otherwise a later
  # restore would be pointed at a name that was never created.
  if ! docker commit "$container" "$backup_name"; then
    log_update "错误: $service 服务备份失败"
    return 1
  fi
  log_update "$service 服务备份已创建: $backup_name"

  if ! { mkdir -p "./logs" && printf '%s\n' "$backup_name" > "./logs/${service}_backup.txt"; }; then
    log_update "错误: 无法记录 $service 服务备份信息"
    return 1
  fi
  return 0
}
# Replace a service container with one started from its recorded backup image.
# Arguments:
#   $1 - service name; the backup record is read from
#        ./logs/<service>_backup.txt and the container is qaup-<service>
# Returns:
#   0 when the restored container passes wait_for_service; 1 when there is
#   no backup record, the recorded image does not exist, the container
#   cannot be started, or it never becomes ready
restore_service_backup() {
  local service=$1
  local container="qaup-$service"
  local backup_file="./logs/${service}_backup.txt"
  local backup_name

  if [ ! -f "$backup_file" ]; then
    log_update "错误: 未找到 $service 服务备份信息"
    return 1
  fi
  backup_name=$(<"$backup_file")

  # Verify the image before touching the running container: removing the
  # container first and then failing to start the backup would leave the
  # service with nothing running at all.
  if [ -z "$backup_name" ] || ! docker image inspect "$backup_name" &>/dev/null; then
    log_update "错误: $service 服务备份镜像不存在: $backup_name"
    return 1
  fi

  log_update "恢复 $service 服务备份: $backup_name"

  # Best-effort removal; the container may already be gone.
  docker stop "$container" || true
  docker rm "$container" || true

  # NOTE(review): the container is started with only a name and a network,
  # without the env file, volumes or published ports that the docker run in
  # rolling_update_app passes. Confirm the backup image is usable this way
  # and that the localhost health probes can reach it.
  if ! docker run -d --name "$container" \
    --network qaup_qaup-network \
    "$backup_name"; then
    log_update "$service 服务备份恢复失败"
    return 1
  fi

  if ! wait_for_service "$service"; then
    log_update "$service 服务备份恢复失败"
    return 1
  fi

  log_update "$service 服务备份恢复成功"
  return 0
}
# Rewrite the upstream host in the Nginx site config and reload Nginx.
# Arguments:
#   $1 - host name currently in the config
#   $2 - host name to put in its place
# Returns:
#   non-zero if sed or the reload fails inside the qaup-nginx container
_rolling_update_point_nginx() {
  local from=$1
  local to=$2
  docker exec qaup-nginx sh -c "
    sed -i 's/${from}:8080/${to}:8080/g' /etc/nginx/conf.d/default.conf &&
    nginx -s reload
  "
}

# Rolling update of the application service: build a new image, start it
# beside the running container, move Nginx over to it, then retire the old
# container and rename the new one to qaup-app.
# Globals:
#   PROJECT_ROOT (read) - docker build context and location of .env
# Returns:
#   0 when the new container is serving traffic; 1 when any step fails, in
#   which case the new container is removed and Nginx is pointed back at
#   qaup-app
rolling_update_app() {
  local new_container="qaup-app-new"
  # The new container publishes its port 8080 on host port 8081 (see the
  # docker run below), so that is where it must be probed; localhost:8080
  # would be answered by the container that is being replaced.
  local new_health_url="http://localhost:8081/actuator/health"
  local attempt=0

  log_update "开始应用服务滚动更新..."

  # 1. Backup. Best-effort: a missing backup is logged, not fatal.
  if ! create_service_backup "app"; then
    log_update "警告: 应用服务备份未创建,继续更新"
  fi

  # Refuse to run over a container left behind by an earlier attempt; it
  # might be the one currently serving traffic.
  if docker ps -a --format "{{.Names}}" | grep -Fxq -- "$new_container"; then
    log_update "错误: 已存在名为 $new_container 的容器,请先清理后重试"
    return 1
  fi

  # 2. Build the new image. The context is passed explicitly instead of
  # changing directory, so the ./logs paths used by the logging and backup
  # helpers keep pointing at the same place.
  log_update "构建新的应用镜像..."
  if ! docker build -t qaup-app:new "$PROJECT_ROOT"; then
    log_update "应用镜像构建失败,取消更新"
    return 1
  fi

  # 3. Start the new container on a temporary host port.
  # NOTE(review): the container keeps publishing host port 8081 (not 8080)
  # after it is renamed to qaup-app, while check_service_health "app"
  # probes localhost:8080, and a second rolling update would find 8081
  # already taken. Confirm how the port mapping is meant to be restored.
  log_update "启动新应用容器..."
  if ! docker run -d --name "$new_container" \
    --network qaup_qaup-network \
    -p 8081:8080 \
    --env-file "$PROJECT_ROOT/.env" \
    -v qaup_app_logs:/app/logs \
    -v qaup_app_uploads:/app/uploadPath \
    qaup-app:new; then
    log_update "新应用容器启动失败,回滚..."
    docker rm -f "$new_container" &>/dev/null || true
    return 1
  fi

  # 4. Wait for the new container itself (up to 30 probes, 2 s apart).
  while ! curl -f -s --max-time 5 "$new_health_url" &>/dev/null; do
    attempt=$((attempt + 1))
    if [ "$attempt" -ge 30 ]; then
      log_update "新应用容器启动失败,回滚..."
      docker stop "$new_container" || true
      docker rm "$new_container" || true
      return 1
    fi
    sleep 2
  done

  # 5. Point Nginx at the new container.
  log_update "更新负载均衡配置..."
  if ! _rolling_update_point_nginx qaup-app "$new_container"; then
    log_update "负载均衡配置更新失败,回滚..."
    _rolling_update_point_nginx "$new_container" qaup-app || true
    docker stop "$new_container" || true
    docker rm "$new_container" || true
    return 1
  fi

  # 6. Verify the new service through Nginx.
  sleep 10
  if ! curl -f -s http://localhost/prod-api/actuator/health &>/dev/null; then
    log_update "新应用服务验证失败,回滚..."
    # Send traffic back to the old container, then drop the new one.
    _rolling_update_point_nginx "$new_container" qaup-app || true
    docker stop "$new_container" || true
    docker rm "$new_container" || true
    return 1
  fi
  log_update "新应用服务验证成功"

  # 7. Retire the old container.
  log_update "停止旧应用容器..."
  docker stop qaup-app || true
  docker rm qaup-app || true

  # 8. Give the new container the canonical name and make the Nginx config
  # refer to that name again, so a later Nginx reload or restart does not
  # look for a host called qaup-app-new that no longer exists.
  if docker rename "$new_container" qaup-app; then
    if ! _rolling_update_point_nginx "$new_container" qaup-app; then
      log_update "警告: 无法将负载均衡配置恢复为 qaup-app"
    fi
  else
    log_update "警告: 重命名新应用容器失败,容器仍名为 $new_container"
  fi

  # 9. Promote the image tag.
  docker tag qaup-app:new qaup-app:latest || log_update "警告: 更新镜像标签失败"
  docker rmi qaup-app:new || true

  log_update "应用服务滚动更新完成"
  return 0
}
# Rewrite the upstream host in the Nginx site config and reload Nginx.
# Arguments:
#   $1 - host name currently in the config
#   $2 - host name to put in its place
# Returns:
#   non-zero if sed or the reload fails inside the qaup-nginx container
_blue_green_point_nginx() {
  local from=$1
  local to=$2
  docker exec qaup-nginx sh -c "
    sed -i 's/${from}:8080/${to}:8080/g' /etc/nginx/conf.d/default.conf &&
    nginx -s reload
  "
}

# Blue-green deployment of the application: start the idle colour next to
# the live container, switch Nginx to it, verify, then retire the previous
# container and rename the new one to qaup-app.
# Globals:
#   PROJECT_ROOT (read) - docker build context and location of .env
# Returns:
#   0 when the new environment is serving traffic; 1 when any step fails,
#   in which case the new container is removed and Nginx is pointed back at
#   the previously live container
blue_green_deployment() {
  local current_env="blue"
  local new_env="green"
  # Name of the container that serves traffic right now. A completed
  # deployment renames its container to qaup-app, so that is the usual
  # case; a coloured name is only live when found running below.
  local live_container="qaup-app"
  local new_container
  local running

  log_update "开始蓝绿部署..."

  # Work out the live environment, matching whole container names only.
  running=$(docker ps --format "{{.Names}}") || running=""
  if grep -Fxq -- "qaup-app-green" <<<"$running"; then
    current_env="green"
    new_env="blue"
    live_container="qaup-app-green"
  elif grep -Fxq -- "qaup-app-blue" <<<"$running"; then
    live_container="qaup-app-blue"
  fi
  new_container="qaup-app-$new_env"
  log_update "当前环境: $current_env, 目标环境: $new_env"

  # Refuse to run over a container left behind by an earlier attempt.
  if docker ps -a --format "{{.Names}}" | grep -Fxq -- "$new_container"; then
    log_update "错误: 已存在名为 $new_container 的容器,请先清理后重试"
    return 1
  fi

  # 1. Build the new image. The context is passed explicitly instead of
  # changing directory, so the ./logs paths used by the logging and backup
  # helpers keep pointing at the same place.
  log_update "构建新镜像..."
  if ! docker build -t "qaup-app:$new_env" "$PROJECT_ROOT"; then
    log_update "镜像构建失败,取消部署"
    return 1
  fi

  # 2. Start the new environment.
  log_update "启动 $new_env 环境..."
  if ! docker run -d --name "$new_container" \
    --network qaup_qaup-network \
    --env-file "$PROJECT_ROOT/.env" \
    -v qaup_app_logs:/app/logs \
    -v qaup_app_uploads:/app/uploadPath \
    "qaup-app:$new_env"; then
    log_update "$new_env 环境启动失败"
    docker rm -f "$new_container" &>/dev/null || true
    return 1
  fi

  # 3. Wait for the new environment.
  # NOTE(review): wait_for_service "app" probes localhost:8080, and the new
  # container publishes no host port, so this looks like it checks the live
  # container rather than the new one. The running-state check below only
  # catches a container that exited on start. Confirm how the new
  # environment should be probed before traffic is switched.
  if ! wait_for_service "app" ||
    [ "$(docker inspect -f '{{.State.Running}}' "$new_container" 2>/dev/null)" != "true" ]; then
    log_update "$new_env 环境启动失败"
    docker stop "$new_container" || true
    docker rm "$new_container" || true
    return 1
  fi

  # 4. Switch traffic from whichever container is live to the new one.
  log_update "切换流量到 $new_env 环境..."
  if ! _blue_green_point_nginx "$live_container" "$new_container"; then
    log_update "流量切换失败,回滚..."
    _blue_green_point_nginx "$new_container" "$live_container" || true
    docker stop "$new_container" || true
    docker rm "$new_container" || true
    return 1
  fi

  # 5. Verify the new environment through Nginx.
  sleep 10
  if ! curl -f -s http://localhost/prod-api/actuator/health &>/dev/null; then
    log_update "$new_env 环境验证失败,回滚..."
    # Send traffic back, then drop the failed environment.
    _blue_green_point_nginx "$new_container" "$live_container" || true
    docker stop "$new_container" || true
    docker rm "$new_container" || true
    return 1
  fi
  log_update "$new_env 环境验证成功"

  # 6. Retire the previous environment.
  log_update "停止 $current_env 环境..."
  docker stop "$live_container" || true
  docker rm "$live_container" || true

  # 7. Give the new container the canonical name and make the Nginx config
  # refer to that name again, so the next deployment (and any Nginx reload
  # or restart) finds the host name it expects.
  if docker rename "$new_container" qaup-app; then
    if ! _blue_green_point_nginx "$new_container" qaup-app; then
      log_update "警告: 无法将负载均衡配置恢复为 qaup-app"
    fi
  else
    log_update "警告: 重命名 $new_env 环境容器失败,容器仍名为 $new_container"
  fi

  log_update "蓝绿部署完成"
  return 0
}
# Back up the database, then apply every SQL migration script in order.
# Globals:
#   PROJECT_ROOT (read) - migrations are listed from $PROJECT_ROOT/sql/migrations
# Returns:
#   0 when all scripts ran (or there is no migration directory); 1 when the
#   backup fails or a script fails, stopping at the first failing script
database_online_migration() {
  local migration_dir="$PROJECT_ROOT/sql/migrations"
  local migration_file
  local filename

  log_update "开始数据库在线迁移..."

  # 1. Back up first, and do not migrate without a successful backup.
  log_update "创建数据库备份..."
  if ! docker exec qaup-postgres /backup-db.sh; then
    log_update "数据库备份失败,取消迁移"
    return 1
  fi

  # 2. Nothing to do when the project ships no migrations.
  if [ ! -d "$migration_dir" ]; then
    log_update "未找到数据库迁移脚本目录: $migration_dir"
    return 0
  fi

  # 3. Run the scripts in glob (name) order.
  # NOTE(review): files are listed on the host but executed from /migrations
  # inside the container, so the directory is presumably mounted there.
  # Every *.sql file is run on every invocation; nothing here records which
  # ones were already applied. Confirm the scripts are safe to re-run.
  for migration_file in "$migration_dir"/*.sql; do
    # Skips the literal pattern when the directory holds no .sql files.
    [ -f "$migration_file" ] || continue

    filename=${migration_file##*/}
    log_update "执行迁移脚本: $filename"

    # Without ON_ERROR_STOP psql carries on after a failing statement and
    # still exits 0, so the failure branch below could never be reached
    # for SQL errors.
    if docker exec qaup-postgres psql -v ON_ERROR_STOP=1 \
      -U postgres -d qaup -f "/migrations/$filename"; then
      log_update "迁移脚本 $filename 执行成功"
    else
      log_update "迁移脚本 $filename 执行失败,停止迁移"
      return 1
    fi
  done

  log_update "数据库在线迁移完成"
  return 0
}
# Run a single verification probe selected by name.
# Arguments:
#   $1 - containers, app, frontend, postgres or redis
# Returns:
#   the status of the underlying probe command
_post_update_probe() {
  case $1 in
    containers)
      # Passes when any running container name contains "qaup-".
      docker ps --format "{{.Names}}" | grep -q "qaup-"
      ;;
    frontend)
      curl -f -s http://localhost/health &>/dev/null
      ;;
    app | postgres | redis)
      check_service_health "$1"
      ;;
  esac
}

# Run the five health checks and log a summary.
# Outputs:
#   one announcement and one pass/fail line per check, then the totals,
#   all through log_update
# Returns:
#   0 only when every check passed, otherwise 1
post_update_verification() {
  # Parallel tables: probe id, announcement line, label used in the result.
  local -a probes=(containers app frontend postgres redis)
  local -a announcements=(
    "检查容器状态..."
    "检查应用健康状态..."
    "检查前端访问..."
    "检查数据库连接..."
    "检查 Redis 连接..."
  )
  local -a labels=(
    "容器状态"
    "应用健康"
    "前端访问"
    "数据库连接"
    "Redis 连接"
  )
  local total_checks=${#probes[@]}
  local checks_passed=0
  local idx

  log_update "开始更新后验证..."

  for idx in "${!probes[@]}"; do
    log_update "${announcements[idx]}"
    if _post_update_probe "${probes[idx]}"; then
      checks_passed=$((checks_passed + 1))
      log_update "✓ ${labels[idx]}检查通过"
    else
      log_update "✗ ${labels[idx]}检查失败"
    fi
  done

  log_update "验证完成: $checks_passed/$total_checks 项检查通过"

  if [ "$checks_passed" -ne "$total_checks" ]; then
    log_update "✗ 部分验证检查失败,建议检查系统状态"
    return 1
  fi

  log_update "✓ 所有验证检查通过,更新成功"
  return 0
}
# Complete update flow: pre-check, database migration, rolling application
# update, post-check. Stops at the first step that fails.
# Returns:
#   0 when every step succeeded, otherwise 1
full_zero_downtime_update() {
  local banner="========================================="

  log_update "$banner"
  log_update "开始零停机更新流程"
  log_update "$banner"

  # 1. Pre-check: the same verification that runs after the update.
  log_update "执行更新前检查..."
  post_update_verification || {
    log_update "更新前检查失败,取消更新"
    return 1
  }

  # 2. Database migration.
  database_online_migration || {
    log_update "数据库迁移失败,取消更新"
    return 1
  }

  # 3. Application update; on failure the app backup is restored.
  rolling_update_app || {
    log_update "应用更新失败,尝试回滚..."
    restore_service_backup "app"
    return 1
  }

  # 4. Post-update verification.
  post_update_verification || {
    log_update "更新后验证失败,建议检查系统状态"
    return 1
  }

  log_update "$banner"
  log_update "零停机更新完成"
  log_update "$banner"
  return 0
}
# Print the usage text (commands and examples) to stdout.
show_help() {
  printf '%s\n' \
    "QAUP 零停机更新脚本" \
    "" \
    "用法: $0 [命令]" \
    "" \
    "命令:" \
    " full 完整的零停机更新流程" \
    " rolling 滚动更新应用服务" \
    " blue-green 蓝绿部署" \
    " db-migration 数据库在线迁移" \
    " verify 更新后验证" \
    " backup 创建服务备份" \
    " restore 恢复服务备份" \
    "" \
    "示例:" \
    " $0 full # 执行完整更新流程" \
    " $0 rolling # 仅滚动更新应用" \
    " $0 backup app # 备份应用服务" \
    " $0 restore app # 恢复应用服务"
}
# Command-line entry point: dispatch the first argument to its handler.
# Arguments:
#   $1 - command name (see show_help); with no arguments the help is shown
#   $2 - service name, required by the backup and restore commands
# Returns:
#   the handler's status; exits 1 for an unknown command or a missing
#   service name, and exits 0 after printing help for an empty command line
main() {
  # The log directory must exist before any handler writes to it.
  mkdir -p "./logs"

  if [ $# -eq 0 ]; then
    show_help
    exit 0
  fi

  local command=$1
  shift

  case "$command" in
    full) full_zero_downtime_update ;;
    rolling) rolling_update_app ;;
    blue-green) blue_green_deployment ;;
    db-migration) database_online_migration ;;
    verify) post_update_verification ;;
    backup)
      [ $# -gt 0 ] || {
        print_message "$RED" "请指定要备份的服务名称"
        exit 1
      }
      create_service_backup "$1"
      ;;
    restore)
      [ $# -gt 0 ] || {
        print_message "$RED" "请指定要恢复的服务名称"
        exit 1
      }
      restore_service_backup "$1"
      ;;
    help | --help | -h) show_help ;;
    *)
      print_message "$RED" "未知命令: $command"
      show_help
      exit 1
      ;;
  esac
}
# Script entry point: hand every command-line argument to main.
main "$@"