Add ROI-based shoe training workflow

This commit is contained in:
tian 2026-03-16 11:16:50 +08:00
parent 45f1f3182f
commit 22aee7fa1e
8 changed files with 720 additions and 19 deletions

View File

@ -2,8 +2,34 @@
chcp 65001 >nul chcp 65001 >nul
cls cls
:: 设置 Python 3.11 路径 set "REPO_DIR=%~dp0"
set "PATH=C:\Users\Tellme\AppData\Local\Programs\Python\Python311\Scripts;C:\Users\Tellme\AppData\Local\Programs\Python\Python311;%PATH%" pushd "%REPO_DIR%"
set "YOLO_LAUNCHER="
where yolo >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=yolo"
if not defined YOLO_LAUNCHER (
py -3.11 -c "import ultralytics" >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=py -3.11 -m ultralytics"
)
if not defined YOLO_LAUNCHER (
python -c "import ultralytics" >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=python -m ultralytics"
)
if not defined YOLO_LAUNCHER (
echo [错误] 未找到可用的 Ultralytics 启动方式
echo.
echo 请先确保满足以下任一条件:
echo 1. yolo 命令已加入 PATH
echo 2. py -3.11 可运行并已安装 ultralytics
echo 3. python 可运行并已安装 ultralytics
popd
pause
exit /b 1
)
echo ============================================================ echo ============================================================
echo 训练鞋子检测模型 (YOLOv8 + 640x640) echo 训练鞋子检测模型 (YOLOv8 + 640x640)
@ -11,14 +37,15 @@ echo ============================================================
echo. echo.
:: 设置数据集路径 :: 设置数据集路径
set DATASET=datasets/openimages-shoes-yolo/data.yaml set "DATASET=%REPO_DIR%datasets\openimages-shoes-yolo\data.yaml"
:: 检查数据集是否存在 :: 检查数据集是否存在
if not exist %DATASET% ( if not exist "%DATASET%" (
echo [错误] 找不到数据集配置文件: %DATASET% echo [错误] 找不到数据集配置文件: %DATASET%
echo. echo.
echo 请先下载数据集: echo 请先下载数据集:
echo python 01_download_dataset.py --source openimages --max-samples 5000 echo py -3.11 "%REPO_DIR%01_download_dataset.py" --source openimages --max-samples 5000
popd
pause pause
exit /b 1 exit /b 1
) )
@ -69,11 +96,12 @@ echo 开始训练
echo ============================================================ echo ============================================================
echo. echo.
yolo detect train data=%DATASET% model=%MODEL% epochs=%EPOCHS% imgsz=%IMGSZ% batch=%BATCH% device=0 call %YOLO_LAUNCHER% detect train data="%DATASET%" model="%MODEL%" epochs=%EPOCHS% imgsz=%IMGSZ% batch=%BATCH% device=0
if %ERRORLEVEL% neq 0 ( if %ERRORLEVEL% neq 0 (
echo. echo.
echo [错误] 训练失败! echo [错误] 训练失败!
popd
pause pause
exit /b 1 exit /b 1
) )
@ -87,4 +115,5 @@ echo 模型保存在: runs/detect/train/weights/best.pt
echo. echo.
echo 下一步: 运行 03_export_onnx.bat 导出 ONNX echo 下一步: 运行 03_export_onnx.bat 导出 ONNX
echo. echo.
popd
pause pause

View File

@ -2,19 +2,46 @@
chcp 65001 >nul chcp 65001 >nul
cls cls
:: 设置 Python 3.11 路径 set "REPO_DIR=%~dp0"
set "PATH=C:\Users\Tellme\AppData\Local\Programs\Python\Python311\Scripts;C:\Users\Tellme\AppData\Local\Programs\Python\Python311;%PATH%" pushd "%REPO_DIR%"
set "YOLO_LAUNCHER="
where yolo >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=yolo"
if not defined YOLO_LAUNCHER (
py -3.11 -c "import ultralytics" >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=py -3.11 -m ultralytics"
)
if not defined YOLO_LAUNCHER (
python -c "import ultralytics" >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=python -m ultralytics"
)
if not defined YOLO_LAUNCHER (
echo [错误] 未找到可用的 Ultralytics 启动方式
echo.
echo 请先确保满足以下任一条件:
echo 1. yolo 命令已加入 PATH
echo 2. py -3.11 可运行并已安装 ultralytics
echo 3. python 可运行并已安装 ultralytics
popd
pause
exit /b 1
)
echo ============================================================ echo ============================================================
echo 导出 ONNX 模型 (640x640) echo 导出 ONNX 模型 (640x640)
echo ============================================================ echo ============================================================
echo. echo.
set MODEL_PATH=%USERPROFILE%\apps\ultralytics\runs\detect\train\weights\best.pt set "MODEL_PATH=%REPO_DIR%runs\detect\train\weights\best.pt"
if not exist %MODEL_PATH% ( if not exist "%MODEL_PATH%" (
echo [错误] 找不到模型: %MODEL_PATH% echo [错误] 找不到模型: %MODEL_PATH%
echo 请先运行 02_train.bat 训练 echo 请先运行 02_train.bat 训练
popd
pause pause
exit /b 1 exit /b 1
) )
@ -22,17 +49,19 @@ if not exist %MODEL_PATH% (
echo [信息] 输入模型: %MODEL_PATH% echo [信息] 输入模型: %MODEL_PATH%
echo. echo.
yolo export model=%MODEL_PATH% format=onnx imgsz=640 opset=12 simplify call %YOLO_LAUNCHER% export model="%MODEL_PATH%" format=onnx imgsz=640 opset=12 simplify
if %ERRORLEVEL% neq 0 ( if %ERRORLEVEL% neq 0 (
echo [错误] 导出失败! echo [错误] 导出失败!
popd
pause pause
exit /b 1 exit /b 1
) )
echo. echo.
echo [成功] ONNX 模型: %USERPROFILE%\apps\ultralytics\runs\detect\train\weights\best.onnx echo [成功] ONNX 模型: %REPO_DIR%runs\detect\train\weights\best.onnx
echo. echo.
echo 下一步: 在 Ubuntu 上运行 04_convert_rknn.py 转换 echo 下一步: 在 Ubuntu 上运行 04_convert_rknn.py 转换
echo. echo.
popd
pause pause

View File

@ -1,26 +1,56 @@
@echo off @echo off
setlocal setlocal
set "PATH=C:\Users\Tellme\AppData\Local\Programs\Python\Python311\Scripts;C:\Users\Tellme\AppData\Local\Programs\Python\Python311;%PATH%" set "REPO_DIR=%~dp0"
pushd "%REPO_DIR%"
set "YOLO_LAUNCHER="
where yolo >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=yolo"
if not defined YOLO_LAUNCHER (
py -3.11 -c "import ultralytics" >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=py -3.11 -m ultralytics"
)
if not defined YOLO_LAUNCHER (
python -c "import ultralytics" >nul 2>nul
if %ERRORLEVEL% equ 0 set "YOLO_LAUNCHER=python -m ultralytics"
)
if not defined YOLO_LAUNCHER (
echo [ERROR] No usable Ultralytics launcher was found
echo.
echo Please make sure one of the following works:
echo 1. yolo in PATH
echo 2. py -3.11 with ultralytics installed
echo 3. python with ultralytics installed
popd
pause
exit /b 1
)
echo ============================================================ echo ============================================================
echo Stage 2 Fine-tuning on PPE shoe subset echo Stage 2 Fine-tuning on PPE shoe subset
echo ============================================================ echo ============================================================
echo. echo.
set "DATASET=C:\Users\Tellme\apps\ppe-model-training\datasets\ppe-shoes\data.yaml" set "DATASET=%REPO_DIR%datasets\ppe-shoes\data.yaml"
set "BASE_MODEL=C:\Users\Tellme\apps\ultralytics\runs\detect\train3\weights\best.pt" set "BASE_MODEL=%REPO_DIR%runs\detect\train\weights\best.pt"
if not exist "%DATASET%" ( if not exist "%DATASET%" (
echo [ERROR] PPE shoe subset not found: %DATASET% echo [ERROR] PPE shoe subset not found: %DATASET%
echo Run: echo Run:
echo C:\Users\Tellme\AppData\Local\Programs\Python\Python311\python.exe C:\Users\Tellme\apps\ppe-model-training\05_prepare_ppe_shoe_subset.py echo python "%REPO_DIR%05_prepare_ppe_shoe_subset.py"
popd
pause pause
exit /b 1 exit /b 1
) )
if not exist "%BASE_MODEL%" ( if not exist "%BASE_MODEL%" (
echo [ERROR] Base model not found: %BASE_MODEL% echo [ERROR] Base model not found: %BASE_MODEL%
echo Run 02_train.bat first to create the base checkpoint.
popd
pause pause
exit /b 1 exit /b 1
) )
@ -45,11 +75,12 @@ echo Start fine-tuning
echo ============================================================ echo ============================================================
echo. echo.
yolo detect train data="%DATASET%" model="%BASE_MODEL%" epochs=%EPOCHS% imgsz=%IMGSZ% batch=%BATCH% device=0 call %YOLO_LAUNCHER% detect train data="%DATASET%" model="%BASE_MODEL%" epochs=%EPOCHS% imgsz=%IMGSZ% batch=%BATCH% device=0
if %ERRORLEVEL% neq 0 ( if %ERRORLEVEL% neq 0 (
echo. echo.
echo [ERROR] Fine-tuning failed. echo [ERROR] Fine-tuning failed.
popd
pause pause
exit /b 1 exit /b 1
) )
@ -59,4 +90,5 @@ echo ============================================================
echo Fine-tuning complete echo Fine-tuning complete
echo ============================================================ echo ============================================================
echo. echo.
popd
pause pause

View File

@ -0,0 +1,456 @@
#!/usr/bin/env python3
"""Build a foot-ROI shoe dataset from existing YOLO shoe datasets."""
from __future__ import annotations
import argparse
import math
import shutil
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from PIL import Image
# Source datasets scanned by default (standard YOLO layout:
# images/<split> and labels/<split> under each directory).
DEFAULT_SOURCES = [
    "datasets/openimages-shoes-yolo",
    "datasets/ppe-shoes",
]
# Extensions probed when matching a label file stem to its image file.
IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
# Heuristics for deciding that two shoe boxes belong to the same person:
PAIR_MAX_X_GAP_FACTOR = 3.2  # max horizontal centre gap, in units of the wider box
PAIR_MAX_Y_GAP_FACTOR = 1.2  # max vertical centre gap, in units of the taller box
PAIR_MIN_AREA_RATIO = 0.4    # paired boxes must have comparable areas;
PAIR_MAX_AREA_RATIO = 2.5    # note 1/2.5 == 0.4, so the band is symmetric
# Target fraction of ROI area that the shoe boxes should occupy,
# for single-shoe and paired-shoe crops respectively.
SINGLE_AREA_RANGE = (0.15, 0.35)
PAIR_AREA_RANGE = (0.25, 0.50)
@dataclass(frozen=True)
class Box:
    """Axis-aligned box in absolute pixel coordinates (x1, y1)..(x2, y2)."""

    x1: float
    y1: float
    x2: float
    y2: float

    @property
    def w(self) -> float:
        """Width; never negative, even for degenerate coordinates."""
        return max(self.x2 - self.x1, 0.0)

    @property
    def h(self) -> float:
        """Height; never negative, even for degenerate coordinates."""
        return max(self.y2 - self.y1, 0.0)

    @property
    def area(self) -> float:
        """Box area; zero when the box is degenerate."""
        return self.w * self.h

    @property
    def cx(self) -> float:
        """Horizontal centre coordinate."""
        return (self.x1 + self.x2) / 2.0

    @property
    def cy(self) -> float:
        """Vertical centre coordinate."""
        return (self.y1 + self.y2) / 2.0

    def clip(self, width: float, height: float) -> "Box | None":
        """Intersect with the [0, width] x [0, height] rectangle.

        Returns None when no area with positive extent remains.
        """
        left = min(max(self.x1, 0.0), width)
        top = min(max(self.y1, 0.0), height)
        right = min(max(self.x2, 0.0), width)
        bottom = min(max(self.y2, 0.0), height)
        if right <= left or bottom <= top:
            return None
        return Box(left, top, right, bottom)
@dataclass(frozen=True)
class RoiSample:
    """One ROI training sample cut from a source image."""

    # ROI rectangle in absolute pixel coordinates of the source image.
    roi: Box
    # Indices (into the image's shoe-box list) this ROI was built from.
    members: tuple[int, ...]
    # "single" for a one-shoe ROI, "pair" for a two-shoe ROI.
    mode: str
def parse_args() -> argparse.Namespace:
    """Parse CLI options: source dataset list, output directory, --clean flag."""
    parser = argparse.ArgumentParser(description="Build a foot-context ROI shoe dataset")
    parser.add_argument(
        "--sources",
        nargs="+",
        default=DEFAULT_SOURCES,
        help="Source YOLO datasets containing images/<split> and labels/<split>",
    )
    parser.add_argument(
        "--output",
        default="datasets/shoe-roi-mix",
        help="Output ROI dataset directory",
    )
    parser.add_argument(
        "--clean",
        action="store_true",  # boolean flag: wipe output before rebuilding
        help="Delete the output directory before rebuilding",
    )
    return parser.parse_args()
def ensure_output_layout(output_dir: Path) -> None:
    """Create images/<split> and labels/<split> directories for all splits."""
    for kind in ("images", "labels"):
        for split in ("train", "val", "test"):
            (output_dir / kind / split).mkdir(parents=True, exist_ok=True)
def find_image(image_dir: Path, stem: str) -> Path | None:
    """Return the image file matching `stem` in `image_dir`, or None.

    Probes every known extension in both lower- and upper-case so that
    camera-style names such as ``IMG_0001.JPG`` are also found on
    case-sensitive filesystems. Lower-case is tried first, preserving the
    original lookup order for existing datasets.
    """
    for ext in IMAGE_EXTS:
        for variant in (ext, ext.upper()):
            candidate = image_dir / f"{stem}{variant}"
            if candidate.exists():
                return candidate
    return None
def load_boxes(label_path: Path, image_width: int, image_height: int) -> list[Box]:
    """Read a YOLO label file and return absolute-pixel boxes, deduplicated.

    Blank lines and lines with fewer than five fields are skipped; the class
    id (first field) is ignored because the dataset is single-class. Boxes
    are clipped to the image and tiny remnants (area <= 1 px^2) dropped.
    """
    boxes: list[Box] = []
    for raw_line in label_path.read_text(encoding="utf-8").splitlines():
        fields = raw_line.split()
        if len(fields) < 5:
            continue  # blank or malformed line
        xc, yc, w, h = fields[1:5]
        box_w = float(w) * image_width
        box_h = float(h) * image_height
        center_x = float(xc) * image_width
        center_y = float(yc) * image_height
        candidate = Box(
            center_x - box_w / 2.0,
            center_y - box_h / 2.0,
            center_x + box_w / 2.0,
            center_y + box_h / 2.0,
        ).clip(image_width, image_height)
        if candidate is not None and candidate.area > 1.0:
            boxes.append(candidate)
    return dedupe_boxes(boxes)
def dedupe_boxes(boxes: list[Box], iou_threshold: float = 0.9) -> list[Box]:
    """Drop near-duplicate boxes (IoU >= threshold), keeping the larger one.

    Candidates are visited largest-first so the bigger of two duplicates
    survives; the result is ordered left-to-right, then top-to-bottom.
    """
    survivors: list[Box] = []
    for candidate in sorted(boxes, key=lambda b: b.area, reverse=True):
        duplicate = any(iou(candidate, kept) >= iou_threshold for kept in survivors)
        if not duplicate:
            survivors.append(candidate)
    survivors.sort(key=lambda b: (b.cx, b.cy))
    return survivors
def iou(a: Box, b: Box) -> float:
    """Intersection-over-union of two boxes; 0.0 when they do not overlap."""
    overlap_w = max(0.0, min(a.x2, b.x2) - max(a.x1, b.x1))
    overlap_h = max(0.0, min(a.y2, b.y2) - max(a.y1, b.y1))
    intersection = overlap_w * overlap_h
    if intersection <= 0:
        return 0.0
    union = a.area + b.area - intersection
    if union <= 0:
        return 0.0
    return intersection / union
def should_pair(left: Box, right: Box) -> bool:
    """Decide whether two shoe boxes plausibly form one person's pair.

    Centres must be close relative to the larger box, and the areas must be
    comparable (the 0.4..2.5 ratio band is symmetric since 1/2.5 == 0.4).
    """
    ref_w = max(left.w, right.w)
    ref_h = max(left.h, right.h)
    if ref_w <= 0 or ref_h <= 0:
        return False
    if abs(left.cx - right.cx) > ref_w * PAIR_MAX_X_GAP_FACTOR:
        return False
    if abs(left.cy - right.cy) > ref_h * PAIR_MAX_Y_GAP_FACTOR:
        return False
    # Degenerate right box -> infinite ratio -> never pair.
    ratio = left.area / right.area if right.area > 0 else math.inf
    return PAIR_MIN_AREA_RATIO <= ratio <= PAIR_MAX_AREA_RATIO
def greedy_group_boxes(boxes: list[Box]) -> list[tuple[int, ...]]:
    """Greedily pair compatible boxes; leftovers become singleton groups.

    Candidate pairs are ranked by centre distance (horizontal gap weighted
    double the vertical gap) and consumed greedily, so every box joins at
    most one pair. Unpaired boxes are appended as one-element groups.
    """
    if len(boxes) < 2:
        return [(idx,) for idx in range(len(boxes))]
    scored: list[tuple[float, int, int]] = []
    for first in range(len(boxes)):
        for second in range(first + 1, len(boxes)):
            if not should_pair(boxes[first], boxes[second]):
                continue
            gap_x = abs(boxes[first].cx - boxes[second].cx)
            gap_y = abs(boxes[first].cy - boxes[second].cy)
            scored.append((gap_x + (0.5 * gap_y), first, second))
    scored.sort(key=lambda entry: entry[0])
    taken: set[int] = set()
    groups: list[tuple[int, ...]] = []
    for _, first, second in scored:
        if first not in taken and second not in taken:
            taken.update((first, second))
            groups.append((first, second))
    groups.extend((idx,) for idx in range(len(boxes)) if idx not in taken)
    return groups
def expand_single(box: Box) -> Box:
    """Grow one shoe box into a foot-context ROI.

    The ROI's top-left sits 0.6 w left of and 0.5 h above the shoe, and the
    ROI spans 2.2 w x 2.4 h, keeping pant-leg and ground context.
    """
    roi_left = box.x1 - (0.6 * box.w)
    roi_top = box.y1 - (0.5 * box.h)
    return Box(roi_left, roi_top, roi_left + (2.2 * box.w), roi_top + (2.4 * box.h))
def expand_pair(boxes: list[Box], group: tuple[int, int]) -> Box:
    """Grow the union of a paired group's two shoe boxes into a shared ROI.

    The ROI's top-left sits 0.35 union-widths left of and 0.45 union-heights
    above the union box; the ROI spans 1.7 x 2.0 union sizes.
    """
    a = boxes[group[0]]
    b = boxes[group[1]]
    x_lo = min(a.x1, b.x1)
    y_lo = min(a.y1, b.y1)
    span_w = max(a.x2, b.x2) - x_lo
    span_h = max(a.y2, b.y2) - y_lo
    roi_left = x_lo - (0.35 * span_w)
    roi_top = y_lo - (0.45 * span_h)
    return Box(roi_left, roi_top, roi_left + (1.7 * span_w), roi_top + (2.0 * span_h))
def clamp_roi(roi: Box, image_width: int, image_height: int) -> Box | None:
    """Clip an ROI to the image and snap it outward to whole pixel edges.

    Returns None when nothing remains; otherwise guarantees an integer-edged
    box of at least 1x1 pixels that lies fully inside the image.
    """
    clipped = roi.clip(float(image_width), float(image_height))
    if clipped is None:
        return None
    left = max(0, min(int(math.floor(clipped.x1)), image_width - 1))
    top = max(0, min(int(math.floor(clipped.y1)), image_height - 1))
    right = max(left + 1, min(int(math.ceil(clipped.x2)), image_width))
    bottom = max(top + 1, min(int(math.ceil(clipped.y2)), image_height))
    return Box(float(left), float(top), float(right), float(bottom))
def resize_roi_to_ratio(
    roi: Box,
    image_width: int,
    image_height: int,
    object_area: float,
    min_ratio: float,
    max_ratio: float,
) -> Box | None:
    """Rescale an ROI so the shoe boxes fill roughly min_ratio..max_ratio of it.

    Performs up to three damped scaling steps toward the mid-point of the
    ratio band, keeping the ROI centred, then clamps the result back into
    the image. Returns None when the ROI or the object area degenerates.
    The loop may exit with the ratio still outside the band (the per-step
    scale is clamped); the last adjusted ROI is used regardless.
    """
    if object_area <= 0:
        return None
    adjusted = roi
    target_ratio = (min_ratio + max_ratio) / 2.0
    for _ in range(3):
        roi_area = adjusted.area
        if roi_area <= 0:
            return None
        ratio = object_area / roi_area
        if min_ratio <= ratio <= max_ratio:
            break  # already within the desired band
        # scale < 1 shrinks an oversized ROI (shoes too small in it);
        # scale > 1 grows an undersized ROI. Clamps keep each step gentle.
        scale = math.sqrt(ratio / target_ratio)
        if ratio < min_ratio:
            scale = max(0.6, min(0.95, scale))
        else:
            scale = min(1.8, max(1.05, scale))
        new_w = adjusted.w * scale
        new_h = adjusted.h * scale
        cx = adjusted.cx
        cy = adjusted.cy
        adjusted = Box(cx - new_w / 2.0, cy - new_h / 2.0, cx + new_w / 2.0, cy + new_h / 2.0)
    # Re-clamp: scaling may have pushed the ROI past the image border.
    adjusted = clamp_roi(adjusted, image_width, image_height)
    if adjusted is None:
        return None
    return adjusted
def boxes_in_roi(boxes: list[Box], roi: Box) -> list[Box]:
    """Translate boxes whose centre lies inside `roi` into ROI coordinates.

    Each kept box is shifted so the ROI's top-left corner becomes (0, 0),
    clipped to the ROI extent, and discarded when the surviving area is
    negligible (<= 4 px^2).
    """
    kept: list[Box] = []
    for box in boxes:
        centre_inside = roi.x1 <= box.cx <= roi.x2 and roi.y1 <= box.cy <= roi.y2
        if not centre_inside:
            continue
        shifted = Box(
            box.x1 - roi.x1,
            box.y1 - roi.y1,
            box.x2 - roi.x1,
            box.y2 - roi.y1,
        ).clip(roi.w, roi.h)
        if shifted is not None and shifted.area > 4.0:
            kept.append(shifted)
    return kept
def make_roi_samples(boxes: list[Box], image_width: int, image_height: int) -> list[RoiSample]:
    """Turn grouped shoe boxes into clamped, ratio-adjusted ROI samples.

    Pairs get a shared two-shoe ROI; leftover boxes get single-shoe ROIs.
    Groups whose ROI degenerates during clamping or resizing are skipped.
    """
    samples: list[RoiSample] = []
    for group in greedy_group_boxes(boxes):
        if len(group) == 2:
            raw_roi = expand_pair(boxes, group)
            mode, area_range = "pair", PAIR_AREA_RANGE
        else:
            raw_roi = expand_single(boxes[group[0]])
            mode, area_range = "single", SINGLE_AREA_RANGE
        roi = clamp_roi(raw_roi, image_width, image_height)
        if roi is None:
            continue
        total_object_area = sum(boxes[idx].area for idx in group)
        roi = resize_roi_to_ratio(roi, image_width, image_height, total_object_area, *area_range)
        if roi is None:
            continue
        samples.append(RoiSample(roi=roi, members=group, mode=mode))
    return samples
def to_yolo_lines(boxes: list[Box], roi_w: float, roi_h: float) -> list[str]:
    """Format ROI-relative boxes as single-class (id 0) YOLO label lines."""
    formatted: list[str] = []
    for box in boxes:
        center_x = ((box.x1 + box.x2) / 2.0) / roi_w
        center_y = ((box.y1 + box.y2) / 2.0) / roi_h
        norm_w = box.w / roi_w
        norm_h = box.h / roi_h
        formatted.append(f"0 {center_x:.6f} {center_y:.6f} {norm_w:.6f} {norm_h:.6f}")
    return formatted
def write_yaml(output_dir: Path, sources: list[str]) -> None:
    """Write the Ultralytics data.yaml describing the ROI dataset."""
    source_names = ", ".join(Path(item).name for item in sources)
    lines = [
        "# ROI shoe training mix",
        "",
        f"path: {output_dir.resolve().as_posix()}",
        "train: images/train",
        "val: images/val",
        "test: images/test",
        "",
        "nc: 1",
        "names: ['shoe']",
        "",
        "dataset_info:",
        "  name: shoe-roi-mix",
        "  task: detect_shoe_roi",
        f"  source: {source_names}",
        "  note: cropped to foot-context ROIs to match online two-stage inference",
        "",
    ]
    (output_dir / "data.yaml").write_text("\n".join(lines), encoding="utf-8")
def build_split(source_dir: Path, output_dir: Path, split: str) -> dict[str, int]:
    """Convert one split of a source dataset into ROI crops plus labels.

    Returns counters: "images" (ROI crops written), "boxes" (label boxes
    written), "single"/"pair" (ROI mode counts). A missing split yields
    all-zero stats without error.
    """
    image_dir = source_dir / "images" / split
    label_dir = source_dir / "labels" / split
    if not image_dir.exists() or not label_dir.exists():
        return {"images": 0, "boxes": 0, "single": 0, "pair": 0}
    stats = {"images": 0, "boxes": 0, "single": 0, "pair": 0}
    # Prefix output stems with the source dataset name so that equally
    # named files from different sources cannot collide.
    prefix = source_dir.name.replace("-", "_")
    for label_path in sorted(label_dir.glob("*.txt")):
        image_path = find_image(image_dir, label_path.stem)
        if image_path is None:
            continue  # label file without a matching image
        with Image.open(image_path) as image:
            # Normalize to RGB so every crop can be saved as JPEG.
            image = image.convert("RGB")
            width, height = image.size
            boxes = load_boxes(label_path, width, height)
            if not boxes:
                continue
            samples = make_roi_samples(boxes, width, height)
            for sample_idx, sample in enumerate(samples):
                roi_boxes = boxes_in_roi(boxes, sample.roi)
                if not roi_boxes:
                    continue  # ROI ended up containing no usable shoe box
                out_stem = f"{prefix}_{label_path.stem}_{sample.mode}_{sample_idx:02d}"
                dst_image = output_dir / "images" / split / f"{out_stem}.jpg"
                dst_label = output_dir / "labels" / split / f"{out_stem}.txt"
                crop = image.crop((sample.roi.x1, sample.roi.y1, sample.roi.x2, sample.roi.y2))
                crop.save(dst_image, quality=95)
                yolo_lines = to_yolo_lines(roi_boxes, sample.roi.w, sample.roi.h)
                dst_label.write_text("\n".join(yolo_lines) + "\n", encoding="utf-8")
                stats["images"] += 1
                stats["boxes"] += len(roi_boxes)
                stats[sample.mode] += 1
    return stats
def main() -> None:
    """Build the ROI dataset from every configured source and print stats."""
    args = parse_args()
    output_dir = Path(args.output)
    if args.clean and output_dir.exists():
        shutil.rmtree(output_dir)
    ensure_output_layout(output_dir)
    # summary[source_name][split] -> per-split counter dict from build_split.
    summary: dict[str, dict[str, dict[str, int]]] = defaultdict(dict)
    totals = {"images": 0, "boxes": 0, "single": 0, "pair": 0}
    for source in args.sources:
        source_dir = Path(source)
        if not source_dir.exists():
            raise FileNotFoundError(f"Source dataset not found: {source_dir}")
        for split in ("train", "val", "test"):
            stats = build_split(source_dir, output_dir, split)
            summary[source_dir.name][split] = stats
            for key in totals:
                totals[key] += stats[key]
    write_yaml(output_dir, args.sources)
    print(f"Output dataset: {output_dir.resolve()}")
    for source_name, split_map in summary.items():
        print(f"[{source_name}]")
        for split in ("train", "val", "test"):
            stats = split_map.get(split, {"images": 0, "boxes": 0, "single": 0, "pair": 0})
            print(
                f"  {split}: rois={stats['images']} boxes={stats['boxes']} "
                f"single={stats['single']} pair={stats['pair']}"
            )
    print(
        "Total:"
        f" rois={totals['images']} boxes={totals['boxes']}"
        f" single={totals['single']} pair={totals['pair']}"
    )
# Script entry point: build the ROI dataset when run directly.
if __name__ == "__main__":
    main()

View File

@ -1,6 +1,6 @@
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
$repo = "C:\Users\tianj\Documents\apps\DetectionModelTraining" $repo = $PSScriptRoot
$env:PYTHONPATH = "$repo\.pydeps" $env:PYTHONPATH = "$repo\.pydeps"
$env:YOLO_CONFIG_DIR = "$repo\.ultralytics" $env:YOLO_CONFIG_DIR = "$repo\.ultralytics"

View File

@ -1,6 +1,6 @@
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
$repo = "C:\Users\tianj\Documents\apps\DetectionModelTraining" $repo = $PSScriptRoot
$env:PYTHONPATH = "$repo\.pydeps" $env:PYTHONPATH = "$repo\.pydeps"
$env:YOLO_CONFIG_DIR = "$repo\.ultralytics" $env:YOLO_CONFIG_DIR = "$repo\.ultralytics"

View File

@ -0,0 +1,92 @@
@echo off
chcp 65001 >nul
cls
set "REPO_DIR=%~dp0"
pushd "%REPO_DIR%"

:: ---- Locate a working Ultralytics launcher --------------------------------
:: BUG FIX: inside a parenthesized block, %ERRORLEVEL% is expanded when the
:: whole block is PARSED, not after the probe command runs. The original
:: "if %ERRORLEVEL% equ 0" therefore tested the exit code of the command
:: BEFORE the block (the failed "where yolo"), so the py/python fallbacks
:: could never fire. "probe && set ..." evaluates the probe's own exit code
:: at run time and needs no delayed expansion.
set "YOLO_LAUNCHER="
where yolo >nul 2>nul && set "YOLO_LAUNCHER=yolo"
if not defined YOLO_LAUNCHER (
    py -3.11 -c "import ultralytics" >nul 2>nul && set "YOLO_LAUNCHER=py -3.11 -m ultralytics"
)
if not defined YOLO_LAUNCHER (
    python -c "import ultralytics" >nul 2>nul && set "YOLO_LAUNCHER=python -m ultralytics"
)
if not defined YOLO_LAUNCHER (
    echo [错误] 未找到可用的 Ultralytics 启动方式
    echo.
    echo 请先确保满足以下任一条件:
    echo 1. yolo 命令已加入 PATH
    echo 2. py -3.11 可运行并已安装 ultralytics
    echo 3. python 可运行并已安装 ultralytics
    popd
    pause
    exit /b 1
)

:: ---- Check that the ROI dataset has been built ----------------------------
set "DATASET=%REPO_DIR%datasets\shoe-roi-mix\data.yaml"
if not exist "%DATASET%" (
    echo [错误] 找不到 ROI 数据集配置: %DATASET%
    echo.
    echo 请先构建 ROI 数据集:
    echo python "%REPO_DIR%09_build_roi_shoe_dataset.py" --clean
    popd
    pause
    exit /b 1
)

echo ============================================================
echo 训练鞋子 ROI 检测模型 (YOLOv8s + 640x640)
echo ============================================================
echo.
echo [信息] 数据集: %DATASET%
echo [信息] 模型: yolov8s.pt
echo.

:: ---- Training parameters --------------------------------------------------
set "EPOCHS=150"
set "IMGSZ=640"
set "BATCH=16"
set "PROJECT=%REPO_DIR%runs\roi_yolov8s_640"
set "RUN_NAME=train_roi"
echo 训练参数:
echo - Epochs: %EPOCHS%
echo - Image Size: %IMGSZ%x%IMGSZ%
echo - Batch Size: %BATCH%
echo - Device: GPU (cuda:0)
echo - Project: %PROJECT%
echo - Run Name: %RUN_NAME% ^(已存在时会自动递增,不覆盖旧模型^)
echo.
echo ============================================================
echo 开始训练
echo ============================================================
echo.

:: YOLO_LAUNCHER may contain spaces ("py -3.11 -m ultralytics"), so it must
:: stay unquoted here. This "if %ERRORLEVEL%" is at top level (outside any
:: parentheses), so it correctly reads the call's exit code.
call %YOLO_LAUNCHER% detect train data="%DATASET%" model="yolov8s.pt" epochs=%EPOCHS% imgsz=%IMGSZ% batch=%BATCH% device=0 project="%PROJECT%" name="%RUN_NAME%"
if %ERRORLEVEL% neq 0 (
    echo.
    echo [错误] 训练失败!
    popd
    pause
    exit /b 1
)

echo.
echo ============================================================
echo 训练完成!
echo ============================================================
echo.
echo 模型输出目录: %PROJECT%
echo.
popd
pause

View File

@ -1,5 +1,68 @@
# 鞋子检测模型训练指南 # 鞋子检测模型训练指南
## 当前主方案:YOLOv8s-640 + 脚部 ROI 训练
当前项目的主训练方向已经调整为:
- 只训练 `yolov8s`、输入尺寸固定 `640x640`
- 训练数据不再直接使用“整张场景图”或“鞋子纯特写图”
- 先根据鞋框裁出更接近线上输入分布的“脚部 ROI 图”,再训练鞋检测模型
这样做的原因是线上链路并不是直接在整张图上找鞋,而是:
1. 先从人体框生成脚部 ROI
2. 再在脚部 ROI 上做鞋检测
因此训练阶段也尽量模拟这个输入分布,保留一些裤脚、地面和周围背景,避免训练样本过于像商品特写。
### ROI 规则
单鞋 ROI
- 已知鞋框 `(x, y, w, h)`
- `roi_x = x - 0.6w`
- `roi_y = y - 0.5h`
- `roi_w = 2.2w`
- `roi_h = 2.4h`
双鞋 ROI
- 优先把两只鞋裁进同一张 ROI
- 先取两只鞋框并集,再扩框:
- `roi_x = union_x - 0.35 * union_w`
- `roi_y = union_y - 0.45 * union_h`
- `roi_w = 1.7 * union_w`
- `roi_h = 2.0 * union_h`
裁图会自动裁剪到图像边界内。
### 新主流程
1. 准备原始单类鞋数据集
```bash
python 01_download_dataset.py --source openimages --max-samples 5000
python 05_prepare_ppe_shoe_subset.py
```
2. 构建 ROI 化训练集
```bash
python 09_build_roi_shoe_dataset.py --clean
```
输出目录:
- `datasets/shoe-roi-mix`
3. 训练新的 ROI 模型
```bash
12_train_roi_yolov8s_640.bat
```
模型输出目录:
- `runs/roi_yolov8s_640`
说明:
- 新模型会写到新的项目目录,不覆盖之前已有模型
- 如果 `train_roi` 已存在,Ultralytics 会自动递增运行目录名
## 方案640x640 单模型部署时用2窗口 ## 方案640x640 单模型部署时用2窗口
**训练阶段** **训练阶段**