Use real person boxes for ROI dataset generation
This commit is contained in:
parent
22aee7fa1e
commit
36f6389d22
@ -19,6 +19,7 @@ Open Images 推荐类别:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import ast
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import shutil
|
import shutil
|
||||||
@ -32,6 +33,8 @@ from pathlib import Path
|
|||||||
OPENIMAGES_RECOMMENDED_CLASSES = ["Footwear", "Boot"]
|
OPENIMAGES_RECOMMENDED_CLASSES = ["Footwear", "Boot"]
|
||||||
OPENIMAGES_OPTIONAL_CLASSES = ["Sandal"]
|
OPENIMAGES_OPTIONAL_CLASSES = ["Sandal"]
|
||||||
OPENIMAGES_NOT_RECOMMENDED_CLASSES = ["High heels", "Roller skates"]
|
OPENIMAGES_NOT_RECOMMENDED_CLASSES = ["High heels", "Roller skates"]
|
||||||
|
OPENIMAGES_PERSON_CLASS = "Person"
|
||||||
|
ROI_SOURCE_DEFAULT_DIR = "datasets/openimages-person-shoes"
|
||||||
|
|
||||||
|
|
||||||
def download_ultralytics_cppe(dataset_dir: str = "datasets/construction-ppe"):
|
def download_ultralytics_cppe(dataset_dir: str = "datasets/construction-ppe"):
|
||||||
@ -123,6 +126,32 @@ dataset_info:
|
|||||||
return yaml_path
|
return yaml_path
|
||||||
|
|
||||||
|
|
||||||
|
def create_roi_source_yaml(dataset_dir: str, source_name: str, dataset_path_value: str = "."):
    """Write the person+shoe ROI-source ``data.yaml`` into ``dataset_dir``.

    The generated config declares two classes (``person`` = 0, ``shoe`` = 1)
    and the fixed ``images/train``, ``images/val`` and ``images/test`` splits.

    Args:
        dataset_dir: Directory that receives ``data.yaml``. It is created when
            it does not exist yet.
        source_name: Human-readable dataset name stored under ``dataset_info``.
        dataset_path_value: Value written to the YAML ``path`` key.

    Returns:
        The path of the written ``data.yaml`` as a string.
    """
    yaml_content = f"""# 人体+鞋子 ROI 源数据集配置

path: {dataset_path_value}
train: images/train
val: images/val
test: images/test

nc: 2
names: ['person', 'shoe']

dataset_info:
  name: {source_name}
  task: detect_person_and_shoe_for_roi
  note: 用真实 Person 框生成脚部 ROI,shoe 为 ROI 内检测目标
"""

    # open(..., "w") does not create missing parent directories, so make sure
    # the target exists. An empty dataset_dir means "current directory".
    if dataset_dir:
        os.makedirs(dataset_dir, exist_ok=True)

    yaml_path = os.path.join(dataset_dir, "data.yaml")
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(yaml_content)

    print(f"\n✓ ROI 源配置文件创建: {yaml_path}")
    return yaml_path
|
||||||
|
|
||||||
|
|
||||||
def rewrite_yaml_for_existing_splits(dataset_dir: str, source_name: str):
|
def rewrite_yaml_for_existing_splits(dataset_dir: str, source_name: str):
|
||||||
"""根据现有目录结构重写 data.yaml。"""
|
"""根据现有目录结构重写 data.yaml。"""
|
||||||
images_root = Path(dataset_dir) / "images"
|
images_root = Path(dataset_dir) / "images"
|
||||||
@ -157,6 +186,68 @@ dataset_info:
|
|||||||
return str(yaml_path)
|
return str(yaml_path)
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_roi_yaml_for_existing_splits(dataset_dir: str, source_name: str):
    """Rewrite the person+shoe ROI-source ``data.yaml`` to match the splits on disk.

    Only ``images/train``, ``images/val`` and ``images/test`` are considered.
    A missing split falls back to another one: train uses the first existing
    split, val uses train, and test uses val.

    Args:
        dataset_dir: Dataset root containing the ``images`` directory.
        source_name: Dataset name stored under ``dataset_info``.

    Returns:
        The path of the written ``data.yaml`` as a string.

    Raises:
        RuntimeError: If none of the three split directories exists.
    """
    dataset_root = Path(dataset_dir)
    images_root = dataset_root / "images"

    available = []
    for candidate in ("train", "val", "test"):
        if (images_root / candidate).exists():
            available.append(candidate)
    if not available:
        raise RuntimeError(f"未找到任何图像 split: {images_root}")

    train_split = "train" if "train" in available else available[0]
    val_split = "val" if "val" in available else train_split
    test_split = "test" if "test" in available else val_split
    test_line = f"test: images/{test_split}"

    # The trailing empty entry gives the file its final newline.
    yaml_lines = [
        "# 人体+鞋子 ROI 源数据集配置",
        "",
        "path: .",
        f"train: images/{train_split}",
        f"val: images/{val_split}",
        test_line,
        "",
        "nc: 2",
        "names: ['person', 'shoe']",
        "",
        "dataset_info:",
        f"  name: {source_name}",
        "  task: detect_person_and_shoe_for_roi",
        "  note: 用真实 Person 框生成脚部 ROI,shoe 为 ROI 内检测目标",
        "",
    ]

    yaml_path = dataset_root / "data.yaml"
    yaml_path.write_text("\n".join(yaml_lines), encoding="utf-8")
    print(f"\n✓ ROI 源配置文件更新: {yaml_path}")
    return str(yaml_path)
|
||||||
|
|
||||||
|
|
||||||
|
def load_dataset_name_map(dataset_yaml: Path) -> dict[int, str]:
    """Read the YOLO ``names`` entry of a dataset YAML without a YAML library.

    Only the first line starting with ``names:`` is used. Supported layouts:

    * inline list, quoted or bare: ``names: ['a', 'b']`` / ``names: [a, b]``
    * inline mapping: ``names: {0: 'a', 1: 'b'}``
    * block mapping: ``names:`` followed by indented ``0: a`` lines
    * block sequence: ``names:`` followed by ``- a`` lines (any indentation)

    Args:
        dataset_yaml: Path of the YAML file to read (UTF-8).

    Returns:
        Mapping from class index to class name; empty when no usable
        ``names`` entry is found.
    """
    lines = dataset_yaml.read_text(encoding="utf-8").splitlines()
    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped.startswith("names:"):
            continue

        inline_value = stripped[len("names:") :].strip()
        if inline_value:
            try:
                value = ast.literal_eval(inline_value)
            except (ValueError, SyntaxError):
                # Not a Python literal, e.g. a YAML flow list with bare words
                # such as [Person, High heels]; split it by hand instead.
                value = None
                if inline_value.startswith("[") and inline_value.endswith("]"):
                    value = [
                        item.strip().strip("'\"")
                        for item in inline_value[1:-1].split(",")
                        if item.strip()
                    ]
            if isinstance(value, (list, tuple)):
                return {idx: str(name) for idx, name in enumerate(value)}
            if isinstance(value, dict):
                inline_names = {
                    key: str(name) for key, name in value.items() if isinstance(key, int)
                }
                if inline_names:
                    return inline_names

        names: dict[int, str] = {}
        for child in lines[index + 1 :]:
            child_stripped = child.strip()
            if not child_stripped or child_stripped.startswith("#"):
                # Blank and comment lines do not terminate the block.
                continue
            if child_stripped.startswith("- "):
                # Block sequence item; YAML allows these at the parent's column.
                names[len(names)] = child_stripped[2:].strip().strip("'\"")
                continue
            if not child.startswith(" "):
                # Next top-level key: the names block is over.
                break
            if ":" not in child_stripped:
                continue
            key_text, value_text = child_stripped.split(":", 1)
            if key_text.strip().isdigit():
                names[int(key_text.strip())] = value_text.strip().strip("'\"")
        return names
    return {}
|
||||||
|
|
||||||
|
|
||||||
def ensure_openimages_train_val_split(export_dir: str, train_ratio: float = 0.9, seed: int = 42):
|
def ensure_openimages_train_val_split(export_dir: str, train_ratio: float = 0.9, seed: int = 42):
|
||||||
"""如果导出结果只有单个 split,则自动切分为 train/val。"""
|
"""如果导出结果只有单个 split,则自动切分为 train/val。"""
|
||||||
images_root = Path(export_dir) / "images"
|
images_root = Path(export_dir) / "images"
|
||||||
@ -236,8 +327,76 @@ def merge_openimages_to_single_class(export_dir: str):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def download_openimages(classes: list, max_samples: int, dataset_dir: str):
|
def rewrite_openimages_to_roi_source(export_dir: str):
    """Rewrite exported Open Images labels into person+shoe ROI-source labels.

    Class ids are resolved from ``dataset.yaml``: the ``Person`` class becomes
    id 0 and every footwear class becomes id 1; all other rows are dropped.
    Label files that do not contain both a person and a shoe are truncated to
    empty files. Afterwards the train/val split is ensured and ``data.yaml``
    is rewritten for the ROI-source layout.

    Args:
        export_dir: Root of the YOLO export (contains ``labels`` and
            ``dataset.yaml``).

    Returns:
        True on success; False when the labels directory, ``dataset.yaml``,
        the person class, or every footwear class is missing.
    """
    export_root = Path(export_dir)
    labels_root = export_root / "labels"
    dataset_yaml = export_root / "dataset.yaml"
    if not labels_root.exists():
        print(f"✗ 未找到标签目录: {labels_root}")
        return False
    if not dataset_yaml.exists():
        print(f"✗ 未找到 dataset.yaml: {dataset_yaml}")
        return False

    footwear_names = {"footwear", "boot", "sandal", "high heels", "roller skates"}
    person_ids = set()
    shoe_ids = set()
    for idx, name in load_dataset_name_map(dataset_yaml).items():
        lowered = name.lower()
        if lowered == "person":
            person_ids.add(idx)
        if lowered in footwear_names:
            shoe_ids.add(idx)

    if not person_ids:
        print("✗ 导出结果中未找到 Person 类")
        return False
    if not shoe_ids:
        print("✗ 导出结果中未找到鞋类")
        return False

    kept_files = 0
    # Index 0 counts person boxes, index 1 counts shoe boxes (the new ids).
    totals = [0, 0]

    for label_file in labels_root.rglob("*.txt"):
        rewritten: list[str] = []
        per_file = [0, 0]

        for raw in label_file.read_text(encoding="utf-8").splitlines():
            parts = raw.strip().split()
            if len(parts) < 5 or not parts[0].isdigit():
                continue
            class_id = int(parts[0])
            if class_id in person_ids:
                new_id = 0
            elif class_id in shoe_ids:
                new_id = 1
            else:
                continue
            rewritten.append(" ".join([str(new_id)] + parts[1:]))
            per_file[new_id] += 1

        # Totals count every remapped box, including those in discarded files.
        totals[0] += per_file[0]
        totals[1] += per_file[1]

        if per_file[0] > 0 and per_file[1] > 0:
            label_file.write_text("\n".join(rewritten) + "\n", encoding="utf-8")
            kept_files += 1
        else:
            label_file.write_text("", encoding="utf-8")

    ensure_openimages_train_val_split(export_dir)
    rewrite_roi_yaml_for_existing_splits(export_dir, "Open Images V7 ROI Source")

    total_person, total_shoe = totals
    print("\nROI 源重写完成:")
    print(f" 同时含 person + shoe 的标签文件: {kept_files}")
    print(f" Person 标注框数: {total_person}")
    print(f" Shoe 标注框数: {total_shoe}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def download_openimages(classes: list, max_samples: int, dataset_dir: str, mode: str):
|
||||||
|
"""通过 FiftyOne 下载 Open Images 并导出为单类或 ROI 源数据集。"""
|
||||||
try:
|
try:
|
||||||
import fiftyone as fo
|
import fiftyone as fo
|
||||||
import fiftyone.zoo as foz
|
import fiftyone.zoo as foz
|
||||||
@ -247,11 +406,15 @@ def download_openimages(classes: list, max_samples: int, dataset_dir: str):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
export_dir = dataset_dir + "-yolo"
|
export_dir = dataset_dir + "-yolo"
|
||||||
|
requested_classes = list(classes)
|
||||||
|
if mode == "roi-source" and OPENIMAGES_PERSON_CLASS not in requested_classes:
|
||||||
|
requested_classes = [OPENIMAGES_PERSON_CLASS] + requested_classes
|
||||||
|
|
||||||
print("=" * 70)
|
print("=" * 70)
|
||||||
print("下载 Open Images V7 数据集")
|
print("下载 Open Images V7 数据集")
|
||||||
print("=" * 70)
|
print("=" * 70)
|
||||||
print(f"类别: {classes}")
|
print(f"模式: {mode}")
|
||||||
|
print(f"类别: {requested_classes}")
|
||||||
print(f"最大样本数: {max_samples}")
|
print(f"最大样本数: {max_samples}")
|
||||||
print("原始缓存目录: FiftyOne 默认缓存目录")
|
print("原始缓存目录: FiftyOne 默认缓存目录")
|
||||||
print(f"YOLO 导出目录: {export_dir}")
|
print(f"YOLO 导出目录: {export_dir}")
|
||||||
@ -266,7 +429,7 @@ def download_openimages(classes: list, max_samples: int, dataset_dir: str):
|
|||||||
"open-images-v7",
|
"open-images-v7",
|
||||||
split="train",
|
split="train",
|
||||||
label_types=["detections"],
|
label_types=["detections"],
|
||||||
classes=classes,
|
classes=requested_classes,
|
||||||
max_samples=max_samples,
|
max_samples=max_samples,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -277,8 +440,12 @@ def download_openimages(classes: list, max_samples: int, dataset_dir: str):
|
|||||||
label_field="ground_truth",
|
label_field="ground_truth",
|
||||||
)
|
)
|
||||||
|
|
||||||
if not merge_openimages_to_single_class(export_dir):
|
if mode == "roi-source":
|
||||||
return False
|
if not rewrite_openimages_to_roi_source(export_dir):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
if not merge_openimages_to_single_class(export_dir):
|
||||||
|
return False
|
||||||
|
|
||||||
print(f"\n✓ 数据集保存: {export_dir}")
|
print(f"\n✓ 数据集保存: {export_dir}")
|
||||||
return True
|
return True
|
||||||
@ -352,6 +519,12 @@ def main():
|
|||||||
default="datasets/openimages-shoes",
|
default="datasets/openimages-shoes",
|
||||||
help="数据集保存目录",
|
help="数据集保存目录",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--mode",
|
||||||
|
choices=["single-class", "roi-source"],
|
||||||
|
default="single-class",
|
||||||
|
help="导出模式: single-class 用于单类训练, roi-source 保留 person + shoe 供 ROI 构建",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--max-samples",
|
"--max-samples",
|
||||||
type=int,
|
type=int,
|
||||||
@ -376,6 +549,8 @@ def main():
|
|||||||
print(f"推荐类别: {OPENIMAGES_RECOMMENDED_CLASSES}")
|
print(f"推荐类别: {OPENIMAGES_RECOMMENDED_CLASSES}")
|
||||||
print(f"可选补充: {OPENIMAGES_OPTIONAL_CLASSES}")
|
print(f"可选补充: {OPENIMAGES_OPTIONAL_CLASSES}")
|
||||||
print(f"默认不建议: {OPENIMAGES_NOT_RECOMMENDED_CLASSES}")
|
print(f"默认不建议: {OPENIMAGES_NOT_RECOMMENDED_CLASSES}")
|
||||||
|
if args.mode == "roi-source":
|
||||||
|
print(f"将额外保留: {OPENIMAGES_PERSON_CLASS}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
if args.source == "ultralytics":
|
if args.source == "ultralytics":
|
||||||
@ -385,7 +560,9 @@ def main():
|
|||||||
check_dataset(args.dir)
|
check_dataset(args.dir)
|
||||||
|
|
||||||
elif args.source == "openimages":
|
elif args.source == "openimages":
|
||||||
success = download_openimages(args.classes, args.max_samples, args.dir)
|
if args.mode == "roi-source" and args.dir == parser.get_default("dir"):
|
||||||
|
args.dir = ROI_SOURCE_DEFAULT_DIR
|
||||||
|
success = download_openimages(args.classes, args.max_samples, args.dir, args.mode)
|
||||||
final_dataset_dir = args.dir + "-yolo"
|
final_dataset_dir = args.dir + "-yolo"
|
||||||
if success:
|
if success:
|
||||||
check_dataset(final_dataset_dir)
|
check_dataset(final_dataset_dir)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""从 Construction-PPE 中提取单类 shoe 子集。"""
|
"""从 Construction-PPE 中提取单类 shoe 或 person+shoe ROI 源数据。"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import shutil
|
import shutil
|
||||||
@ -7,6 +7,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
|
|
||||||
SHOE_CLASSES = {"3", "10"} # boots, no_boots
|
SHOE_CLASSES = {"3", "10"} # boots, no_boots
|
||||||
|
PERSON_CLASSES = {"6"} # Person
|
||||||
|
|
||||||
|
|
||||||
def ensure_clean_dir(path: Path):
|
def ensure_clean_dir(path: Path):
|
||||||
@ -15,7 +16,7 @@ def ensure_clean_dir(path: Path):
|
|||||||
path.mkdir(parents=True, exist_ok=True)
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def write_yaml(output_dir: Path):
|
def write_shoe_yaml(output_dir: Path):
|
||||||
yaml_path = output_dir / "data.yaml"
|
yaml_path = output_dir / "data.yaml"
|
||||||
abs_output = output_dir.resolve().as_posix()
|
abs_output = output_dir.resolve().as_posix()
|
||||||
yaml_path.write_text(
|
yaml_path.write_text(
|
||||||
@ -42,7 +43,34 @@ def write_yaml(output_dir: Path):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def convert_split(source_dir: Path, output_dir: Path, split: str):
|
def write_roi_source_yaml(output_dir: Path):
    """Write the two-class (person, shoe) ROI-source ``data.yaml`` to ``output_dir``.

    The YAML ``path`` entry is the resolved absolute location of
    ``output_dir`` in POSIX form.
    """
    dataset_root = output_dir.resolve().as_posix()
    rows = (
        "# PPE 人体+鞋子 ROI 源数据配置",
        "",
        f"path: {dataset_root}",
        "train: images/train",
        "val: images/val",
        "test: images/test",
        "",
        "nc: 2",
        "names: ['person', 'shoe']",
        "",
        "dataset_info:",
        " name: Construction-PPE ROI source",
        " source: Construction-PPE",
        " note: 保留 Person 和鞋类,用真实人框生成脚部 ROI",
    )
    # Each row is newline-terminated, so the file ends with a newline.
    content = "".join(f"{row}\n" for row in rows)
    (output_dir / "data.yaml").write_text(content, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def convert_split(source_dir: Path, output_dir: Path, split: str, mode: str):
|
||||||
image_src = source_dir / "images" / split
|
image_src = source_dir / "images" / split
|
||||||
label_src = source_dir / "labels" / split
|
label_src = source_dir / "labels" / split
|
||||||
image_dst = output_dir / "images" / split
|
image_dst = output_dir / "images" / split
|
||||||
@ -56,16 +84,32 @@ def convert_split(source_dir: Path, output_dir: Path, split: str):
|
|||||||
|
|
||||||
for label_file in sorted(label_src.glob("*.txt")):
|
for label_file in sorted(label_src.glob("*.txt")):
|
||||||
lines = label_file.read_text(encoding="utf-8").splitlines()
|
lines = label_file.read_text(encoding="utf-8").splitlines()
|
||||||
shoe_lines = []
|
out_lines = []
|
||||||
|
file_person = 0
|
||||||
|
file_shoe = 0
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
parts = line.strip().split()
|
parts = line.strip().split()
|
||||||
if len(parts) < 5 or parts[0] not in SHOE_CLASSES:
|
if len(parts) < 5:
|
||||||
continue
|
continue
|
||||||
parts[0] = "0"
|
class_id = parts[0]
|
||||||
shoe_lines.append(" ".join(parts))
|
if mode == "roi-source":
|
||||||
|
if class_id in PERSON_CLASSES:
|
||||||
|
parts[0] = "0"
|
||||||
|
out_lines.append(" ".join(parts))
|
||||||
|
file_person += 1
|
||||||
|
elif class_id in SHOE_CLASSES:
|
||||||
|
parts[0] = "1"
|
||||||
|
out_lines.append(" ".join(parts))
|
||||||
|
file_shoe += 1
|
||||||
|
elif class_id in SHOE_CLASSES:
|
||||||
|
parts[0] = "0"
|
||||||
|
out_lines.append(" ".join(parts))
|
||||||
|
file_shoe += 1
|
||||||
|
|
||||||
if not shoe_lines:
|
if mode == "roi-source" and (file_person == 0 or file_shoe == 0):
|
||||||
|
continue
|
||||||
|
if mode != "roi-source" and file_shoe == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
image_file = image_src / f"{label_file.stem}.jpg"
|
image_file = image_src / f"{label_file.stem}.jpg"
|
||||||
@ -75,9 +119,9 @@ def convert_split(source_dir: Path, output_dir: Path, split: str):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
shutil.copy2(image_file, image_dst / image_file.name)
|
shutil.copy2(image_file, image_dst / image_file.name)
|
||||||
(label_dst / label_file.name).write_text("\n".join(shoe_lines) + "\n", encoding="utf-8")
|
(label_dst / label_file.name).write_text("\n".join(out_lines) + "\n", encoding="utf-8")
|
||||||
kept_images += 1
|
kept_images += 1
|
||||||
kept_boxes += len(shoe_lines)
|
kept_boxes += len(out_lines)
|
||||||
|
|
||||||
return kept_images, kept_boxes
|
return kept_images, kept_boxes
|
||||||
|
|
||||||
@ -94,22 +138,33 @@ def main():
|
|||||||
default="datasets/ppe-shoes",
|
default="datasets/ppe-shoes",
|
||||||
help="输出目录",
|
help="输出目录",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--mode",
|
||||||
|
choices=["shoe-only", "roi-source"],
|
||||||
|
default="shoe-only",
|
||||||
|
help="shoe-only 输出单类 shoe; roi-source 保留 person + shoe 供 ROI 构建使用",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
source_dir = Path(args.source)
|
source_dir = Path(args.source)
|
||||||
output_dir = Path(args.output)
|
output_dir = Path(args.output)
|
||||||
|
if args.mode == "roi-source" and args.output == parser.get_default("output"):
|
||||||
|
output_dir = Path("datasets/ppe-person-shoes")
|
||||||
|
|
||||||
ensure_clean_dir(output_dir)
|
ensure_clean_dir(output_dir)
|
||||||
|
|
||||||
total_images = 0
|
total_images = 0
|
||||||
total_boxes = 0
|
total_boxes = 0
|
||||||
for split in ("train", "val", "test"):
|
for split in ("train", "val", "test"):
|
||||||
kept_images, kept_boxes = convert_split(source_dir, output_dir, split)
|
kept_images, kept_boxes = convert_split(source_dir, output_dir, split, args.mode)
|
||||||
total_images += kept_images
|
total_images += kept_images
|
||||||
total_boxes += kept_boxes
|
total_boxes += kept_boxes
|
||||||
print(f"[{split}] images={kept_images} boxes={kept_boxes}")
|
print(f"[{split}] images={kept_images} boxes={kept_boxes}")
|
||||||
|
|
||||||
write_yaml(output_dir)
|
if args.mode == "roi-source":
|
||||||
|
write_roi_source_yaml(output_dir)
|
||||||
|
else:
|
||||||
|
write_shoe_yaml(output_dir)
|
||||||
print(f"\n输出目录: {output_dir}")
|
print(f"\n输出目录: {output_dir}")
|
||||||
print(f"总图片数: {total_images}")
|
print(f"总图片数: {total_images}")
|
||||||
print(f"总框数: {total_boxes}")
|
print(f"总框数: {total_boxes}")
|
||||||
|
|||||||
@ -1,9 +1,22 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Build a foot-ROI shoe dataset from existing YOLO shoe datasets."""
|
"""Build a foot-ROI shoe dataset from existing YOLO shoe datasets.
|
||||||
|
|
||||||
|
Preferred training input should come from person-bottom ROIs, matching online inference:
|
||||||
|
|
||||||
|
roi_x = x - 0.24w
|
||||||
|
roi_y = y + 0.64h
|
||||||
|
roi_w = 1.48w
|
||||||
|
roi_h = 0.58h
|
||||||
|
|
||||||
|
When person boxes are available, this script uses them directly.
|
||||||
|
When only shoe boxes are available, it falls back to shoe-based ROI approximation
|
||||||
|
that still tries to match person-bottom input distribution rather than shoe closeups.
|
||||||
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import ast
|
||||||
import math
|
import math
|
||||||
import shutil
|
import shutil
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@ -14,8 +27,9 @@ from PIL import Image
|
|||||||
|
|
||||||
|
|
||||||
DEFAULT_SOURCES = [
|
DEFAULT_SOURCES = [
|
||||||
|
"datasets/ppe-person-shoes",
|
||||||
|
"datasets/openimages-person-shoes-yolo",
|
||||||
"datasets/openimages-shoes-yolo",
|
"datasets/openimages-shoes-yolo",
|
||||||
"datasets/ppe-shoes",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
|
IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
|
||||||
@ -24,8 +38,8 @@ PAIR_MAX_Y_GAP_FACTOR = 1.2
|
|||||||
PAIR_MIN_AREA_RATIO = 0.4
|
PAIR_MIN_AREA_RATIO = 0.4
|
||||||
PAIR_MAX_AREA_RATIO = 2.5
|
PAIR_MAX_AREA_RATIO = 2.5
|
||||||
|
|
||||||
SINGLE_AREA_RANGE = (0.15, 0.35)
|
SINGLE_AREA_RANGE = (0.10, 0.26)
|
||||||
PAIR_AREA_RANGE = (0.25, 0.50)
|
PAIR_AREA_RANGE = (0.18, 0.40)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@ -72,6 +86,14 @@ class RoiSample:
|
|||||||
mode: str
|
mode: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SourceSpec:
    """How the labels of one source dataset map onto person and shoe classes."""

    # Root directory of the source dataset; splits are read from
    # images/<split> and labels/<split> below it.
    dataset_dir: Path
    # Class ids in the label files that denote a person box.
    person_ids: set[int]
    # Class ids in the label files that denote a shoe box.
    shoe_ids: set[int]
    # True when the dataset declares at least one person class, so ROIs can be
    # derived from real person boxes instead of the shoe-based fallback.
    uses_person_boxes: bool
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
def parse_args() -> argparse.Namespace:
|
||||||
parser = argparse.ArgumentParser(description="Build a foot-context ROI shoe dataset")
|
parser = argparse.ArgumentParser(description="Build a foot-context ROI shoe dataset")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -107,8 +129,71 @@ def find_image(image_dir: Path, stem: str) -> Path | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def load_boxes(label_path: Path, image_width: int, image_height: int) -> list[Box]:
|
def parse_names_from_yaml(yaml_path: Path) -> dict[int, str]:
    """Extract the YOLO class-name table from a dataset YAML without a YAML library.

    The first line starting with ``names:`` decides the result. Accepted forms:

    * inline list, quoted or bare: ``names: ['a', 'b']`` / ``names: [a, b]``
    * inline mapping: ``names: {0: 'a', 1: 'b'}``
    * block mapping: indented ``0: a`` lines below ``names:``
    * block sequence: ``- a`` lines below ``names:`` (any indentation)

    Args:
        yaml_path: Path of the YAML file to read (UTF-8).

    Returns:
        Mapping from class index to class name; empty when the file has no
        usable ``names`` entry.
    """
    lines = yaml_path.read_text(encoding="utf-8").splitlines()
    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped.startswith("names:"):
            continue

        inline = stripped[len("names:") :].strip()
        if inline:
            try:
                value = ast.literal_eval(inline)
            except (ValueError, SyntaxError):
                # YAML flow lists may hold bare words ([person, shoe]), which
                # are not Python literals; fall back to a manual split.
                value = None
                if inline.startswith("[") and inline.endswith("]"):
                    value = [
                        token.strip().strip("'\"")
                        for token in inline[1:-1].split(",")
                        if token.strip()
                    ]
            if isinstance(value, (list, tuple)):
                return {idx: str(name) for idx, name in enumerate(value)}
            if isinstance(value, dict):
                from_mapping = {
                    key: str(name) for key, name in value.items() if isinstance(key, int)
                }
                if from_mapping:
                    return from_mapping

        names: dict[int, str] = {}
        for child in lines[index + 1 :]:
            child_stripped = child.strip()
            if not child_stripped or child_stripped.startswith("#"):
                # Blank and comment lines do not end the names block.
                continue
            if child_stripped.startswith("- "):
                # Sequence items may sit at the same column as ``names:``.
                names[len(names)] = child_stripped[2:].strip().strip("'\"")
                continue
            if not child.startswith(" "):
                # An unindented key starts the next top-level entry.
                break
            if ":" not in child_stripped:
                continue
            key_text, value_text = child_stripped.split(":", 1)
            if key_text.strip().isdigit():
                names[int(key_text.strip())] = value_text.strip().strip("'\"")
        return names
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_source_spec(source_dir: Path) -> SourceSpec:
    """Work out which label ids of ``source_dir`` are persons and which are shoes.

    Class names come from ``data.yaml`` or, if that yields nothing, from
    ``dataset.yaml``. When neither file provides names, class id 0 is assumed
    to be the shoe class.

    Args:
        source_dir: Root directory of a YOLO-format source dataset.

    Returns:
        A ``SourceSpec`` describing the dataset.

    Raises:
        RuntimeError: If names were found but none of them is a known shoe label.
    """
    person_labels = {"person", "man", "woman", "boy", "girl"}
    shoe_labels = {"shoe", "footwear", "boot", "boots", "no_boots", "sandal", "high heels"}

    names: dict[int, str] = {}
    # Prefer data.yaml because ROI-source exports intentionally rewrite class ids.
    for yaml_name in ("data.yaml", "dataset.yaml"):
        candidate = source_dir / yaml_name
        if not candidate.exists():
            continue
        names = parse_names_from_yaml(candidate)
        if names:
            break

    person_ids = set()
    shoe_ids = set()
    for idx, name in names.items():
        lowered = name.lower()
        if lowered in person_labels:
            person_ids.add(idx)
        if lowered in shoe_labels:
            shoe_ids.add(idx)

    if not names:
        # No class table available: treat the dataset as single-class shoe.
        shoe_ids = {0}
    if not shoe_ids:
        raise RuntimeError(f"未能在 {source_dir} 识别鞋类标签")

    has_person_boxes = bool(person_ids)
    return SourceSpec(
        dataset_dir=source_dir,
        person_ids=person_ids,
        shoe_ids=shoe_ids,
        uses_person_boxes=has_person_boxes,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def load_annotations(
|
||||||
|
label_path: Path,
|
||||||
|
image_width: int,
|
||||||
|
image_height: int,
|
||||||
|
allowed_ids: set[int],
|
||||||
|
) -> list[tuple[int, Box]]:
|
||||||
|
annotations: list[tuple[int, Box]] = []
|
||||||
for raw_line in label_path.read_text(encoding="utf-8").splitlines():
|
for raw_line in label_path.read_text(encoding="utf-8").splitlines():
|
||||||
line = raw_line.strip()
|
line = raw_line.strip()
|
||||||
if not line:
|
if not line:
|
||||||
@ -116,6 +201,9 @@ def load_boxes(label_path: Path, image_width: int, image_height: int) -> list[Bo
|
|||||||
parts = line.split()
|
parts = line.split()
|
||||||
if len(parts) < 5:
|
if len(parts) < 5:
|
||||||
continue
|
continue
|
||||||
|
class_id = int(parts[0])
|
||||||
|
if class_id not in allowed_ids:
|
||||||
|
continue
|
||||||
_, xc, yc, w, h = parts[:5]
|
_, xc, yc, w, h = parts[:5]
|
||||||
box_w = float(w) * image_width
|
box_w = float(w) * image_width
|
||||||
box_h = float(h) * image_height
|
box_h = float(h) * image_height
|
||||||
@ -128,17 +216,20 @@ def load_boxes(label_path: Path, image_width: int, image_height: int) -> list[Bo
|
|||||||
center_y + box_h / 2.0,
|
center_y + box_h / 2.0,
|
||||||
).clip(image_width, image_height)
|
).clip(image_width, image_height)
|
||||||
if box is not None and box.area > 1.0:
|
if box is not None and box.area > 1.0:
|
||||||
boxes.append(box)
|
annotations.append((class_id, box))
|
||||||
return dedupe_boxes(boxes)
|
return dedupe_annotations(annotations)
|
||||||
|
|
||||||
|
|
||||||
def dedupe_boxes(boxes: list[Box], iou_threshold: float = 0.9) -> list[Box]:
|
def dedupe_annotations(
    annotations: list[tuple[int, Box]],
    iou_threshold: float = 0.9,
) -> list[tuple[int, Box]]:
    """Drop near-duplicate boxes that share a class id.

    Boxes are visited from largest to smallest area; a box is discarded when
    an already-kept box of the same class overlaps it with an IoU of at least
    ``iou_threshold``. Boxes of different classes never suppress each other.

    Returns:
        The surviving ``(class_id, box)`` pairs ordered by box centre
        (``cx``, then ``cy``).
    """
    largest_first = sorted(annotations, key=lambda pair: pair[1].area, reverse=True)
    survivors: list[tuple[int, Box]] = []
    for class_id, box in largest_first:
        duplicate = False
        for kept_id, kept_box in survivors:
            # The class comparison comes first so IoU is only computed for
            # same-class candidates.
            if class_id == kept_id and iou(box, kept_box) >= iou_threshold:
                duplicate = True
                break
        if not duplicate:
            survivors.append((class_id, box))
    survivors.sort(key=lambda pair: (pair[1].cx, pair[1].cy))
    return survivors
|
||||||
|
|
||||||
|
|
||||||
def iou(a: Box, b: Box) -> float:
|
def iou(a: Box, b: Box) -> float:
|
||||||
@ -202,16 +293,20 @@ def greedy_group_boxes(boxes: list[Box]) -> list[tuple[int, ...]]:
|
|||||||
return groups
|
return groups
|
||||||
|
|
||||||
|
|
||||||
def expand_single(box: Box) -> Box:
|
def estimate_person_from_single_shoe(box: Box) -> Box:
    """Approximate a loose full-body box for the wearer of one shoe box.

    The estimate is centred horizontally on the shoe, and its bottom edge
    sits slightly below the shoe so a small strip of ground stays in view.
    """
    est_w = max(4.2 * box.w, 2.8 * box.h)
    est_h = max(7.6 * box.h, 3.4 * box.w)
    # Bottom edge: the shoe bottom plus 8% of the estimated body height.
    bottom = box.y2 + (0.08 * est_h)
    left = box.cx - (est_w / 2.0)
    top = bottom - est_h
    return Box(left, top, left + est_w, bottom)
|
||||||
|
|
||||||
|
|
||||||
def expand_pair(boxes: list[Box], group: tuple[int, int]) -> Box:
|
def estimate_person_from_pair(boxes: list[Box], group: tuple[int, int]) -> Box:
|
||||||
|
"""Estimate a loose full-body box from a visible pair of shoes."""
|
||||||
first = boxes[group[0]]
|
first = boxes[group[0]]
|
||||||
second = boxes[group[1]]
|
second = boxes[group[1]]
|
||||||
union_x1 = min(first.x1, second.x1)
|
union_x1 = min(first.x1, second.x1)
|
||||||
@ -220,14 +315,28 @@ def expand_pair(boxes: list[Box], group: tuple[int, int]) -> Box:
|
|||||||
union_y2 = max(first.y2, second.y2)
|
union_y2 = max(first.y2, second.y2)
|
||||||
union_w = union_x2 - union_x1
|
union_w = union_x2 - union_x1
|
||||||
union_h = union_y2 - union_y1
|
union_h = union_y2 - union_y1
|
||||||
roi_x = union_x1 - (0.35 * union_w)
|
person_w = max(1.95 * union_w, 2.6 * union_h)
|
||||||
roi_y = union_y1 - (0.45 * union_h)
|
person_h = max(7.8 * union_h, 2.9 * union_w)
|
||||||
return Box(
|
person_cx = (union_x1 + union_x2) / 2.0
|
||||||
roi_x,
|
person_y2 = union_y2 + (0.08 * person_h)
|
||||||
roi_y,
|
person_x1 = person_cx - (person_w / 2.0)
|
||||||
roi_x + (1.7 * union_w),
|
person_y1 = person_y2 - person_h
|
||||||
roi_y + (2.0 * union_h),
|
return Box(person_x1, person_y1, person_x1 + person_w, person_y2)
|
||||||
)
|
|
||||||
|
|
||||||
|
def roi_from_person_box(person_box: Box) -> Box:
    """Derive the foot ROI from a person box using a loosened online rule.

    The base rule places the ROI at ``x - 0.24w``, ``y + 0.64h`` with size
    ``1.48w`` by ``0.58h``. The result is then shifted up and left by 8% of
    the base size and scaled by 1.16 (width) and 1.18 (height).
    """
    base_w = 1.48 * person_box.w
    base_h = 0.58 * person_box.h

    # Slightly enlarge to keep more trouser leg, ground, and side context than online.
    left = (person_box.x1 - (0.24 * person_box.w)) - 0.08 * base_w
    top = (person_box.y1 + (0.64 * person_box.h)) - 0.08 * base_h
    loose_w = base_w * 1.16
    loose_h = base_h * 1.18
    return Box(left, top, left + loose_w, top + loose_h)
|
||||||
|
|
||||||
|
|
||||||
def clamp_roi(roi: Box, image_width: int, image_height: int) -> Box | None:
|
def clamp_roi(roi: Box, image_width: int, image_height: int) -> Box | None:
|
||||||
@ -302,16 +411,28 @@ def boxes_in_roi(boxes: list[Box], roi: Box) -> list[Box]:
|
|||||||
return included
|
return included
|
||||||
|
|
||||||
|
|
||||||
|
def make_person_roi_samples(person_boxes: list[Box], image_width: int, image_height: int) -> list[RoiSample]:
    """Build one ROI sample per person box.

    Each person box is turned into a foot ROI and clamped to the image.
    Boxes whose clamped ROI is ``None`` are skipped. ``members`` holds the
    index of the originating person box and ``mode`` is ``"person"``.
    """
    samples: list[RoiSample] = []
    for person_idx, person_box in enumerate(person_boxes):
        clamped = clamp_roi(roi_from_person_box(person_box), image_width, image_height)
        if clamped is None:
            continue
        samples.append(RoiSample(roi=clamped, members=(person_idx,), mode="person"))
    return samples
|
||||||
|
|
||||||
|
|
||||||
def make_roi_samples(boxes: list[Box], image_width: int, image_height: int) -> list[RoiSample]:
|
def make_roi_samples(boxes: list[Box], image_width: int, image_height: int) -> list[RoiSample]:
|
||||||
samples: list[RoiSample] = []
|
samples: list[RoiSample] = []
|
||||||
groups = greedy_group_boxes(boxes)
|
groups = greedy_group_boxes(boxes)
|
||||||
for group in groups:
|
for group in groups:
|
||||||
if len(group) == 2:
|
if len(group) == 2:
|
||||||
roi = expand_pair(boxes, group)
|
person_box = estimate_person_from_pair(boxes, group)
|
||||||
|
roi = roi_from_person_box(person_box)
|
||||||
area_range = PAIR_AREA_RANGE
|
area_range = PAIR_AREA_RANGE
|
||||||
mode = "pair"
|
mode = "pair"
|
||||||
else:
|
else:
|
||||||
roi = expand_single(boxes[group[0]])
|
person_box = estimate_person_from_single_shoe(boxes[group[0]])
|
||||||
|
roi = roi_from_person_box(person_box)
|
||||||
area_range = SINGLE_AREA_RANGE
|
area_range = SINGLE_AREA_RANGE
|
||||||
mode = "single"
|
mode = "single"
|
||||||
|
|
||||||
@ -358,7 +479,7 @@ def write_yaml(output_dir: Path, sources: list[str]) -> None:
|
|||||||
" name: shoe-roi-mix",
|
" name: shoe-roi-mix",
|
||||||
" task: detect_shoe_roi",
|
" task: detect_shoe_roi",
|
||||||
f" source: {source_names}",
|
f" source: {source_names}",
|
||||||
" note: cropped to foot-context ROIs to match online two-stage inference",
|
" note: prefer person-bottom ROIs; current public data uses shoe-box fallback crops",
|
||||||
"",
|
"",
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
@ -366,13 +487,14 @@ def write_yaml(output_dir: Path, sources: list[str]) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_split(source_dir: Path, output_dir: Path, split: str) -> dict[str, int]:
|
def build_split(source_spec: SourceSpec, output_dir: Path, split: str) -> dict[str, int]:
|
||||||
|
source_dir = source_spec.dataset_dir
|
||||||
image_dir = source_dir / "images" / split
|
image_dir = source_dir / "images" / split
|
||||||
label_dir = source_dir / "labels" / split
|
label_dir = source_dir / "labels" / split
|
||||||
if not image_dir.exists() or not label_dir.exists():
|
if not image_dir.exists() or not label_dir.exists():
|
||||||
return {"images": 0, "boxes": 0, "single": 0, "pair": 0}
|
return {"images": 0, "boxes": 0, "single": 0, "pair": 0, "person": 0}
|
||||||
|
|
||||||
stats = {"images": 0, "boxes": 0, "single": 0, "pair": 0}
|
stats = {"images": 0, "boxes": 0, "single": 0, "pair": 0, "person": 0}
|
||||||
prefix = source_dir.name.replace("-", "_")
|
prefix = source_dir.name.replace("-", "_")
|
||||||
|
|
||||||
for label_path in sorted(label_dir.glob("*.txt")):
|
for label_path in sorted(label_dir.glob("*.txt")):
|
||||||
@ -383,13 +505,27 @@ def build_split(source_dir: Path, output_dir: Path, split: str) -> dict[str, int
|
|||||||
with Image.open(image_path) as image:
|
with Image.open(image_path) as image:
|
||||||
image = image.convert("RGB")
|
image = image.convert("RGB")
|
||||||
width, height = image.size
|
width, height = image.size
|
||||||
boxes = load_boxes(label_path, width, height)
|
annotations = load_annotations(
|
||||||
if not boxes:
|
label_path,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
source_spec.person_ids | source_spec.shoe_ids,
|
||||||
|
)
|
||||||
|
if not annotations:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
samples = make_roi_samples(boxes, width, height)
|
shoe_boxes = [box for class_id, box in annotations if class_id in source_spec.shoe_ids]
|
||||||
|
person_boxes = [box for class_id, box in annotations if class_id in source_spec.person_ids]
|
||||||
|
if not shoe_boxes:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if source_spec.uses_person_boxes and person_boxes:
|
||||||
|
samples = make_person_roi_samples(person_boxes, width, height)
|
||||||
|
else:
|
||||||
|
samples = make_roi_samples(shoe_boxes, width, height)
|
||||||
|
|
||||||
for sample_idx, sample in enumerate(samples):
|
for sample_idx, sample in enumerate(samples):
|
||||||
roi_boxes = boxes_in_roi(boxes, sample.roi)
|
roi_boxes = boxes_in_roi(shoe_boxes, sample.roi)
|
||||||
if not roi_boxes:
|
if not roi_boxes:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -420,18 +556,19 @@ def main() -> None:
|
|||||||
ensure_output_layout(output_dir)
|
ensure_output_layout(output_dir)
|
||||||
|
|
||||||
summary: dict[str, dict[str, dict[str, int]]] = defaultdict(dict)
|
summary: dict[str, dict[str, dict[str, int]]] = defaultdict(dict)
|
||||||
totals = {"images": 0, "boxes": 0, "single": 0, "pair": 0}
|
totals = {"images": 0, "boxes": 0, "single": 0, "pair": 0, "person": 0}
|
||||||
|
|
||||||
for source in args.sources:
|
for source in args.sources:
|
||||||
source_dir = Path(source)
|
source_dir = Path(source)
|
||||||
if not source_dir.exists():
|
if not source_dir.exists():
|
||||||
raise FileNotFoundError(f"Source dataset not found: {source_dir}")
|
raise FileNotFoundError(f"Source dataset not found: {source_dir}")
|
||||||
|
source_spec = resolve_source_spec(source_dir)
|
||||||
|
|
||||||
for split in ("train", "val", "test"):
|
for split in ("train", "val", "test"):
|
||||||
stats = build_split(source_dir, output_dir, split)
|
stats = build_split(source_spec, output_dir, split)
|
||||||
summary[source_dir.name][split] = stats
|
summary[source_dir.name][split] = stats
|
||||||
for key in totals:
|
for key in totals:
|
||||||
totals[key] += stats[key]
|
totals[key] += stats.get(key, 0)
|
||||||
|
|
||||||
write_yaml(output_dir, args.sources)
|
write_yaml(output_dir, args.sources)
|
||||||
|
|
||||||
@ -439,10 +576,10 @@ def main() -> None:
|
|||||||
for source_name, split_map in summary.items():
|
for source_name, split_map in summary.items():
|
||||||
print(f"[{source_name}]")
|
print(f"[{source_name}]")
|
||||||
for split in ("train", "val", "test"):
|
for split in ("train", "val", "test"):
|
||||||
stats = split_map.get(split, {"images": 0, "boxes": 0, "single": 0, "pair": 0})
|
stats = split_map.get(split, {"images": 0, "boxes": 0, "single": 0, "pair": 0, "person": 0})
|
||||||
print(
|
print(
|
||||||
f" {split}: rois={stats['images']} boxes={stats['boxes']} "
|
f" {split}: rois={stats['images']} boxes={stats['boxes']} "
|
||||||
f"single={stats['single']} pair={stats['pair']}"
|
f"person={stats['person']} single={stats['single']} pair={stats['pair']}"
|
||||||
)
|
)
|
||||||
|
|
||||||
print(
|
print(
|
||||||
|
|||||||
371
13_preview_roi_samples.ps1
Normal file
371
13_preview_roi_samples.ps1
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
# Treat every error as terminating so a failed step aborts the preview run.
$ErrorActionPreference = "Stop"

# System.Drawing supplies the Bitmap/Graphics/Pen types used for overlays and crops.
Add-Type -AssemblyName System.Drawing

# All paths are resolved relative to the folder that contains this script.
$repo = $PSScriptRoot
$outputDir = Join-Path -Path $repo -ChildPath "samples\roi_preview"

# Start from an empty output folder: drop any previous preview, then recreate it.
if (Test-Path $outputDir) {
    Remove-Item $outputDir -Recurse -Force
}
$null = New-Item -ItemType Directory -Path $outputDir

# Image file extensions probed (in this order) when looking up a sample by stem.
$imageExts = @(".jpg", ".jpeg", ".png", ".bmp", ".webp")
|
||||||
|
|
||||||
|
function New-Box {
    <#
    .SYNOPSIS
    Builds an axis-aligned box record from two corner points.
    .DESCRIPTION
    Returns a PSCustomObject with the raw corners (X1, Y1, X2, Y2) and the
    derived width (W), height (H), center (Cx, Cy) and Area. Width and height
    are floored at zero, so an axis whose corners are inverted reports a zero
    extent (and therefore zero area) while the corners and center stay as given.
    #>
    param(
        [double]$X1,
        [double]$Y1,
        [double]$X2,
        [double]$Y2
    )
    # Compute the extents once and reuse them for the area.
    $width = [Math]::Max(0.0, $X2 - $X1)
    $height = [Math]::Max(0.0, $Y2 - $Y1)
    $centerX = ($X1 + $X2) / 2.0
    $centerY = ($Y1 + $Y2) / 2.0
    return [PSCustomObject]@{
        X1   = $X1
        Y1   = $Y1
        X2   = $X2
        Y2   = $Y2
        W    = $width
        H    = $height
        Cx   = $centerX
        Cy   = $centerY
        Area = $width * $height
    }
}
|
||||||
|
|
||||||
|
function Clip-Box {
    <#
    .SYNOPSIS
    Clips a box to the rectangle [0, Width] x [0, Height].
    .DESCRIPTION
    Each corner coordinate is first raised to at least 0 and then lowered to at
    most the matching limit. Returns a new box built with New-Box, or $null when
    the clipped box has no positive width or height.
    #>
    param(
        $Box,
        [double]$Width,
        [double]$Height
    )
    # Shared clamp: lower bound 0 first, then the upper limit (same order for all four edges).
    $clamp = {
        param($value, $upper)
        [Math]::Min([Math]::Max($value, 0.0), $upper)
    }
    $left = & $clamp $Box.X1 $Width
    $top = & $clamp $Box.Y1 $Height
    $right = & $clamp $Box.X2 $Width
    $bottom = & $clamp $Box.Y2 $Height

    # Nothing left after clipping on either axis -> no box.
    if ($right -le $left) {
        return $null
    }
    if ($bottom -le $top) {
        return $null
    }
    return New-Box -X1 $left -Y1 $top -X2 $right -Y2 $bottom
}
|
||||||
|
|
||||||
|
function Find-Image {
    <#
    .SYNOPSIS
    Locates the image file that belongs to a label stem.
    .DESCRIPTION
    Tries "<ImageDir>\<Stem><ext>" for every extension in the script-level
    $imageExts list, in list order, and returns the first path that exists.
    Returns $null when no candidate exists.
    #>
    param(
        [string]$ImageDir,
        [string]$Stem
    )
    # Select-Object -First 1 keeps the first existing candidate in $imageExts order.
    $match = $imageExts |
        ForEach-Object { Join-Path $ImageDir ($Stem + $_) } |
        Where-Object { Test-Path $_ } |
        Select-Object -First 1
    if ($match) {
        return $match
    }
    return $null
}
|
||||||
|
|
||||||
|
function Load-Boxes {
    <#
    .SYNOPSIS
    Reads a label file and returns its boxes in pixel coordinates.
    .DESCRIPTION
    Each non-empty line is split on whitespace and must have at least five
    fields; shorter lines are skipped. Fields 2-5 are read as center-x,
    center-y, width and height expressed as fractions of the image size and are
    scaled by ImageWidth / ImageHeight. The first field is not used here.
    Every box is clipped to the image, and boxes whose clipped area is not
    greater than 1 pixel^2 are dropped.
    .OUTPUTS
    Always an array (possibly empty) of box objects sorted by Cx, then Cy.
    The array is emitted as a single object, so assign the result to a variable
    rather than piping it element by element.
    #>
    param(
        [string]$LabelPath,
        [int]$ImageWidth,
        [int]$ImageHeight
    )
    $boxes = @()
    foreach ($line in Get-Content $LabelPath) {
        $text = $line.Trim()
        if (-not $text) {
            continue
        }
        $parts = $text -split "\s+"
        if ($parts.Length -lt 5) {
            continue
        }
        # Convert the fractional center/size values to pixel units.
        $xc = [double]$parts[1] * $ImageWidth
        $yc = [double]$parts[2] * $ImageHeight
        $w = [double]$parts[3] * $ImageWidth
        $h = [double]$parts[4] * $ImageHeight
        $box = New-Box -X1 ($xc - $w / 2.0) -Y1 ($yc - $h / 2.0) -X2 ($xc + $w / 2.0) -Y2 ($yc + $h / 2.0)
        $clipped = Clip-Box -Box $box -Width $ImageWidth -Height $ImageHeight
        if ($null -ne $clipped -and $clipped.Area -gt 1.0) {
            $boxes += $clipped
        }
    }
    # PowerShell unrolls pipeline output, so a single box would otherwise reach
    # the caller as a bare PSCustomObject. Callers use .Count and [index] on the
    # result, and .Count is not available on a scalar PSCustomObject in Windows
    # PowerShell 5.1. @() rebuilds the array and the unary comma keeps it intact
    # across the return.
    return ,@($boxes | Sort-Object Cx, Cy)
}
|
||||||
|
|
||||||
|
function Should-Pair {
    <#
    .SYNOPSIS
    Decides whether two shoe boxes look like a left/right pair.
    .DESCRIPTION
    Uses the larger width and the larger height of the two boxes as reference
    sizes. The boxes pair up when their centers are at most 3.2 reference widths
    apart horizontally and 1.2 reference heights apart vertically, and the
    Left/Right area ratio lies within [0.4, 2.5]. Returns $false when either
    reference size is not positive.
    #>
    param($Left, $Right)
    $refW = [Math]::Max($Left.W, $Right.W)
    $refH = [Math]::Max($Left.H, $Right.H)
    if ($refW -le 0 -or $refH -le 0) {
        return $false
    }

    # Center-to-center distance must stay within the size-relative limits.
    $gapX = [Math]::Abs($Left.Cx - $Right.Cx)
    if (-not ($gapX -le ($refW * 3.2))) {
        return $false
    }
    $gapY = [Math]::Abs($Left.Cy - $Right.Cy)
    if (-not ($gapY -le ($refH * 1.2))) {
        return $false
    }

    # A zero-area right box yields +Infinity, which fails the upper bound below.
    $sizeRatio = [double]::PositiveInfinity
    if ($Right.Area -gt 0) {
        $sizeRatio = $Left.Area / $Right.Area
    }
    return ($sizeRatio -ge 0.4 -and $sizeRatio -le 2.5)
}
|
||||||
|
|
||||||
|
function Get-Groups {
    <#
    .SYNOPSIS
    Greedily groups boxes into pairs and leftover singles.
    .DESCRIPTION
    Every index pair (i, j) accepted by Should-Pair becomes a candidate scored
    by |dCx| + 0.5 * |dCy|. Candidates are taken in ascending score order, and a
    candidate is skipped when either of its boxes is already used. Boxes that
    end up in no pair are appended as one-element groups.
    .OUTPUTS
    Always an array of groups, where each group is an array of one or two
    indices into $Boxes. The array is emitted as a single object, so assign the
    result to a variable rather than piping it.
    #>
    param($Boxes)

    # Accept $null or a lone box as well as a list, so .Count and indexing are safe.
    $items = @()
    if ($null -ne $Boxes) {
        $items = @($Boxes)
    }

    $groups = @()
    $used = @{}
    $candidates = @()
    for ($i = 0; $i -lt $items.Count; $i++) {
        for ($j = $i + 1; $j -lt $items.Count; $j++) {
            if (Should-Pair $items[$i] $items[$j]) {
                $score = [Math]::Abs($items[$i].Cx - $items[$j].Cx) + 0.5 * [Math]::Abs($items[$i].Cy - $items[$j].Cy)
                $candidates += [PSCustomObject]@{ Score = $score; I = $i; J = $j }
            }
        }
    }
    # Closest candidates win; each box may belong to at most one pair.
    foreach ($candidate in ($candidates | Sort-Object Score)) {
        if ($used.ContainsKey($candidate.I) -or $used.ContainsKey($candidate.J)) {
            continue
        }
        $used[$candidate.I] = $true
        $used[$candidate.J] = $true
        $groups += ,@($candidate.I, $candidate.J)
    }
    for ($i = 0; $i -lt $items.Count; $i++) {
        if (-not $used.ContainsKey($i)) {
            $groups += ,@($i)
        }
    }
    # A plain "return $groups" is unrolled by the pipeline: when there is exactly
    # one group, the caller would receive that group's indices as a flat list and
    # treat a pair as two singles. The unary comma keeps the outer array intact.
    return ,$groups
}
|
||||||
|
|
||||||
|
function Estimate-PersonFromSingle {
    <#
    .SYNOPSIS
    Estimates a person box from a single shoe box.
    .DESCRIPTION
    The estimated width is max(4.2 * shoe W, 2.8 * shoe H) and the height is
    max(7.6 * shoe H, 3.4 * shoe W). The box is centered horizontally on the
    shoe, and its bottom edge lies 8% of the estimated height below the shoe's
    bottom edge. The result is not clipped to the image.
    #>
    param($Box)
    $estW = [Math]::Max(4.2 * $Box.W, 2.8 * $Box.H)
    $estH = [Math]::Max(7.6 * $Box.H, 3.4 * $Box.W)

    # Anchor at the bottom edge and grow upward by the estimated height.
    $bottom = $Box.Y2 + 0.08 * $estH
    $left = $Box.Cx - $estW / 2.0
    $top = $bottom - $estH
    return New-Box -X1 $left -Y1 $top -X2 ($left + $estW) -Y2 $bottom
}
|
||||||
|
|
||||||
|
function Estimate-PersonFromPair {
    <#
    .SYNOPSIS
    Estimates a person box from a pair of shoe boxes.
    .DESCRIPTION
    Works on the union rectangle of the two shoes. The estimated width is
    max(1.95 * union W, 2.6 * union H) and the height is
    max(7.8 * union H, 2.9 * union W). The box is centered horizontally on the
    union, and its bottom edge lies 8% of the estimated height below the union's
    bottom edge. The result is not clipped to the image.
    #>
    param($A, $B)
    # Union rectangle of both shoes.
    $unionLeft = [Math]::Min($A.X1, $B.X1)
    $unionTop = [Math]::Min($A.Y1, $B.Y1)
    $unionRight = [Math]::Max($A.X2, $B.X2)
    $unionBottom = [Math]::Max($A.Y2, $B.Y2)
    $unionW = $unionRight - $unionLeft
    $unionH = $unionBottom - $unionTop

    $estW = [Math]::Max(1.95 * $unionW, 2.6 * $unionH)
    $estH = [Math]::Max(7.8 * $unionH, 2.9 * $unionW)

    # Anchor at the bottom edge and grow upward by the estimated height.
    $midX = ($unionLeft + $unionRight) / 2.0
    $bottom = $unionBottom + 0.08 * $estH
    $left = $midX - $estW / 2.0
    $top = $bottom - $estH
    return New-Box -X1 $left -Y1 $top -X2 ($left + $estW) -Y2 $bottom
}
|
||||||
|
|
||||||
|
function Get-RoiFromPerson {
    <#
    .SYNOPSIS
    Derives the foot-region ROI from a person box.
    .DESCRIPTION
    Starts from the base rectangle x = X1 - 0.24W, y = Y1 + 0.64H,
    w = 1.48W, h = 0.58H (W and H being the person's size). It then shifts the
    origin left/up by 8% of the base size and scales the width by 1.16 and the
    height by 1.18. The result is not clipped to the image.
    #>
    param($Person)
    # Base lower-body rectangle.
    $baseX = $Person.X1 - 0.24 * $Person.W
    $baseY = $Person.Y1 + 0.64 * $Person.H
    $baseW = 1.48 * $Person.W
    $baseH = 0.58 * $Person.H

    # Extra margin around the base rectangle.
    $paddedX = $baseX - 0.08 * $baseW
    $paddedY = $baseY - 0.08 * $baseH
    $paddedW = $baseW * 1.16
    $paddedH = $baseH * 1.18
    return New-Box -X1 $paddedX -Y1 $paddedY -X2 ($paddedX + $paddedW) -Y2 ($paddedY + $paddedH)
}
|
||||||
|
|
||||||
|
function Clamp-Roi {
    <#
    .SYNOPSIS
    Clips an ROI to the image and snaps it to whole pixels.
    .DESCRIPTION
    The ROI is clipped with Clip-Box; $null is returned when nothing remains.
    The top-left corner is then floored and the bottom-right corner is ceiled,
    keeping the origin inside [0, size - 1] and the far edge at least one pixel
    beyond the origin and at most at the image size.
    #>
    param(
        $Roi,
        [int]$ImageWidth,
        [int]$ImageHeight
    )
    $inside = Clip-Box -Box $Roi -Width $ImageWidth -Height $ImageHeight
    if ($null -eq $inside) {
        return $null
    }

    # Round outward: floor the origin, ceil the far corner.
    $floorX = [int][Math]::Floor($inside.X1)
    $floorY = [int][Math]::Floor($inside.Y1)
    $ceilX = [int][Math]::Ceiling($inside.X2)
    $ceilY = [int][Math]::Ceiling($inside.Y2)

    $left = [Math]::Max(0, [Math]::Min($floorX, $ImageWidth - 1))
    $top = [Math]::Max(0, [Math]::Min($floorY, $ImageHeight - 1))
    # Guarantee at least one pixel of width and height.
    $right = [Math]::Max($left + 1, [Math]::Min($ceilX, $ImageWidth))
    $bottom = [Math]::Max($top + 1, [Math]::Min($ceilY, $ImageHeight))
    return New-Box -X1 $left -Y1 $top -X2 $right -Y2 $bottom
}
|
||||||
|
|
||||||
|
function Resize-RoiToTarget {
    <#
    .SYNOPSIS
    Rescales an ROI so the object/ROI area ratio moves into [MinRatio, MaxRatio].
    .DESCRIPTION
    Makes up to three adjustment passes. On each pass the ROI is returned as-is
    when the ratio ObjectArea / ROI area is already in range. Otherwise the ROI
    is scaled about its center by sqrt(ratio / midpoint of the range): limited
    to [0.6, 0.95] when the ratio is below MinRatio (ROI shrinks) and to
    [1.05, 1.8] otherwise (ROI grows). The scaled ROI is re-clamped to the
    image with Clamp-Roi. Returns $null if clamping leaves nothing; after three
    passes the last ROI is returned even if still out of range.
    #>
    param(
        $Roi,
        [int]$ImageWidth,
        [int]$ImageHeight,
        [double]$ObjectArea,
        [double]$MinRatio,
        [double]$MaxRatio
    )
    $current = $Roi
    $goal = ($MinRatio + $MaxRatio) / 2.0
    foreach ($pass in 1..3) {
        # A zero-area ROI counts as ratio 0.
        $fill = 0.0
        if ($current.Area -gt 0) {
            $fill = $ObjectArea / $current.Area
        }
        if ($fill -ge $MinRatio -and $fill -le $MaxRatio) {
            return $current
        }

        $factor = [Math]::Sqrt($fill / $goal)
        if ($fill -lt $MinRatio) {
            # Object too small relative to the ROI: shrink, by 5% to 40% per pass.
            $factor = [Math]::Max(0.6, [Math]::Min(0.95, $factor))
        }
        else {
            # Object too large relative to the ROI: grow, by 5% to 80% per pass.
            $factor = [Math]::Min(1.8, [Math]::Max(1.05, $factor))
        }

        $halfW = ($current.W * $factor) / 2.0
        $halfH = ($current.H * $factor) / 2.0
        $midX = $current.Cx
        $midY = $current.Cy
        $scaled = New-Box -X1 ($midX - $halfW) -Y1 ($midY - $halfH) -X2 ($midX + $halfW) -Y2 ($midY + $halfH)
        $current = Clamp-Roi $scaled $ImageWidth $ImageHeight
        if ($null -eq $current) {
            return $null
        }
    }
    return $current
}
|
||||||
|
|
||||||
|
function Draw-Rect {
    <#
    .SYNOPSIS
    Outlines a box on a Graphics surface with the given pen.
    .DESCRIPTION
    Passes the box's X1/Y1 origin and W/H size to Graphics.DrawRectangle as
    single-precision floats.
    #>
    param(
        [System.Drawing.Graphics]$Graphics,
        [System.Drawing.Pen]$Pen,
        $Box
    )
    $originX = [float]$Box.X1
    $originY = [float]$Box.Y1
    $spanW = [float]$Box.W
    $spanH = [float]$Box.H
    $Graphics.DrawRectangle($Pen, $originX, $originY, $spanW, $spanH)
}
|
||||||
|
|
||||||
|
# Hand-picked samples to preview. Each entry names a dataset folder (relative to
# the script folder), a split, the file stem shared by image and label, and the
# label used as the prefix of the generated preview files.
$samples = @(
    @{ Dataset = "datasets\openimages-shoes-yolo"; Split = "train"; Stem = "00015a7cf95ec19d"; Label = "openimages_single" },
    @{ Dataset = "datasets\openimages-shoes-yolo"; Split = "train"; Stem = "0036655159bdef7f"; Label = "openimages_pair" },
    @{ Dataset = "datasets\openimages-shoes-yolo"; Split = "train"; Stem = "00003223e04e2e66"; Label = "openimages_mixed" },
    @{ Dataset = "datasets\ppe-shoes"; Split = "train"; Stem = "image1001"; Label = "ppe_pair_a" },
    @{ Dataset = "datasets\ppe-shoes"; Split = "train"; Stem = "image1011"; Label = "ppe_pair_b" }
)

# One row per written ROI crop; dumped to summary.txt at the end.
$summary = @()

foreach ($sample in $samples) {
    $imageDir = Join-Path $repo (Join-Path $sample.Dataset ("images\" + $sample.Split))
    $labelPath = Join-Path $repo (Join-Path $sample.Dataset ("labels\" + $sample.Split + "\" + $sample.Stem + ".txt"))
    $imagePath = Find-Image -ImageDir $imageDir -Stem $sample.Stem
    # Samples whose image or label file is missing are silently skipped.
    if (-not $imagePath -or -not (Test-Path $labelPath)) {
        continue
    }

    $bitmap = [System.Drawing.Bitmap]::new($imagePath)
    try {
        $boxes = Load-Boxes -LabelPath $labelPath -ImageWidth $bitmap.Width -ImageHeight $bitmap.Height
        if ($null -eq $boxes) {
            continue
        }
        # A single box may arrive as a bare object; force an array so that .Count
        # and [index] below behave the same for one box as for many.
        $boxes = @($boxes)
        if ($boxes.Count -eq 0) {
            continue
        }

        $groups = Get-Groups -Boxes $boxes
        # Draw on a copy so crops taken from $bitmap stay free of overlay lines.
        $canvas = [System.Drawing.Bitmap]::new($bitmap)
        $graphics = [System.Drawing.Graphics]::FromImage($canvas)
        $graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::HighQuality
        $shoePen = [System.Drawing.Pen]::new([System.Drawing.Color]::Lime, 3)
        $roiPen = [System.Drawing.Pen]::new([System.Drawing.Color]::Red, 4)
        $font = [System.Drawing.Font]::new("Arial", 18, [System.Drawing.FontStyle]::Bold)
        $brush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::Yellow)

        try {
            for ($i = 0; $i -lt $boxes.Count; $i++) {
                Draw-Rect -Graphics $graphics -Pen $shoePen -Box $boxes[$i]
            }

            $roiIndex = 0
            foreach ($group in $groups) {
                # Pairs and singles use different person estimates and different
                # target ranges for the shoe-area / ROI-area ratio.
                if ($group.Count -eq 2) {
                    $person = Estimate-PersonFromPair $boxes[$group[0]] $boxes[$group[1]]
                    $roi = Get-RoiFromPerson $person
                    $minRatio = 0.18
                    $maxRatio = 0.40
                    $mode = "pair"
                }
                else {
                    $person = Estimate-PersonFromSingle $boxes[$group[0]]
                    $roi = Get-RoiFromPerson $person
                    $minRatio = 0.10
                    $maxRatio = 0.26
                    $mode = "single"
                }
                $roi = Clamp-Roi $roi $bitmap.Width $bitmap.Height
                if ($null -eq $roi) {
                    continue
                }
                $objectArea = 0.0
                foreach ($idx in $group) {
                    $objectArea += $boxes[$idx].Area
                }
                $roi = Resize-RoiToTarget $roi $bitmap.Width $bitmap.Height $objectArea $minRatio $maxRatio
                if ($null -eq $roi) {
                    continue
                }

                Draw-Rect -Graphics $graphics -Pen $roiPen -Box $roi
                $graphics.DrawString("ROI$roiIndex", $font, $brush, [float]($roi.X1 + 4), [float]([Math]::Max(0, $roi.Y1 - 28)))

                $cropRect = [System.Drawing.Rectangle]::new([int]$roi.X1, [int]$roi.Y1, [int]$roi.W, [int]$roi.H)
                # Clone into a fixed 24bpp RGB format: Graphics.FromImage rejects
                # indexed pixel formats (e.g. 8bpp grayscale sources), and the
                # crop is saved as JPEG anyway.
                $crop = $bitmap.Clone($cropRect, [System.Drawing.Imaging.PixelFormat]::Format24bppRgb)
                try {
                    $cropGraphics = [System.Drawing.Graphics]::FromImage($crop)
                    $cropPen = [System.Drawing.Pen]::new([System.Drawing.Color]::Lime, 3)
                    try {
                        foreach ($box in $boxes) {
                            # Only boxes whose center lies inside the ROI are drawn on the crop.
                            if ($box.Cx -lt $roi.X1 -or $box.Cx -gt $roi.X2 -or $box.Cy -lt $roi.Y1 -or $box.Cy -gt $roi.Y2) {
                                continue
                            }
                            # Translate into crop-local coordinates, then clip to the crop.
                            $local = New-Box -X1 ($box.X1 - $roi.X1) -Y1 ($box.Y1 - $roi.Y1) -X2 ($box.X2 - $roi.X1) -Y2 ($box.Y2 - $roi.Y1)
                            $local = Clip-Box -Box $local -Width $roi.W -Height $roi.H
                            if ($null -ne $local) {
                                Draw-Rect -Graphics $cropGraphics -Pen $cropPen -Box $local
                            }
                        }
                    }
                    finally {
                        $cropGraphics.Dispose()
                        $cropPen.Dispose()
                    }

                    $cropPath = Join-Path $outputDir ($sample.Label + "_roi" + $roiIndex + ".jpg")
                    $crop.Save($cropPath, [System.Drawing.Imaging.ImageFormat]::Jpeg)
                }
                finally {
                    # Release the crop even when drawing or saving fails.
                    $crop.Dispose()
                }

                $areaRatio = [Math]::Round($objectArea / $roi.Area, 3)
                $summary += [PSCustomObject]@{
                    Sample = $sample.Label
                    Roi = "roi$roiIndex"
                    Mode = $mode
                    Boxes = $group.Count
                    AreaRatio = $areaRatio
                    Crop = $cropPath
                }
                $roiIndex++
            }

            $overlayPath = Join-Path $outputDir ($sample.Label + "_overlay.jpg")
            $canvas.Save($overlayPath, [System.Drawing.Imaging.ImageFormat]::Jpeg)
        }
        finally {
            $graphics.Dispose()
            $shoePen.Dispose()
            $roiPen.Dispose()
            $font.Dispose()
            $brush.Dispose()
            $canvas.Dispose()
        }
    }
    finally {
        $bitmap.Dispose()
    }
}

# Write the per-ROI table next to the preview images.
$summaryPath = Join-Path $outputDir "summary.txt"
$summary | Sort-Object Sample, Roi | Format-Table -AutoSize | Out-String | Set-Content -Path $summaryPath -Encoding UTF8
Write-Output "Preview images written to: $outputDir"
Write-Output "Summary written to: $summaryPath"
|
||||||
76
README.md
76
README.md
@ -5,7 +5,7 @@
|
|||||||
当前项目的主训练方向已经调整为:
|
当前项目的主训练方向已经调整为:
|
||||||
- 只训练 `yolov8s`、输入尺寸固定 `640x640`
|
- 只训练 `yolov8s`、输入尺寸固定 `640x640`
|
||||||
- 训练数据不再直接使用“整张场景图”或“鞋子纯特写图”
|
- 训练数据不再直接使用“整张场景图”或“鞋子纯特写图”
|
||||||
- 先根据鞋框裁出更接近线上输入分布的“脚部 ROI 图”,再训练鞋检测模型
|
- 优先根据真实`人体框`裁出更接近线上输入分布的“脚部 ROI 图”,再训练鞋检测模型
|
||||||
|
|
||||||
这样做的原因是线上链路并不是直接在整张图上找鞋,而是:
|
这样做的原因是线上链路并不是直接在整张图上找鞋,而是:
|
||||||
1. 先从人体框生成脚部 ROI
|
1. 先从人体框生成脚部 ROI
|
||||||
@ -15,30 +15,70 @@
|
|||||||
|
|
||||||
### ROI 规则
|
### ROI 规则
|
||||||
|
|
||||||
单鞋 ROI:
|
优先原则:
|
||||||
- 已知鞋框 `(x, y, w, h)`
|
- 最优方式:如果有`人体框`,直接按线上人体下部 ROI 规则裁图
|
||||||
- `roi_x = x - 0.6w`
|
- 次优方式:如果只有`鞋框`,再按鞋框扩图,尽量模拟人体下部脚部 ROI 的视觉分布
|
||||||
- `roi_y = y - 0.5h`
|
|
||||||
- `roi_w = 2.2w`
|
|
||||||
- `roi_h = 2.4h`
|
|
||||||
|
|
||||||
双鞋 ROI:
|
线上人体下部 ROI 规则:
|
||||||
- 优先把两只鞋裁进同一张 ROI
|
- 已知人体框 `(x, y, w, h)`
|
||||||
- 先取两只鞋框并集,再扩框:
|
- `roi_x = x - 0.24w`
|
||||||
- `roi_x = union_x - 0.35 * union_w`
|
- `roi_y = y + 0.64h`
|
||||||
- `roi_y = union_y - 0.45 * union_h`
|
- `roi_w = 1.48w`
|
||||||
- `roi_w = 1.7 * union_w`
|
- `roi_h = 0.58h`
|
||||||
- `roi_h = 2.0 * union_h`
|
|
||||||
|
|
||||||
裁图会自动裁剪到图像边界内。
|
这条规则的目标是:
|
||||||
|
- 横向比人体略宽,尽量把双脚都包进去
|
||||||
|
- 纵向覆盖人体下部到脚下地面
|
||||||
|
- 让鞋模型看到的输入更接近真实线上两阶段链路
|
||||||
|
|
||||||
|
当前推荐的数据准备方式:
|
||||||
|
|
||||||
|
1. PPE 数据保留 `Person + shoe`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python 05_prepare_ppe_shoe_subset.py --mode roi-source
|
||||||
|
```
|
||||||
|
|
||||||
|
输出目录:
|
||||||
|
- `datasets/ppe-person-shoes`
|
||||||
|
|
||||||
|
2. Open Images 重新下载,并保留 `Person + shoe`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python 01_download_dataset.py --source openimages --mode roi-source --max-samples 8000
|
||||||
|
```
|
||||||
|
|
||||||
|
默认输出目录:
|
||||||
|
- `datasets/openimages-person-shoes-yolo`
|
||||||
|
|
||||||
|
3. 构建 ROI 化训练集
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python 09_build_roi_shoe_dataset.py --clean
|
||||||
|
```
|
||||||
|
|
||||||
|
构建规则:
|
||||||
|
- 如果源数据里有真实 `Person` 框,直接按人体下部 ROI 规则裁图
|
||||||
|
- 只有在没有人框时,才退回鞋框扩图 fallback
|
||||||
|
|
||||||
|
如果只有鞋框,没有人体框:
|
||||||
|
- 仍然可以做鞋框 fallback 扩图
|
||||||
|
- 但目标不是做鞋子纯特写,而是尽量近似“人体下部脚部 ROI”
|
||||||
|
- 也就是保留一定裤脚、脚下地面和周围背景
|
||||||
|
- 裁图会自动截到图像边界内
|
||||||
|
|
||||||
|
训练目标总结:
|
||||||
|
- 不要做鞋子纯特写
|
||||||
|
- 要做“脚部局部图”
|
||||||
|
- 让训练输入尽量贴近线上“人体下部脚部 ROI”
|
||||||
|
|
||||||
### 新主流程
|
### 新主流程
|
||||||
|
|
||||||
1. 准备原始单类鞋数据集
|
1. 准备 ROI 源数据集
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python 01_download_dataset.py --source openimages --max-samples 5000
|
python 05_prepare_ppe_shoe_subset.py --mode roi-source
|
||||||
python 05_prepare_ppe_shoe_subset.py
|
python 01_download_dataset.py --source openimages --mode roi-source --max-samples 8000
|
||||||
```
|
```
|
||||||
|
|
||||||
2. 构建 ROI 化训练集
|
2. 构建 ROI 化训练集
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user