From ce48dee42c42cc6486846853f3a9d735415c6429 Mon Sep 17 00:00:00 2001 From: sladro Date: Thu, 2 Apr 2026 14:07:59 +0800 Subject: [PATCH] Handle stray artifact files during restore --- engine/artifact_manager.py | 51 ++++++++++++++++++++++++++++++++-- tests/test_artifact_manager.py | 28 +++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/engine/artifact_manager.py b/engine/artifact_manager.py index 7034458..e344cd1 100644 --- a/engine/artifact_manager.py +++ b/engine/artifact_manager.py @@ -13,6 +13,51 @@ from engine.models import BaselineSnapshot, TaskSpec class ArtifactManager: task: TaskSpec + def _is_excluded(self, relative_path: str) -> bool: + return any(fnmatch(relative_path, exclude) for exclude in self.task.artifacts.exclude) + + def _artifact_roots(self) -> list[Path]: + root_dir = self.task.root_dir + roots: list[Path] = [] + for pattern in self.task.artifacts.include: + normalized_pattern = pattern.replace("\\", "/") + segments = [segment for segment in normalized_pattern.split("/") if segment and segment != "."] + prefix: list[str] = [] + for segment in segments: + if any(char in segment for char in "*?["): + break + prefix.append(segment) + if prefix: + roots.append(root_dir.joinpath(*prefix)) + continue + roots.append(root_dir) + minimal_roots: list[Path] = [] + for root in sorted(set(roots), key=lambda candidate: (len(candidate.parts), candidate.as_posix())): + if any(existing == root or existing in root.parents for existing in minimal_roots): + continue + minimal_roots.append(root) + return minimal_roots + + def _current_paths_within_roots(self) -> list[Path]: + root_dir = self.task.root_dir + discovered: set[Path] = set() + for root in self._artifact_roots(): + if not root.exists(): + continue + if root.is_file(): + relative_path = root.relative_to(root_dir).as_posix() + if not self._is_excluded(relative_path): + discovered.add(root) + continue + for path in root.rglob("*"): + if not path.is_file(): + continue + relative_path = path.relative_to(root_dir).as_posix() + if self._is_excluded(relative_path): + continue + discovered.add(path) + return sorted(discovered) + def resolve_paths(self) -> list[Path]: root_dir = self.task.root_dir resolved: set[Path] = set() @@ -21,7 +66,7 @@ class ArtifactManager: if not path.is_file(): continue relative_path = path.relative_to(root_dir).as_posix() - if any(fnmatch(relative_path, exclude) for exclude in self.task.artifacts.exclude): + if self._is_excluded(relative_path): continue resolved.add(path) return sorted(resolved) @@ -37,7 +82,7 @@ class ArtifactManager: return BaselineSnapshot(file_contents=file_contents, file_hashes=file_hashes) def restore(self, snapshot: BaselineSnapshot) -> None: - current_paths = set(self.resolve_paths()) + current_paths = set(self._current_paths_within_roots()) snapshot_paths = set(snapshot.file_contents) for path in current_paths - snapshot_paths: path.unlink() @@ -49,7 +94,7 @@ class ArtifactManager: def diff_summary(self, snapshot: BaselineSnapshot) -> str: lines: list[str] = [] current_contents: dict[Path, str] = {} - for path in self.resolve_paths(): + for path in self._current_paths_within_roots(): with path.open("r", encoding="utf-8", newline="") as handle: current_contents[path] = handle.read() all_paths = sorted(set(snapshot.file_contents) | set(current_contents)) diff --git a/tests/test_artifact_manager.py b/tests/test_artifact_manager.py index a7ca848..c4be45d 100644 --- a/tests/test_artifact_manager.py +++ b/tests/test_artifact_manager.py @@ -107,6 +107,34 @@ class ArtifactManagerTest(unittest.TestCase): self.assertIn("-before", summary) self.assertIn("+after", summary) + def test_diff_summary_and_restore_handle_stray_file_outside_include_glob(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + artifact_dir = root / "artifacts" + artifact_dir.mkdir() + target = artifact_dir / "sample.md" + target.write_text("baseline\n", encoding="utf-8") + manager = ArtifactManager(make_task(root)) + snapshot = manager.snapshot() + + target.unlink() + stray = artifact_dir / "archive" / "sample.txt" + stray.parent.mkdir() + stray.write_text("renamed\n", encoding="utf-8") + + summary = manager.diff_summary(snapshot) + + self.assertIn("artifacts/sample.md (before)", summary) + self.assertIn("artifacts/archive/sample.txt (after)", summary) + self.assertIn("-baseline", summary) + self.assertIn("+renamed", summary) + + manager.restore(snapshot) + + self.assertFalse(stray.exists()) + self.assertTrue(target.exists()) + self.assertEqual(target.read_text(encoding="utf-8"), "baseline\n") + if __name__ == "__main__": unittest.main()