"""Unit tests for ``engine.task_loader``: loading a task file and its validation errors."""

from pathlib import Path
import tempfile
import unittest

from engine.task_loader import TaskValidationError, load_task


# Minimal task definition (YAML) that the tests below load as-is or mutate
# with ``str.replace`` to provoke validation errors.
#
# Nested keys use ONE-space indentation on purpose: the tests remove or
# replace fragments such as ' cwd: "."\n' and
# 'objective:\n primary_metric: score\n direction: maximize\n', and those
# fragments must match this text exactly.
#
# The doubled backslashes keep a literal ``\"`` in the YAML text, which YAML
# then unescapes to ``"`` inside the double-quoted command strings.
#
# NOTE(review): the nesting (in particular ``parse`` living under ``scorer``)
# was reconstructed from the key order — confirm against the task schema.
VALID_TASK = """
id: demo
description: Demo task
artifacts:
 include:
  - tasks/demo/sample.txt
 exclude: []
 max_files_per_iteration: 1
mutation:
 mode: direct_edit
 allowed_file_types: [".txt"]
 max_changed_lines: 10
mutator:
 type: command
 command: "python -c \\\"print('mutate')\\\""
 cwd: "."
 timeout_seconds: 5
runner:
 command: "python -c \\\"print('run')\\\""
 cwd: "."
 timeout_seconds: 10
scorer:
 type: command
 command: "python -c \\\"import json; print(json.dumps({'score': 1, 'metrics': {'violation_count': 0}}))\\\""
 timeout_seconds: 15
 parse:
  format: json
  score_field: "score"
  metrics_field: "metrics"
objective:
 primary_metric: score
 direction: maximize
constraints:
 - metric: violation_count
   op: "<="
   value: 0
policy:
 keep_if: better_primary
 tie_breakers: []
 on_failure: discard
budget:
 max_iterations: 3
 max_failures: 1
logging:
 results_file: work/results.jsonl
 candidate_dir: work/candidates
"""


class TaskLoaderTest(unittest.TestCase):
    """Exercise ``load_task`` with a valid task file and several broken variants.

    Negative cases start from ``VALID_TASK``, remove or replace one fragment,
    and expect ``TaskValidationError``; most also check that the error text
    names the offending field.
    """

    def write_task(self, content: str) -> Path:
        """Write *content* to ``task.yaml`` in a fresh temporary directory.

        The directory is removed automatically when the test finishes, via
        ``addCleanup``.

        Args:
            content: Text to store in the task file (written as UTF-8).

        Returns:
            Path of the written ``task.yaml``.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Register cleanup before writing so the directory is removed even if
        # the write below fails.
        self.addCleanup(temp_dir.cleanup)
        task_path = Path(temp_dir.name) / "task.yaml"
        task_path.write_text(content, encoding="utf-8")
        return task_path

    def test_loads_minimal_task(self) -> None:
        """A well-formed task loads and exposes the parsed fields."""
        task = load_task(self.write_task(VALID_TASK))
        self.assertEqual(task.id, "demo")
        self.assertEqual(task.artifacts.max_files_per_iteration, 1)
        self.assertEqual(task.constraints[0].metric, "violation_count")
        self.assertEqual(task.mutator.type, "command")
        self.assertEqual(task.mutator.command, "python -c \"print('mutate')\"")
        self.assertEqual(task.mutator.cwd, ".")
        self.assertEqual(task.mutator.timeout_seconds, 5)
        self.assertEqual(task.runner.timeout_seconds, 10)
        self.assertEqual(task.scorer.timeout_seconds, 15)

    def test_loads_repository_sample_task(self) -> None:
        """``tasks/skill-quality/task.yaml`` loads with the expected settings."""
        # NOTE(review): this is a relative path, so it presumably requires the
        # suite to be run from the repository root — confirm.
        task = load_task(Path("tasks/skill-quality/task.yaml"))
        self.assertEqual(task.id, "skill-quality")
        self.assertEqual(task.mutator.type, "command")
        self.assertEqual(task.mutator.cwd, "tasks/skill-quality")
        self.assertEqual(task.mutator.timeout_seconds, 30)
        self.assertEqual(task.runner.timeout_seconds, 30)
        self.assertEqual(task.scorer.timeout_seconds, 30)

    def test_rejects_missing_required_section(self) -> None:
        """Dropping the whole ``objective`` section is reported by name."""
        content = VALID_TASK.replace(
            "objective:\n primary_metric: score\n direction: maximize\n",
            "",
        )
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("objective", str(ctx.exception))

    def test_rejects_invalid_direction(self) -> None:
        """An unknown ``direction`` value is rejected and named in the error."""
        content = VALID_TASK.replace("direction: maximize", "direction: sideways")
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("direction", str(ctx.exception))

    def test_rejects_malformed_yaml(self) -> None:
        """Text with an unterminated ``[`` appended raises ``TaskValidationError``."""
        content = VALID_TASK + " bad_indent: [\n"
        with self.assertRaises(TaskValidationError):
            load_task(self.write_task(content))

    def test_rejects_invalid_enum_value(self) -> None:
        """An unsupported ``mutation.mode`` is rejected and named in the error."""
        content = VALID_TASK.replace("mode: direct_edit", "mode: patch")
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("mutation.mode", str(ctx.exception))

    def test_rejects_invalid_mutator_type(self) -> None:
        """An unsupported ``mutator.type`` is rejected and named in the error."""
        # count=1: only the first "type: command" is replaced, which is the
        # mutator's entry in VALID_TASK (the scorer's comes later).
        content = VALID_TASK.replace("type: command", "type: script", 1)
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("mutator.type", str(ctx.exception))

    def test_rejects_missing_mutator_cwd(self) -> None:
        """Removing the mutator's ``cwd`` line produces an error mentioning it."""
        # count=1: the first ``cwd`` line in VALID_TASK belongs to the mutator.
        content = VALID_TASK.replace(" cwd: \".\"\n", "", 1)
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("cwd", str(ctx.exception))

    def test_rejects_missing_mutator_timeout_seconds(self) -> None:
        """Removing the mutator's ``timeout_seconds`` line is rejected."""
        # "timeout_seconds: 5" is the mutator's value in VALID_TASK.
        content = VALID_TASK.replace(" timeout_seconds: 5\n", "", 1)
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("timeout_seconds", str(ctx.exception))

    def test_rejects_missing_scorer_timeout_seconds(self) -> None:
        """Removing the scorer's ``timeout_seconds`` line is rejected."""
        # "timeout_seconds: 15" is the scorer's value in VALID_TASK.
        content = VALID_TASK.replace(" timeout_seconds: 15\n", "", 1)
        with self.assertRaises(TaskValidationError) as ctx:
            load_task(self.write_task(content))
        self.assertIn("timeout_seconds", str(ctx.exception))


if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()