---
# Task definition: score a single skill document (fixtures/SKILL.md).
#
# NOTE(review): this file was reconstructed from a flattened, single-line
# source. Nesting was inferred from key order and key names; confirm it
# against the consuming tool's schema, especially where marked below.

id: skill-quality
description: Deterministic sample task for scoring a skill document.

# Files that are candidates for mutation.
artifacts:
  include:
    - fixtures/SKILL.md
  exclude: []
  # NOTE(review): placed under `artifacts` because it follows `exclude`;
  # it could also be a top-level key. Confirm.
  max_files_per_iteration: 1

# How a candidate may be changed.
mutation:
  mode: direct_edit
  allowed_file_types:
    - .md
  # NOTE(review): assumed to belong to `mutation`. Confirm.
  max_changed_lines: 20

# Command that evaluates the artifact and writes the run output.
runner:
  # Folded scalar: the lines below join with single spaces into one command.
  # The ../../ prefixes pair with `cwd` below, so the --output path matches
  # the scorer's --input path if `cwd` is relative to the repository root
  # (TODO confirm).
  command: >-
    python ../../scripts/evaluate_skill_task.py
    --task-dir .
    --artifact fixtures/SKILL.md
    --output ../../work/skill-run.json
  cwd: tasks/skill-quality
  timeout_seconds: 30

# Command that reads the runner output; `parse` names the fields to read
# from its JSON result.
scorer:
  type: command
  command: python scripts/score_skill_task.py --input work/skill-run.json
  parse:
    format: json
    score_field: score
    metrics_field: metrics

objective:
  primary_metric: score
  direction: maximize

# NOTE(review): written as a top-level key; the flattened source would also
# permit `objective.constraints`. Confirm.
constraints:
  - metric: violation_count
    # Quoted so the operator is unambiguously a string.
    op: '<='
    value: 0

policy:
  keep_if: better_primary
  tie_breakers: []
  on_failure: discard

budget:
  max_iterations: 1
  max_failures: 1

logging:
  results_file: work/results.jsonl
  candidate_dir: work/candidates