77 lines
2.4 KiB
Python
77 lines
2.4 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from common import read_json, write_json
|
|
|
|
|
|
def compile_patterns(contains: list[str], regexes: list[str]) -> tuple[list[str], list[re.Pattern[str]]]:
    """Prepare the text filters used when matching blocks.

    Args:
        contains: Plain substring terms. Returned untouched (the very same
            list object), so callers can unpack both filters in one step.
        regexes: Regular-expression source strings.

    Returns:
        A ``(contains, compiled)`` pair, where ``compiled`` holds one
        case-insensitive pattern per entry of ``regexes``, in the same order.

    Raises:
        re.error: If any entry of ``regexes`` is not a valid pattern.
    """
    compiled_patterns: list[re.Pattern[str]] = []
    for source in regexes:
        compiled_patterns.append(re.compile(source, re.IGNORECASE))
    return contains, compiled_patterns
|
|
|
|
|
|
def match_block(block: dict[str, Any], contains: list[str], regexes: list[re.Pattern[str]], kinds: set[str], heading_only: bool, block_ids: set[str]) -> bool:
    """Return True when ``block`` passes every active filter.

    All filters are ANDed together. An empty ``kinds``, ``block_ids``,
    ``contains`` or ``regexes`` collection disables that filter, as does a
    false ``heading_only``.

    Args:
        block: Mapping describing one block; the keys ``"kind"``, ``"id"``,
            ``"heading"`` and ``"text"`` are consulted.
        contains: Substrings that must ALL occur in the block text
            (compared after lower-casing both sides).
        regexes: Compiled patterns that must ALL find a match in the text.
        kinds: Allowed values for ``block["kind"]``.
        heading_only: When true, require a truthy ``block["heading"]``.
        block_ids: Allowed values for ``block["id"]``.

    Returns:
        True if the block satisfies every active filter, otherwise False.
    """
    if kinds and block.get("kind") not in kinds:
        return False
    if block_ids and block.get("id") not in block_ids:
        return False
    if heading_only and not block.get("heading"):
        return False

    # No text filters active: the text value is never inspected.
    if not contains and not regexes:
        return True

    # ``dict.get`` only applies its default when the key is missing, so a
    # block carrying ``"text": null`` (or any non-string value) would blow up
    # in ``.lower()`` / ``regex.search``. Treat such a block as having no text.
    raw_text = block.get("text")
    text = raw_text if isinstance(raw_text, str) else ""

    if contains:
        # Lower-case the haystack once instead of once per term.
        lowered = text.lower()
        if not all(term.lower() in lowered for term in contains):
            return False
    if regexes and not all(regex.search(text) for regex in regexes):
        return False
    return True
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: select blocks from a graph file and emit them.

    Parses the command line, loads the graph named by ``--graph`` through
    ``read_json``, keeps the blocks accepted by ``match_block`` (at most
    ``--limit`` of them, in file order) and emits a result object holding
    the echoed query, the matches and their count. The result goes to the
    ``--out`` path via ``write_json`` when given, otherwise it is printed to
    stdout as indented JSON.

    Exits with an argparse usage error when ``--limit`` is negative or a
    ``--regex`` pattern does not compile.
    """
    # Local import so this function does not depend on a change to the
    # file-level import block.
    from itertools import islice

    parser = argparse.ArgumentParser()
    parser.add_argument("--graph", required=True)
    parser.add_argument("--contains", action="append", default=[])
    parser.add_argument("--regex", action="append", default=[])
    parser.add_argument("--kind", action="append", default=[])
    parser.add_argument("--heading-only", action="store_true")
    parser.add_argument("--block-id", action="append", default=[])
    parser.add_argument("--limit", type=int, default=20)
    parser.add_argument("--out")
    args = parser.parse_args()

    # Validate arguments before touching the filesystem. A negative limit
    # used to fall through to slice semantics and silently drop matches from
    # the END of the result instead of capping it.
    if args.limit < 0:
        parser.error("--limit must be a non-negative integer")

    try:
        contains, regexes = compile_patterns(args.contains, args.regex)
    except re.error as exc:
        # Report a bad pattern as a usage error rather than a traceback.
        parser.error(f"invalid --regex pattern: {exc}")

    graph = read_json(Path(args.graph).resolve())
    kinds = set(args.kind)
    block_ids = set(args.block_id)

    # Lazy filter + islice: stop scanning as soon as ``limit`` matches exist.
    candidates = (
        block
        for block in graph.get("blocks", [])
        if match_block(block, contains, regexes, kinds, args.heading_only, block_ids)
    )
    matches = list(islice(candidates, args.limit))

    result = {
        "query": {
            "contains": contains,
            "regex": args.regex,
            "kind": args.kind,
            "heading_only": args.heading_only,
            "block_ids": args.block_id,
            "limit": args.limit,
        },
        "matches": matches,
        # Number of matches returned (after the limit is applied).
        "count": len(matches),
    }

    if args.out:
        write_json(Path(args.out).resolve(), result)
    else:
        print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|