"""Estimate the share of third-party code in a source tree, by lines of code.

The tree is walked from a root directory. Files with a recognised code
extension are counted, and each file is attributed to "third-party" when one
of its directories (below the scan root) has a well-known vendored/build
directory name, otherwise to "first-party".
"""
import os
from pathlib import Path

# Directory names whose contents are treated as third-party code.
THIRD_PARTY_DIR_NAMES = {
    'node_modules', 'vendor', 'third_party', 'third-party', 'extern',
    'external', 'deps', 'Cesium', 'Cesium-1.132', 'dist', 'build',
}

# File suffixes that are counted as code (compared case-insensitively).
CODE_EXTENSIONS = {
    '.py', '.js', '.ts', '.tsx', '.jsx', '.css', '.scss', '.html', '.c',
    '.h', '.cpp', '.hpp', '.cc', '.hh', '.m', '.mm', '.java', '.go', '.rs',
    '.cs', '.vue', '.svelte',
}

# Directory names that are pruned from the walk entirely.
SKIP_DIR_NAMES = {
    '.git', '.hg', '.svn', '__pycache__', '.mypy_cache', '.pytest_cache',
    '.idea', '.vscode', '.venv', 'venv', 'env', '.tox', '.cache',
    'Resources', 'icons', 'tex', 'terminal',
}

MAX_FILE_SIZE_BYTES = 2 * 1024 * 1024  # 2 MiB


def is_third_party(path: Path) -> bool:
    """Return True if any component of *path* is a third-party directory name.

    The comparison is case-insensitive. Every component of *path* is
    inspected, so callers that only care about directories below some root
    should pass a path relative to that root.
    """
    # Build the lower-cased lookup once per call rather than once per part.
    known = {name.lower() for name in THIRD_PARTY_DIR_NAMES}
    return any(part.lower() in known for part in path.parts)


def should_skip_dir(dirname: str) -> bool:
    """Return True if *dirname* (a bare directory name) must not be walked."""
    return dirname.lower() in {name.lower() for name in SKIP_DIR_NAMES}


def is_code_file(path: Path) -> bool:
    """Return True if the suffix of *path* is one of CODE_EXTENSIONS."""
    return path.suffix.lower() in CODE_EXTENSIONS


def count_lines(path: Path) -> int:
    """Return the number of lines in *path*, or 0 when it cannot be counted.

    Files larger than MAX_FILE_SIZE_BYTES and files that cannot be read
    contribute 0. Undecodable bytes are ignored when reading as text.
    """
    try:
        if path.stat().st_size > MAX_FILE_SIZE_BYTES:
            return 0
        # Prefer a text read (universal newlines); fall back to counting
        # newline bytes if the text read fails.
        try:
            with path.open('r', encoding='utf-8', errors='ignore') as f:
                return sum(1 for _ in f)
        except (OSError, ValueError):
            with path.open('rb') as f:
                return f.read().count(b'\n')
    except (OSError, ValueError):
        # Best effort: an unreadable file simply contributes no lines.
        return 0


def _relative_to_root(directory: Path, root: Path) -> Path:
    """Return *directory* relative to *root*, or unchanged if not below it."""
    try:
        return directory.relative_to(root)
    except ValueError:
        return directory


def _scan(root: Path) -> tuple:
    """Walk *root* and tally code files and lines.

    Returns a 4-tuple
    ``(first_party_loc, first_party_files, third_party_loc, third_party_files)``.
    """
    first_party_loc = 0
    first_party_files = 0
    third_party_loc = 0
    third_party_files = 0

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into skipped directories.
        dirnames[:] = [d for d in dirnames if not should_skip_dir(d)]

        current = Path(dirpath)
        # Classify using only the components below the scan root; otherwise
        # an ancestor named e.g. "build" would mark the whole tree as
        # third-party.
        current_is_third = is_third_party(_relative_to_root(current, root))

        for fn in filenames:
            p = current / fn
            if not is_code_file(p):
                continue
            loc = count_lines(p)
            if current_is_third:
                third_party_loc += loc
                third_party_files += 1
            else:
                first_party_loc += loc
                first_party_files += 1

    return first_party_loc, first_party_files, third_party_loc, third_party_files


def main(root: Path) -> None:
    """Scan *root* and print the third-party ("open-source") rate by LOC."""
    (first_party_loc, first_party_files,
     third_party_loc, third_party_files) = _scan(root)

    total_loc = first_party_loc + third_party_loc
    # Guard against an empty tree (no counted lines at all).
    open_source_rate = (third_party_loc / total_loc) * 100 if total_loc else 0.0

    print('Open-Source Rate (by LOC): {:.2f}%'.format(open_source_rate))
    print('\nBreakdown:')
    print(' First-party LOC : {} ({} files)'.format(first_party_loc, first_party_files))
    print(' Third-party LOC : {} ({} files)'.format(third_party_loc, third_party_files))
    print(' Total LOC : {}'.format(total_loc))
    print('\nNotes:')
    print(' - Third-party directories are heuristically detected by common names: {}'.format(', '.join(sorted(THIRD_PARTY_DIR_NAMES))))
    print(' - Static asset directories are skipped: {}'.format(', '.join(sorted(SKIP_DIR_NAMES))))
    print(' - Counted code extensions: {}'.format(', '.join(sorted(CODE_EXTENSIONS))))


if __name__ == '__main__':
    main(Path('.').resolve())