96 lines
3.1 KiB
Python
96 lines
3.1 KiB
Python
import os
|
|
from pathlib import Path
|
|
|
|
# Directory names that mark everything beneath them as third-party code.
# Lookups against this set lowercase both sides, so casing here is cosmetic.
THIRD_PARTY_DIR_NAMES = {
    'Cesium',
    'Cesium-1.132',
    'build',
    'deps',
    'dist',
    'extern',
    'external',
    'node_modules',
    'third-party',
    'third_party',
    'vendor',
}

# File suffixes (leading dot included, lowercase) that are counted as code.
CODE_EXTENSIONS = {
    '.c', '.cc', '.cpp', '.cs', '.css',
    '.go',
    '.h', '.hh', '.hpp', '.html',
    '.java', '.js', '.jsx',
    '.m', '.mm',
    '.py',
    '.rs',
    '.scss', '.svelte',
    '.ts', '.tsx',
    '.vue',
}

# Directory names that are pruned from the walk entirely (never descended into).
SKIP_DIR_NAMES = {
    '.cache',
    '.git',
    '.hg',
    '.idea',
    '.mypy_cache',
    '.pytest_cache',
    '.svn',
    '.tox',
    '.venv',
    '.vscode',
    'Resources',
    '__pycache__',
    'env',
    'icons',
    'terminal',
    'tex',
    'venv',
}

# Files larger than this are not read; they contribute zero lines.
MAX_FILE_SIZE_BYTES = 2 * 1024 ** 2  # 2 MiB
|
|
|
|
|
|
def is_third_party(path: Path, dir_names=None) -> bool:
    """Return True when any component of *path* is a third-party directory name.

    Every element of ``path.parts`` is compared against the name set. Both
    sides are lowercased first, so the match is case-insensitive on every
    platform. Only whole components match: ``vendored`` does not match
    ``vendor``.

    Args:
        path: Path whose components are inspected.
        dir_names: Optional iterable of directory names to match against.
            When omitted (``None``), the module-level
            ``THIRD_PARTY_DIR_NAMES`` is used.

    Returns:
        True if at least one component matches a name, otherwise False
        (including for a path with no components).
    """
    if dir_names is None:
        dir_names = THIRD_PARTY_DIR_NAMES
    # Build the lowercased lookup set once per call rather than once per
    # path component.
    lowered_names = {name.lower() for name in dir_names}
    return any(part.lower() in lowered_names for part in path.parts)
|
|
|
|
|
|
def should_skip_dir(dirname: str) -> bool:
    """Tell whether *dirname* appears in ``SKIP_DIR_NAMES``, ignoring case.

    Both the candidate and every skip-list entry are lowercased before the
    comparison.
    """
    needle = dirname.lower()
    return any(needle == entry.lower() for entry in SKIP_DIR_NAMES)
|
|
|
|
|
|
def is_code_file(path: Path) -> bool:
    """Report whether *path* carries one of the suffixes in ``CODE_EXTENSIONS``.

    The suffix is lowercased before the lookup, so ``Foo.PY`` is treated the
    same as ``foo.py``.
    """
    extension = path.suffix.lower()
    return extension in CODE_EXTENSIONS
|
|
|
|
|
|
def count_lines(path: Path, max_size_bytes=None) -> int:
    """Count the lines of the file at *path*, best-effort.

    The file is read as UTF-8 text with undecodable bytes dropped and
    universal newlines enabled, so ``\\n``, ``\\r\\n`` and ``\\r`` all end a
    line and a final line without a terminator is still counted.

    Args:
        path: File to count.
        max_size_bytes: Optional size limit. Files whose size on disk exceeds
            it are not read and count as 0 lines. When omitted (``None``),
            the module-level ``MAX_FILE_SIZE_BYTES`` is used.

    Returns:
        The number of lines, or 0 when the file is over the size limit or
        cannot be stat'ed, opened or read.
    """
    if max_size_bytes is None:
        max_size_bytes = MAX_FILE_SIZE_BYTES
    try:
        if path.stat().st_size > max_size_bytes:
            return 0
        # errors='ignore' means decoding itself never raises, so one read
        # path suffices; a binary retry would only repeat the same OS error
        # (and would miscount a final unterminated line).
        with path.open('r', encoding='utf-8', errors='ignore') as handle:
            return sum(1 for _ in handle)
    except (OSError, ValueError):
        # Unreadable, vanished, or otherwise inaccessible file: keep the scan
        # going and count nothing, but let genuine programming errors surface.
        return 0
|
|
|
|
|
|
def main(root: Path) -> None:
    """Scan *root* and print the share of code lines found in third-party directories.

    The tree is walked with ``os.walk``. Directories rejected by
    ``should_skip_dir`` are pruned, files rejected by ``is_code_file`` are
    ignored, and every remaining file's line count (from ``count_lines``) is
    added to either the third-party or the first-party tally.

    Args:
        root: Directory to scan.

    Returns:
        None. The report is written to standard output.
    """
    root = Path(root)

    third_party_loc = 0
    first_party_loc = 0
    third_party_files = 0
    first_party_files = 0

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [d for d in dirnames if not should_skip_dir(d)]

        current = Path(dirpath)
        # Classify using only the part of the path below root. Checking the
        # full path would let an ancestor of root that happens to be called
        # e.g. "build" or "external" mark the whole scan as third-party.
        current_is_third = is_third_party(current.relative_to(root))

        for fn in filenames:
            p = current / fn
            if not is_code_file(p):
                continue
            loc = count_lines(p)
            if current_is_third:
                third_party_loc += loc
                third_party_files += 1
            else:
                first_party_loc += loc
                first_party_files += 1

    total_loc = first_party_loc + third_party_loc
    # Guard against an empty scan: no counted lines means a 0% rate.
    open_source_rate = (third_party_loc / total_loc) * 100 if total_loc else 0.0

    print('Open-Source Rate (by LOC): {:.2f}%'.format(open_source_rate))
    print('\nBreakdown:')
    print(' First-party LOC : {} ({} files)'.format(first_party_loc, first_party_files))
    print(' Third-party LOC : {} ({} files)'.format(third_party_loc, third_party_files))
    print(' Total LOC : {}'.format(total_loc))
    print('\nNotes:')
    print(' - Third-party directories are heuristically detected by common names: {}'.format(', '.join(sorted(THIRD_PARTY_DIR_NAMES))))
    print(' - Static asset directories are skipped: {}'.format(', '.join(sorted(SKIP_DIR_NAMES))))
    print(' - Counted code extensions: {}'.format(', '.join(sorted(CODE_EXTENSIONS))))
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: scan the current directory, resolved to an
    # absolute path first.
    scan_root = Path('.').resolve()
    main(scan_root)
|
|
|