#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
End-to-end open-source-rate analysis script.

Runs cloc (line counts) and ScanCode (license detection) over the current
directory, then writes a text report that relates the code lines of files
carrying a detected license to the total code lines of the project.
"""

import json
import os
import subprocess
import sys

# Files produced (and consumed) by the individual pipeline steps.
_CLOC_JSON_PATH = "cloc.json"
_DETAILED_CLOC_PATH = "detailed_cloc.txt"
_SCANCODE_JSON_PATH = "summary.json"
_REPORT_PATH = "完整开源率分析报告.txt"

# Substrings that exclude a ScanCode entry from the licensed-file list.
# Directory patterns are matched against the path with a leading "/" added,
# so that a directory sitting at the very start of the path is caught too.
_EXCLUDE_PATTERNS = (
    "/venv/",
    "/.git/",
    "/__pycache__/",
    "/.idea/",
    "/.vscode/",
    "/build/",
    "/dist/",
    ".egg-info",
    "/Resources/",
    "/icons/",
    "/tex/",
    "cloc.json",
    "detailed_cloc.txt",
    "summary.json",
    "完整开源率分析报告.txt",
    "run_complete_analysis.py",
)

# (suffixes, language label) pairs used to group files in the report.
_LANGUAGE_BY_SUFFIX = (
    ((".py",), "Python"),
    ((".js",), "JavaScript"),
    ((".cpp", ".cc"), "C++"),
    ((".h",), "C/C++ Header"),
    ((".glsl",), "GLSL"),
    ((".qml",), "QML"),
    ((".xml",), "XML"),
    ((".json",), "JSON"),
    ((".md",), "Markdown"),
    ((".html",), "HTML"),
    ((".css",), "CSS"),
    ((".sh",), "Shell"),
    ((".yml", ".yaml"), "YAML"),
)

# Directory/file filters shared by both cloc invocations.
_CLOC_FILTER_ARGS = (
    "--fullpath "
    "--not-match-d='(venv|\\.git|__pycache__|\\.idea|\\.vscode|build|dist|"
    ".*\\.egg-info|Resources/animations|Resources/materials|Resources/models|"
    "Resources/textures|icons|tex)' "
    "--not-match-f='(cloc.json|detailed_cloc.txt|summary.json|"
    "完整开源率分析报告.txt|run_complete_analysis.py)'"
)

# Names passed to ScanCode via --ignore. summary.json is included so that
# ScanCode does not try to scan the file it is currently writing.
_SCANCODE_IGNORES = (
    "venv",
    ".git",
    "__pycache__",
    ".idea",
    ".vscode",
    "build",
    "dist",
    "*.egg-info",
    "Resources",
    "icons",
    "tex",
    "cloc.json",
    "detailed_cloc.txt",
    "summary.json",
    "完整开源率分析报告.txt",
    "run_complete_analysis.py",
)


def run_command(command, ignore_failure=False):
    """Run a shell command and report whether it should count as successful.

    Args:
        command: Command line passed to the shell. The callers in this file
            only pass fixed strings that rely on pipes and redirection, which
            is why ``shell=True`` is used.
        ignore_failure: When true, a non-zero exit status is reported on
            stdout but still treated as success.

    Returns:
        True if the command exited with status 0 (or its failure was
        ignored), False if it failed or could not be started.
    """
    print(f"执行命令: {command}")
    try:
        # errors="replace" keeps undecodable tool output from aborting the run.
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"执行命令时出错: {e}")
        return False

    if result.returncode != 0:
        if not ignore_failure:
            print(f"命令执行失败: {result.stderr}")
            return False
        print(f"命令返回非零退出码 {result.returncode},已忽略")
        return True

    print("命令执行成功")
    return True


def _load_json(path):
    """Load a JSON document, returning None (with a message) on any failure."""
    if not os.path.exists(path):
        print(f"❌ 未找到 {path} 文件")
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError,
        # e.g. an empty file left behind by a failed tool run.
        print(f"❌ 无法读取或解析 {path}: {e}")
        return None


def load_cloc_data(path="cloc.json"):
    """Load the cloc JSON statistics.

    Returns:
        The parsed JSON value, or None if the file is missing or unreadable.
    """
    return _load_json(path)


def load_scancode_data(path="summary.json"):
    """Load the ScanCode JSON results.

    Returns:
        The parsed JSON value, or None if the file is missing or unreadable.
    """
    return _load_json(path)


def get_licensed_files_details(scancode_data):
    """Collect the ScanCode file entries that carry license information.

    Entries whose ``type`` is not ``"file"`` and entries whose path matches
    one of the exclusion patterns are skipped.

    Args:
        scancode_data: Parsed ScanCode JSON (a dict with a ``files`` list),
            or a falsy value.

    Returns:
        A list of dicts with the keys ``path``, ``license`` (the detected
        expression, or ``"Unknown"`` when it is empty) and ``detections``.
    """
    if not scancode_data:
        return []

    licensed_files = []
    for entry in scancode_data.get("files", []):
        if entry.get("type") != "file":
            continue

        file_path = entry.get("path") or ""
        # Anchor with "/" so "/venv/" also matches a path starting "venv/".
        anchored_path = "/" + file_path
        if any(pattern in anchored_path for pattern in _EXCLUDE_PATTERNS):
            continue

        expression = entry.get("detected_license_expression")
        detections = entry.get("license_detections", [])
        if expression or detections:
            licensed_files.append({
                "path": file_path,
                "license": expression or "Unknown",
                "detections": detections,
            })

    return licensed_files


def _read_cloc_by_file(path):
    """Parse ``cloc --by-file`` text output into (raw_path, code_lines) rows.

    A data row is recognised by its shape (a path followed by three integer
    columns) instead of by its position, because the file written by
    ``main`` has already had its header lines removed by ``grep``. The path
    may contain spaces. The ``SUM:`` row is not a file and is skipped.

    Returns:
        A list of rows, or None if ``path`` does not exist.
    """
    if not os.path.exists(path):
        return None

    rows = []
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            fields = line.strip().rsplit(None, 3)
            if len(fields) != 4 or fields[0] == "SUM:":
                continue
            try:
                counts = [int(value) for value in fields[1:]]
            except ValueError:
                # Header, separator or any other non-data line.
                continue
            rows.append((fields[0], counts[-1]))
    return rows


def get_file_code_lines(path=_DETAILED_CLOC_PATH):
    """Map file paths to their code-line counts from ``cloc --by-file`` output.

    Args:
        path: Text file holding the cloc per-file listing.

    Returns:
        A dict of path -> code lines, with a leading ``./`` stripped from
        each path. Empty if the file does not exist.
    """
    rows = _read_cloc_by_file(path)
    if rows is None:
        print(f"❌ 未找到 {path} 文件")
        return {}

    file_code_lines = {}
    for raw_path, code_lines in rows:
        key = raw_path[2:] if raw_path.startswith("./") else raw_path
        file_code_lines[key] = code_lines
    return file_code_lines


def calculate_accurate_open_source_lines(licensed_files_details, file_code_lines,
                                         root_prefix="EG/"):
    """Sum the code lines of licensed files that cloc also counted.

    Each licensed path is looked up in ``file_code_lines`` in three spellings,
    in this order: without ``root_prefix``, with a leading ``./``, and with
    ``root_prefix``. Files found under none of them are left out.

    Args:
        licensed_files_details: Entries as returned by
            ``get_licensed_files_details``.
        file_code_lines: Mapping of path -> code lines.
        root_prefix: Leading path segment that the license scan puts in front
            of paths and the line-count listing does not.

    Returns:
        A tuple ``(total_code_lines, matched_files)`` where each matched file
        is a dict with the keys ``path``, ``code_lines`` and ``license``.
    """
    total_licensed_code_lines = 0
    detailed_files = []

    for file_info in licensed_files_details:
        file_path = file_info["path"]
        relative_path = file_path
        if root_prefix and relative_path.startswith(root_prefix):
            relative_path = relative_path[len(root_prefix):]

        candidates = (
            relative_path,
            "./" + relative_path,
            root_prefix + relative_path,
        )
        for candidate in candidates:
            if candidate in file_code_lines:
                code_lines = file_code_lines[candidate]
                total_licensed_code_lines += code_lines
                detailed_files.append({
                    "path": file_path,
                    "code_lines": code_lines,
                    "license": file_info["license"],
                })
                break

    return total_licensed_code_lines, detailed_files


def _language_for(file_path):
    """Return the report's language label for a path, based on its suffix."""
    for suffixes, language in _LANGUAGE_BY_SUFFIX:
        if file_path.endswith(suffixes):
            return language
    return "Other"


def _language_section(rows):
    """Build the per-language report lines from (path, code_lines) rows."""
    files_by_language = {}
    for file_path, code_lines in rows:
        files_by_language.setdefault(_language_for(file_path), []).append(
            (file_path, code_lines))

    lines = []
    for lang, files in files_by_language.items():
        lines.append(f"\n{lang}语言文件:")
        lines.append(f" 文件总数: {len(files)}")
        lines.append(f" 代码行数: {sum(count for _, count in files)}")
        lines.append(" 文件列表:")
        # Only the first 10 files of each language are listed.
        for file_path, code_lines in files[:10]:
            lines.append(f" {file_path}: {code_lines} 行")
        if len(files) > 10:
            lines.append(f" ... 还有 {len(files) - 10} 个文件")
    return lines


def _license_section(detailed_files):
    """Build the per-license report lines from the matched licensed files."""
    files_by_license = {}
    for file_info in detailed_files:
        license_type = file_info.get("license", "Unknown")
        files_by_license.setdefault(license_type, []).append(file_info)

    lines = []
    for license_type, files in files_by_license.items():
        lines.append(f"\n许可证类型: {license_type}")
        lines.append(f" 文件数量: {len(files)}")
        lines.append(f" 代码行数: {sum(f['code_lines'] for f in files)}")
        lines.append(" 文件列表:")
        for file_info in files:
            lines.append(f" {file_info['path']}: {file_info['code_lines']} 行")
    return lines


def generate_detailed_report(total_files=None):
    """Write the text report from the cloc and ScanCode output files.

    Reads ``cloc.json``, ``summary.json`` and ``detailed_cloc.txt`` from the
    current directory and writes ``完整开源率分析报告.txt``.

    Args:
        total_files: Total file count to print in the overview. When None it
            is taken from the cloc JSON (``SUM.nFiles``, then
            ``header.n_files``, then 0).

    Returns:
        True if the report was written, False if cloc or ScanCode data could
        not be loaded.
    """
    cloc_data = load_cloc_data(_CLOC_JSON_PATH)
    scancode_data = load_scancode_data(_SCANCODE_JSON_PATH)
    file_code_lines = get_file_code_lines()

    if not cloc_data or not scancode_data:
        print("无法加载必要数据文件")
        return False

    licensed_files_details = get_licensed_files_details(scancode_data)
    accurate_open_source_lines, detailed_files = calculate_accurate_open_source_lines(
        licensed_files_details, file_code_lines)

    summary = cloc_data.get("SUM", {})
    total_code_lines = summary.get("code", 0)
    if total_files is None:
        header = cloc_data.get("header", {})
        total_files = summary.get("nFiles", header.get("n_files", 0))
    licensed_files = len(licensed_files_details)

    if total_code_lines > 0:
        open_source_rate = (accurate_open_source_lines / total_code_lines) * 100
    else:
        open_source_rate = 0

    report_content = [
        "项目开源率分析完整报告",
        "=" * 50,
        "",
        "1. 报告概览",
        "-" * 20,
        f"项目总文件数: {total_files}",
        f"含许可证文件数: {licensed_files}",
        f"项目总代码行数: {total_code_lines}",
        f"准确开源代码行数: {accurate_open_source_lines}",
        f"代码开源率: {open_source_rate:.2f}%",
        "",
        "2. 各语言代码行数分布(包含文件路径)",
        "-" * 40,
    ]
    # A missing listing yields an empty section instead of an exception.
    report_content.extend(
        _language_section(_read_cloc_by_file(_DETAILED_CLOC_PATH) or []))

    report_content.append("")
    report_content.append("3. 含许可证的开源文件详情")
    report_content.append("-" * 30)
    report_content.extend(_license_section(detailed_files))

    with open(_REPORT_PATH, "w", encoding="utf-8") as f:
        f.write("\n".join(report_content))

    print("完整报告已生成:完整开源率分析报告.txt")
    return True


def main():
    """Run cloc, ScanCode and the report generation in sequence.

    Returns:
        True if every step succeeded, False as soon as one step fails.
    """
    print("开始执行完整的开源率分析流程...")

    # Step 1: overall line counts as JSON.
    print("\n步骤1: 执行cloc统计代码行数")
    cloc_cmd = "cloc --json " + _CLOC_FILTER_ARGS + " . > cloc.json"
    if not run_command(cloc_cmd):
        print("❌ cloc统计失败")
        return False

    # Step 2: per-file line counts, filtered to the listed source extensions.
    print("\n步骤2: 生成详细文件列表")
    detailed_cloc_cmd = (
        "cloc --by-file " + _CLOC_FILTER_ARGS + " . "
        "| grep -v \"^\\s*$\" "
        "| grep -E \"(\\.py|\\.js|\\.cpp|\\.h|\\.glsl|\\.qml|\\.xml|\\.html|\\.css|\\.java|\\.cs|\\.php)\" "
        "> detailed_cloc.txt"
    )
    if not run_command(detailed_cloc_cmd):
        print("❌ 生成详细文件列表失败")
        return False

    # Step 3: license scan.
    print("\n步骤3: 执行ScanCode扫描许可证")
    # Remove output left over from an earlier run; otherwise the existence
    # check below would pass even if this scan produced nothing.
    try:
        os.remove(_SCANCODE_JSON_PATH)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"❌ 无法删除旧的 summary.json: {e}")
        return False

    ignore_args = " ".join(f'--ignore "{name}"' for name in _SCANCODE_IGNORES)
    scancode_cmd = (
        "scancode --license --classify --summary --json-pp summary.json . "
        + ignore_args
    )
    # A non-zero exit status is tolerated here; whether the scan produced
    # usable output is decided by the existence check that follows.
    run_command(scancode_cmd, ignore_failure=True)

    if not os.path.exists(_SCANCODE_JSON_PATH):
        print("❌ ScanCode未生成summary.json文件")
        return False

    # Step 4: report.
    print("\n步骤4: 生成详细报告")
    if not generate_detailed_report():
        print("❌ 生成报告失败")
        return False

    print("\n✅ 完整分析流程执行完成!")
    print("生成的文件:")
    print(" - cloc.json: 代码行数统计")
    print(" - detailed_cloc.txt: 详细文件列表")
    print(" - summary.json: 许可证扫描结果")
    print(" - 完整开源率分析报告.txt: 最终报告")
    return True


if __name__ == "__main__":
    success = main()
    if success:
        print("\n🎉 所有步骤执行成功!")
        sys.exit(0)
    else:
        print("\n❌ 执行过程中出现错误!")
        sys.exit(1)