#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Provenance: recovered from a git format-patch that added this file as
# run_complete_analysis.py (commit 3a2ef35a881d1eff8ebfc0b7f719e94447119fb7,
# author "Hector", 2025-10-29). The patch subject, translated, reads:
# "Optimize the position and size of the collider set on imported models so
# that they correspond to the model".
"""Complete open-source-rate analysis script.

Runs the full pipeline (cloc line counts, cloc per-file listing, ScanCode
license scan) and writes a text report that relates the code lines of
license-bearing files to the total code lines of the project.
"""

import json
import os
import subprocess
import sys

# Files produced / consumed by the pipeline (all relative to the CWD).
CLOC_JSON_PATH = "cloc.json"
DETAILED_CLOC_PATH = "detailed_cloc.txt"
SCANCODE_JSON_PATH = "summary.json"
REPORT_PATH = "完整开源率分析报告.txt"

# Directory / file filters shared by both cloc invocations.
_CLOC_FILTERS = (
    "--not-match-d='(venv|\\.git|__pycache__|\\.idea|\\.vscode|build|dist"
    "|.*\\.egg-info|Resources/animations|Resources/materials"
    "|Resources/models|Resources/textures|icons|tex)' "
    "--not-match-f='(cloc.json|detailed_cloc.txt|summary.json"
    "|完整开源率分析报告.txt|run_complete_analysis.py)'"
)

# Patterns handed to ScanCode via repeated --ignore options. summary.json is
# listed so that ScanCode does not try to scan the file it is writing.
_SCANCODE_IGNORES = (
    "venv",
    ".git",
    "__pycache__",
    ".idea",
    ".vscode",
    "build",
    "dist",
    "*.egg-info",
    "Resources/animations",
    "Resources/materials",
    "Resources/models",
    "Resources/textures",
    "icons",
    "tex",
    "cloc.json",
    "detailed_cloc.txt",
    "summary.json",
    "完整开源率分析报告.txt",
    "run_complete_analysis.py",
)

# Substrings that remove a ScanCode entry from the licensed-file list.
_EXCLUDE_PATTERNS = (
    "/venv/",
    "/.git/",
    "/__pycache__/",
    "/.idea/",
    "/.vscode/",
    "/build/",
    "/dist/",
    ".egg-info",
    "/Resources/",
    "/icons/",
    "/tex/",
    "cloc.json",
    "detailed_cloc.txt",
    "summary.json",
    "完整开源率分析报告.txt",
    "run_complete_analysis.py",
)

# File suffix -> language label used in the report. The last three entries
# cover suffixes that main()'s grep filter selects explicitly.
_LANGUAGE_BY_SUFFIX = (
    (".py", "Python"),
    (".js", "JavaScript"),
    (".cpp", "C++"),
    (".cc", "C++"),
    (".h", "C/C++ Header"),
    (".glsl", "GLSL"),
    (".qml", "QML"),
    (".xml", "XML"),
    (".json", "JSON"),
    (".md", "Markdown"),
    (".html", "HTML"),
    (".css", "CSS"),
    (".sh", "Shell"),
    (".yml", "YAML"),
    (".yaml", "YAML"),
    (".java", "Java"),
    (".cs", "C#"),
    (".php", "PHP"),
)

# Number of files listed per language before the list is abbreviated.
_MAX_FILES_LISTED = 10


def run_command(command: str, ignore_failure: bool = False) -> bool:
    """Run *command* through the shell and report whether it succeeded.

    Args:
        command: Shell command line. It is executed with ``shell=True``
            because the callers rely on pipes and redirections; only pass
            trusted, program-built strings.
        ignore_failure: When true, a non-zero exit status is not treated as
            a failure.

    Returns:
        ``True`` when the command exited with status 0 (or the status is
        ignored), ``False`` when it failed or could not be started.
    """
    try:
        print(f"执行命令: {command}")
        # errors="replace" keeps undecodable tool output from raising and
        # being misreported as a failed command.
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"执行命令时出错: {e}")
        return False

    if result.returncode != 0 and not ignore_failure:
        print(f"命令执行失败: {result.stderr}")
        return False
    print("命令执行成功")
    return True


def _load_json(path):
    """Return the parsed JSON document at *path*, or ``None`` on any problem."""
    if not os.path.exists(path):
        print(f"❌ 未找到 {path} 文件")
        return None
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses; callers already treat None as "data unavailable".
        print(f"❌ 无法解析 {path}: {exc}")
        return None


def load_cloc_data(path="cloc.json"):
    """Load the cloc ``--json`` summary; return ``None`` if missing/invalid."""
    return _load_json(path)


def load_scancode_data(path="summary.json"):
    """Load the ScanCode JSON result; return ``None`` if missing/invalid."""
    return _load_json(path)


def _is_excluded(file_path: str) -> bool:
    """Tell whether *file_path* matches one of the exclusion substrings."""
    # A leading slash lets "/venv/"-style patterns also match paths that
    # start with the excluded directory (e.g. "venv/lib/x.py").
    probe = "/" + file_path
    return any(pattern in probe for pattern in _EXCLUDE_PATTERNS)


def get_licensed_files_details(scancode_data) -> list:
    """Collect the ScanCode file entries that carry license information.

    Args:
        scancode_data: Parsed ScanCode JSON (a dict with a ``"files"`` list),
            or a falsy value.

    Returns:
        A list of dicts with the keys ``"path"``, ``"license"`` (the detected
        expression, ``"Unknown"`` when absent or null) and ``"detections"``.
        Entries that are not of type ``"file"`` or that match an exclusion
        pattern are left out.
    """
    if not scancode_data:
        return []

    licensed_files = []
    for entry in scancode_data.get("files") or []:
        # Directories and other non-file entries carry no code lines.
        if entry.get("type") != "file":
            continue

        file_path = entry.get("path", "")
        if _is_excluded(file_path):
            continue

        expression = entry.get("detected_license_expression")
        detections = entry.get("license_detections")
        if expression or detections:
            licensed_files.append({
                "path": file_path,
                "license": expression or "Unknown",
                "detections": detections or [],
            })

    return licensed_files


def _read_cloc_by_file(path):
    """Parse a ``cloc --by-file`` text listing.

    Returns a list of ``(path_as_written, code_lines)`` tuples, or ``None``
    when *path* does not exist. Every line is examined: header, separator
    and summary lines are recognised and dropped instead of skipping a fixed
    number of leading lines, because the listing may already have been
    filtered down to file rows.
    """
    if not os.path.exists(path):
        return None

    rows = []
    with open(path, "r", encoding="utf-8", errors="replace") as handle:
        for line in handle:
            # Split from the right so paths containing spaces stay intact:
            # <path> <blank> <comment> <code>
            parts = line.strip().rsplit(None, 3)
            if len(parts) != 4 or parts[0] == "SUM:":
                continue
            try:
                rows.append((parts[0], int(parts[3])))
            except ValueError:
                # Column-title or banner line, not a file row.
                continue
    return rows


def get_file_code_lines(path="detailed_cloc.txt") -> dict:
    """Map each file of the cloc per-file listing to its code-line count.

    Args:
        path: Location of the ``cloc --by-file`` text output.

    Returns:
        ``{file_path: code_lines}`` with a leading ``./`` removed from the
        paths; an empty dict when the listing does not exist.
    """
    rows = _read_cloc_by_file(path)
    if rows is None:
        print(f"❌ 未找到 {path} 文件")
        return {}

    file_code_lines = {}
    for file_path, code_lines in rows:
        if file_path.startswith("./"):
            file_path = file_path[2:]
        file_code_lines[file_path] = code_lines
    return file_code_lines


def calculate_accurate_open_source_lines(licensed_files_details,
                                         file_code_lines,
                                         root_prefix="EG/"):
    """Sum the code lines of the licensed files found in the cloc listing.

    Args:
        licensed_files_details: Entries as returned by
            ``get_licensed_files_details``.
        file_code_lines: Mapping as returned by ``get_file_code_lines``.
        root_prefix: Leading path segment that the license-scan paths carry
            but the cloc paths do not. Defaults to ``"EG/"``, the value the
            script was written for.

    Returns:
        ``(total_code_lines, detailed_files)`` where ``detailed_files`` is a
        list of dicts with ``"path"``, ``"code_lines"`` and ``"license"`` for
        every licensed file that could be matched.
    """
    total_licensed_code_lines = 0
    detailed_files = []

    for file_info in licensed_files_details:
        file_path = file_info["path"]

        relative = file_path
        if root_prefix and relative.startswith(root_prefix):
            relative = relative[len(root_prefix):]

        # Try the spellings under which cloc may have recorded the file.
        for candidate in (relative, "./" + relative, root_prefix + relative):
            if candidate in file_code_lines:
                code_lines = file_code_lines[candidate]
                total_licensed_code_lines += code_lines
                detailed_files.append({
                    "path": file_path,
                    "code_lines": code_lines,
                    "license": file_info["license"],
                })
                break

    return total_licensed_code_lines, detailed_files


def _guess_language(file_path: str) -> str:
    """Infer a language label from the file suffix (simplified)."""
    for suffix, language in _LANGUAGE_BY_SUFFIX:
        if file_path.endswith(suffix):
            return language
    return "Other"


def _language_section(rows) -> list:
    """Build the per-language report lines from ``(path, code_lines)`` rows."""
    files_by_language = {}
    for file_path, code_lines in rows:
        language = _guess_language(file_path)
        files_by_language.setdefault(language, []).append(
            (file_path, code_lines))

    lines = []
    for lang, files in files_by_language.items():
        lines.append(f"\n{lang}语言文件:")
        lines.append(f" 文件总数: {len(files)}")
        lines.append(f" 代码行数: {sum(count for _, count in files)}")
        lines.append(" 文件列表:")
        for file_path, code_lines in files[:_MAX_FILES_LISTED]:
            lines.append(f" {file_path}: {code_lines} 行")
        if len(files) > _MAX_FILES_LISTED:
            lines.append(f" ... 还有 {len(files) - _MAX_FILES_LISTED} 个文件")
    return lines


def _license_section(detailed_files) -> list:
    """Build the per-license report lines from the matched licensed files."""
    files_by_license = {}
    for file_info in detailed_files:
        license_type = file_info.get("license") or "Unknown"
        files_by_license.setdefault(license_type, []).append(file_info)

    lines = []
    for license_type, files in files_by_license.items():
        lines.append(f"\n许可证类型: {license_type}")
        lines.append(f" 文件数量: {len(files)}")
        lines.append(f" 代码行数: {sum(f['code_lines'] for f in files)}")
        lines.append(" 文件列表:")
        for file_info in files:
            lines.append(f" {file_info['path']}: {file_info['code_lines']} 行")
    return lines


def generate_detailed_report() -> bool:
    """Write the full report from the three data files in the CWD.

    Reads ``cloc.json``, ``summary.json`` and ``detailed_cloc.txt`` and
    writes ``完整开源率分析报告.txt``.

    Returns:
        ``True`` when the report was written, ``False`` when ``cloc.json`` or
        ``summary.json`` could not be loaded. A missing per-file listing is
        reported and treated as empty.
    """
    cloc_data = load_cloc_data(CLOC_JSON_PATH)
    scancode_data = load_scancode_data(SCANCODE_JSON_PATH)
    file_code_lines = get_file_code_lines(DETAILED_CLOC_PATH)

    if not cloc_data or not scancode_data:
        print("无法加载必要数据文件")
        return False

    by_file_rows = _read_cloc_by_file(DETAILED_CLOC_PATH) or []

    licensed_files_details = get_licensed_files_details(scancode_data)
    accurate_open_source_lines, detailed_files = (
        calculate_accurate_open_source_lines(
            licensed_files_details, file_code_lines))

    cloc_summary = cloc_data.get("SUM", {})
    total_code_lines = cloc_summary.get("code", 0)
    # Take the file count from cloc itself rather than a frozen constant;
    # fall back to the per-file listing when the summary lacks it.
    total_files = cloc_summary.get("nFiles", len(by_file_rows))
    licensed_files = len(licensed_files_details)

    if total_code_lines > 0:
        open_source_rate = accurate_open_source_lines / total_code_lines * 100
    else:
        open_source_rate = 0

    report_content = [
        "项目开源率分析完整报告",
        "=" * 50,
        "",
        "1. 报告概览",
        "-" * 20,
        f"项目总文件数: {total_files}",
        f"含许可证文件数: {licensed_files}",
        f"项目总代码行数: {total_code_lines}",
        f"准确开源代码行数: {accurate_open_source_lines}",
        f"代码开源率: {open_source_rate:.2f}%",
        "",
        "2. 各语言代码行数分布(包含文件路径)",
        "-" * 40,
    ]
    report_content.extend(_language_section(by_file_rows))
    report_content.extend([
        "",
        "3. 含许可证的开源文件详情",
        "-" * 30,
    ])
    report_content.extend(_license_section(detailed_files))

    with open(REPORT_PATH, "w", encoding="utf-8") as f:
        f.write("\n".join(report_content))

    print("完整报告已生成:完整开源率分析报告.txt")
    return True


def main() -> bool:
    """Run cloc, the per-file cloc listing, ScanCode and the report step.

    Returns:
        ``True`` when every step succeeded, ``False`` as soon as one fails.
    """
    print("开始执行完整的开源率分析流程...")

    # Step 1: total code lines per language.
    print("\n步骤1: 执行cloc统计代码行数")
    cloc_cmd = "cloc --json --fullpath " + _CLOC_FILTERS + " . > cloc.json"
    if not run_command(cloc_cmd):
        print("❌ cloc统计失败")
        return False

    # Step 2: per-file listing, reduced by grep to rows of source files.
    print("\n步骤2: 生成详细文件列表")
    detailed_cloc_cmd = (
        "cloc --by-file --fullpath " + _CLOC_FILTERS + " . "
        "| grep -v \"^\\s*$\" "
        "| grep -E \"(\\.py|\\.js|\\.cpp|\\.h|\\.glsl|\\.qml|\\.xml|\\.html"
        "|\\.css|\\.java|\\.cs|\\.php)\" > detailed_cloc.txt"
    )
    if not run_command(detailed_cloc_cmd):
        print("❌ 生成详细文件列表失败")
        return False

    # Step 3: license scan.
    print("\n步骤3: 执行ScanCode扫描许可证")
    scancode_cmd = (
        "scancode --license --classify --summary --json-pp summary.json . "
        + " ".join(f'--ignore "{pattern}"' for pattern in _SCANCODE_IGNORES)
    )
    # The exit status is still ignored on purpose: the step is judged by
    # whether summary.json exists afterwards.
    run_command(scancode_cmd, ignore_failure=True)

    if not os.path.exists(SCANCODE_JSON_PATH):
        print("❌ ScanCode未生成summary.json文件")
        return False

    # Step 4: report.
    print("\n步骤4: 生成详细报告")
    if not generate_detailed_report():
        print("❌ 生成报告失败")
        return False

    print("\n✅ 完整分析流程执行完成!")
    print("生成的文件:")
    print(" - cloc.json: 代码行数统计")
    print(" - detailed_cloc.txt: 详细文件列表")
    print(" - summary.json: 许可证扫描结果")
    print(" - 完整开源率分析报告.txt: 最终报告")
    return True


if __name__ == "__main__":
    success = main()
    if success:
        print("\n🎉 所有步骤执行成功!")
        sys.exit(0)
    else:
        print("\n❌ 执行过程中出现错误!")
        sys.exit(1)