EG/run_complete_analysis.py
2025-12-12 16:16:15 +08:00

354 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
完整的开源率分析脚本
该脚本将执行完整的测试流程并生成准确报告
"""
import json
import os
import subprocess
import sys
def run_command(command, ignore_failure=False):
    """Run a shell command and report whether it counts as successful.

    The command is executed through the shell (``shell=True``) because the
    callers in this script pass command lines that rely on pipes and output
    redirection. stdout/stderr are captured instead of being streamed.

    Args:
        command: Shell command line to execute.
        ignore_failure: When true, a non-zero exit status is reported as a
            warning and the function still returns ``True``.

    Returns:
        ``True`` if the command exited with status 0, or exited non-zero
        while ``ignore_failure`` is set; ``False`` if it failed or could not
        be executed at all.
    """
    print(f"执行命令: {command}")
    try:
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # ValueError also covers UnicodeDecodeError raised while decoding output.
        print(f"执行命令时出错: {e}")
        return False

    if result.returncode != 0:
        if not ignore_failure:
            print(f"命令执行失败: {result.stderr}")
            return False
        # The failure is tolerated, but do not claim success: keep the exit
        # status and stderr visible so a real problem can still be diagnosed.
        print(f"命令返回非零退出码 {result.returncode}(已忽略): {result.stderr.strip()}")
        return True

    print("命令执行成功")
    return True
def load_cloc_data(path="cloc.json"):
    """Load the cloc statistics stored as JSON.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document, or ``None`` when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    if not os.path.exists(path):
        print(f"❌ 未找到 {path} 文件")
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # A file created by shell redirection can exist and still be empty, so
        # report it the same way as a missing file instead of raising.
        print(f"❌ 无法解析 {path} 文件: {e}")
        return None
def load_scancode_data(path="summary.json"):
    """Load the ScanCode scan result stored as JSON.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document, or ``None`` when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    if not os.path.exists(path):
        print(f"❌ 未找到 {path} 文件")
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # A truncated or empty result file is reported like a missing one so
        # the caller's ``None`` check handles it instead of a traceback.
        print(f"❌ 无法解析 {path} 文件: {e}")
        return None
def get_licensed_files_details(scancode_data):
    """Collect the scanned files that carry license information.

    Only entries whose ``type`` is ``"file"`` are considered, and entries whose
    path contains any of the exclusion substrings below are skipped. An entry
    counts as licensed when it has a truthy ``detected_license_expression`` or
    a truthy ``license_detections`` value.

    Args:
        scancode_data: Decoded scan result; its ``"files"`` key is read.
            May be ``None`` or empty.

    Returns:
        A list of dicts with the keys ``"path"``, ``"license"`` and
        ``"detections"``. ``"license"`` falls back to ``"Unknown"`` and
        ``"detections"`` to ``[]`` when the source value is missing or null.
    """
    if not scancode_data:
        return []

    # Substrings identifying directories and generated files to leave out.
    exclude_patterns = (
        "/venv/",
        "/.git/",
        "/__pycache__/",
        "/.idea/",
        "/.vscode/",
        "/build/",
        "/dist/",
        ".egg-info",
        "/Resources/",
        "/icons/",
        "/tex/",
        "cloc.json",
        "detailed_cloc.txt",
        "summary.json",
        "完整开源率分析报告.txt",
        "run_complete_analysis.py",
    )

    licensed_files = []
    for entry in scancode_data.get("files") or []:
        if entry.get("type") != "file":
            continue

        file_path = entry.get("path", "")
        if any(pattern in file_path for pattern in exclude_patterns):
            continue

        expression = entry.get("detected_license_expression")
        detections = entry.get("license_detections")
        if expression or detections:
            licensed_files.append({
                "path": file_path,
                # ``dict.get`` only applies its default when the key is
                # absent; a key that is present with a null value must fall
                # back explicitly, otherwise ``None`` leaks into the report.
                "license": expression or "Unknown",
                "detections": detections or [],
            })
    return licensed_files
def get_file_code_lines(path="detailed_cloc.txt"):
    """Build a mapping from file path to code-line count from per-file cloc rows.

    Each usable row is expected to end with three integer columns (blank,
    comment, code); everything before them is taken as the file path, so
    paths containing spaces are kept intact. Rows are validated one by one
    instead of skipping a fixed number of leading lines, so data rows at the
    top of an already-filtered file are not lost, while banner, separator and
    ``SUM:`` lines are still rejected.

    Args:
        path: Text file holding the per-file cloc output.

    Returns:
        Dict mapping each file path (with a leading ``./`` removed) to its
        code-line count. Empty when the file does not exist.
    """
    file_code_lines = {}
    if not os.path.exists(path):
        print(f"❌ 未找到 {path} 文件")
        return file_code_lines

    # Explicit encoding: file names may be non-ASCII and the platform default
    # is not guaranteed to be UTF-8.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            fields = line.strip().rsplit(None, 3)
            if len(fields) != 4:
                continue
            file_path, blank, comment, code = fields
            if file_path == "SUM:":
                # Totals row, not a file.
                continue
            try:
                int(blank)
                int(comment)
                code_lines = int(code)
            except ValueError:
                # Header or banner line whose trailing columns are not numeric.
                continue
            # Normalize the path format.
            if file_path.startswith('./'):
                file_path = file_path[2:]
            file_code_lines[file_path] = code_lines
    return file_code_lines
def calculate_accurate_open_source_lines(licensed_files_details, file_code_lines):
    """Sum the code lines of licensed files that have a known line count.

    For every licensed file the path is looked up in ``file_code_lines`` under
    three spellings, in this order: the path with a leading ``EG/`` removed,
    that path prefixed with ``./``, and that path prefixed with ``EG/``. The
    first spelling found wins; files matching none are left out.

    Args:
        licensed_files_details: Iterable of dicts with ``"path"`` and
            ``"license"`` keys.
        file_code_lines: Dict mapping file paths to code-line counts.

    Returns:
        A tuple ``(total_code_lines, matched_files)`` where ``matched_files``
        is a list of dicts with ``"path"`` (as given in the input),
        ``"code_lines"`` and ``"license"``.
    """
    matched_files = []
    for entry in licensed_files_details:
        original_path = entry["path"]
        relative = original_path[3:] if original_path.startswith('EG/') else original_path

        candidates = (relative, './' + relative, 'EG/' + relative)
        hit = next((name for name in candidates if name in file_code_lines), None)
        if hit is None:
            continue

        matched_files.append({
            "path": original_path,
            "code_lines": file_code_lines[hit],
            "license": entry["license"],
        })

    total_lines = sum(item["code_lines"] for item in matched_files)
    return total_lines, matched_files
def _guess_language(file_path):
    """Return a display language name chosen from the file path's suffix."""
    # Simplified inference: the first matching suffix group wins.
    suffix_table = (
        (('.py',), 'Python'),
        (('.js',), 'JavaScript'),
        (('.cpp', '.cc'), 'C++'),
        (('.h',), 'C/C++ Header'),
        (('.glsl',), 'GLSL'),
        (('.qml',), 'QML'),
        (('.xml',), 'XML'),
        (('.json',), 'JSON'),
        (('.md',), 'Markdown'),
        (('.html',), 'HTML'),
        (('.css',), 'CSS'),
        (('.sh',), 'Shell'),
        (('.yml', '.yaml'), 'YAML'),
    )
    for suffixes, language in suffix_table:
        if file_path.endswith(suffixes):
            return language
    return 'Other'


def _group_cloc_rows_by_language(path="detailed_cloc.txt"):
    """Group ``(file_path, code_lines)`` rows of a per-file cloc listing by language."""
    grouped = {}
    if not os.path.exists(path):
        # Without the listing the per-language section is simply empty.
        return grouped
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            # Path first, then three integer columns (blank, comment, code).
            fields = line.strip().rsplit(None, 3)
            if len(fields) != 4 or fields[0] == "SUM:":
                continue
            file_path, blank, comment, code = fields
            try:
                int(blank)
                int(comment)
                code_lines = int(code)
            except ValueError:
                # Header or banner line; rows are validated individually
                # rather than skipping a fixed number of leading lines.
                continue
            grouped.setdefault(_guess_language(file_path), []).append((file_path, code_lines))
    return grouped


def generate_detailed_report():
    """Assemble the analysis report and write it to ``完整开源率分析报告.txt``.

    Reads ``cloc.json``, ``summary.json`` and ``detailed_cloc.txt`` from the
    current working directory. The report has three sections: an overview
    with the open-source rate, a per-language listing (first 10 files per
    language), and the licensed files grouped by license expression.

    Returns:
        ``True`` when the report was written, ``False`` when ``cloc.json`` or
        ``summary.json`` could not be loaded.
    """
    cloc_data = load_cloc_data("cloc.json")
    scancode_data = load_scancode_data("summary.json")
    file_code_lines = get_file_code_lines()
    if not cloc_data or not scancode_data:
        print("无法加载必要数据文件")
        return False

    licensed_files_details = get_licensed_files_details(scancode_data)
    accurate_open_source_lines, detailed_files = calculate_accurate_open_source_lines(
        licensed_files_details, file_code_lines)

    cloc_summary = cloc_data.get("SUM", {})
    total_code_lines = cloc_summary.get("code", 0)
    # Take the file count from the same summary as the line count instead of
    # a hard-coded number, so the report follows the scanned tree.
    # NOTE(review): relies on cloc's JSON summary carrying "nFiles"; falls
    # back to 0 when it is absent.
    total_files = cloc_summary.get("nFiles", 0)
    licensed_files = len(licensed_files_details)

    if total_code_lines > 0:
        open_source_rate = (accurate_open_source_lines / total_code_lines) * 100
    else:
        open_source_rate = 0

    report_content = [
        "项目开源率分析完整报告",
        "=" * 50,
        "",
        "1. 报告概览",
        "-" * 20,
        f"项目总文件数: {total_files}",
        f"含许可证文件数: {licensed_files}",
        f"项目总代码行数: {total_code_lines}",
        f"准确开源代码行数: {accurate_open_source_lines}",
        f"代码开源率: {open_source_rate:.2f}%",
        "",
        "2. 各语言代码行数分布(包含文件路径)",
        "-" * 40,
    ]

    # Section 2: files grouped by inferred language.
    for lang, files in _group_cloc_rows_by_language("detailed_cloc.txt").items():
        report_content.append(f"\n{lang}语言文件:")
        report_content.append(f" 文件总数: {len(files)}")
        report_content.append(f" 代码行数: {sum(count for _, count in files)}")
        report_content.append(" 文件列表:")
        for file_path, code_lines in files[:10]:  # list only the first 10 files
            report_content.append(f" {file_path}: {code_lines}")
        if len(files) > 10:
            report_content.append(f" ... 还有 {len(files) - 10} 个文件")

    report_content.append("")
    report_content.append("3. 含许可证的开源文件详情")
    report_content.append("-" * 30)

    # Section 3: licensed files grouped by license expression. A null license
    # is grouped under "Unknown" rather than being printed as "None".
    files_by_license = {}
    for file_info in detailed_files:
        license_type = file_info.get("license") or "Unknown"
        files_by_license.setdefault(license_type, []).append(file_info)

    for license_type, files in files_by_license.items():
        report_content.append(f"\n许可证类型: {license_type}")
        report_content.append(f" 文件数量: {len(files)}")
        report_content.append(f" 代码行数: {sum(f['code_lines'] for f in files)}")
        report_content.append(" 文件列表:")
        for file_info in files:
            report_content.append(f" {file_info['path']}: {file_info['code_lines']}")

    with open("完整开源率分析报告.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(report_content))
    print("完整报告已生成:完整开源率分析报告.txt")
    return True
def main():
    """Run the whole analysis pipeline: cloc, per-file cloc, ScanCode, report.

    Returns:
        ``True`` when every required step succeeded, ``False`` as soon as one
        of them fails.
    """
    print("开始执行完整的开源率分析流程...")

    cloc_cmd = "cloc --json --fullpath --not-match-d='(venv|\\.git|__pycache__|\\.idea|\\.vscode|build|dist|.*\\.egg-info|Resources/animations|Resources/materials|Resources/models|Resources/textures|icons|tex)' --not-match-f='(cloc.json|detailed_cloc.txt|summary.json|完整开源率分析报告.txt|run_complete_analysis.py)' . > cloc.json"
    detailed_cloc_cmd = "cloc --by-file --fullpath --not-match-d='(venv|\\.git|__pycache__|\\.idea|\\.vscode|build|dist|.*\\.egg-info|Resources/animations|Resources/materials|Resources/models|Resources/textures|icons|tex)' --not-match-f='(cloc.json|detailed_cloc.txt|summary.json|完整开源率分析报告.txt|run_complete_analysis.py)' . | grep -v \"^\\s*$\" | grep -E \"(\\.py|\\.js|\\.cpp|\\.h|\\.glsl|\\.qml|\\.xml|\\.html|\\.css|\\.java|\\.cs|\\.php)\" > detailed_cloc.txt"
    scancode_cmd = "scancode --license --classify --summary --json-pp summary.json . --ignore \"venv\" --ignore \".git\" --ignore \"__pycache__\" --ignore \".idea\" --ignore \".vscode\" --ignore \"build\" --ignore \"dist\" --ignore \"*.egg-info\" --ignore \"Resources\" --ignore \"icons\" --ignore \"tex\" --ignore \"cloc.json\" --ignore \"detailed_cloc.txt\" --ignore \"完整开源率分析报告.txt\" --ignore \"run_complete_analysis.py\""

    # Steps 1 and 2 are mandatory: total line count, then the per-file listing.
    mandatory_steps = (
        ("\n步骤1: 执行cloc统计代码行数", cloc_cmd, "❌ cloc统计失败"),
        ("\n步骤2: 生成详细文件列表", detailed_cloc_cmd, "❌ 生成详细文件列表失败"),
    )
    for banner, command, failure_message in mandatory_steps:
        print(banner)
        if not run_command(command):
            print(failure_message)
            return False

    # Step 3: license scan. Its exit status is ignored on purpose, because
    # ScanCode tries to scan the summary.json it is writing and reports that
    # as a failure; the presence of the output file is checked instead.
    print("\n步骤3: 执行ScanCode扫描许可证")
    run_command(scancode_cmd, ignore_failure=True)
    if not os.path.exists("summary.json"):
        print("❌ ScanCode未生成summary.json文件")
        return False

    # Step 4: build the final report from the three generated files.
    print("\n步骤4: 生成详细报告")
    if not generate_detailed_report():
        print("❌ 生成报告失败")
        return False

    closing_lines = (
        "\n✅ 完整分析流程执行完成!",
        "生成的文件:",
        " - cloc.json: 代码行数统计",
        " - detailed_cloc.txt: 详细文件列表",
        " - summary.json: 许可证扫描结果",
        " - 完整开源率分析报告.txt: 最终报告",
    )
    for text in closing_lines:
        print(text)
    return True
if __name__ == "__main__":
    # Script entry point: mirror the pipeline outcome in the exit status.
    if main():
        print("\n🎉 所有步骤执行成功!")
        exit_code = 0
    else:
        print("\n❌ 执行过程中出现错误!")
        exit_code = 1
    sys.exit(exit_code)