#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Parse Word analysis reports and extract the score data they contain.

Every ``.doc`` file found in the per-course sub-folders of the report
directory is opened through Word's COM automation interface, its plain text
is scanned with regular expressions, and the per-student results are written
to a JSON file.
"""
import json
import os
import re

# Ability names that are looked up in each report.
ABILITIES = ('数据采集', '数据清洗', '数据分析', '结果解读', '工具实操')

# Raters for which a per-ability score is extracted (school, company).
_RATERS = ('学校', '企业')

# Character class accepting both the ASCII colon and the full-width colon
# (U+FF1A). Written as an escape so the pattern survives any tool that
# normalizes full-width punctuation in this source file.
_COLON = '[:\uff1a]'

_STUDENT_ID_RE = re.compile(rf'学号{_COLON}\s*(\d+)')
_NAME_RE = re.compile(rf'姓名{_COLON}(\S+)')
_TOTAL_RE = re.compile(rf'总分{_COLON}\s*(\d+)')
_SELF_EVAL_RE = re.compile(rf'学生自评{_COLON}*\s*(\d+)\s*分')
_PEER_EVAL_RE = re.compile(rf'学生互评{_COLON}*\s*(\d+)\s*分')

# One compiled pattern per "<ability>_<rater>" result key, in the order the
# keys are added to the result.
# NOTE(review): `.` matches every character except '\n', so the lazy `.*?`
# can run past the end of the ability's own paragraph if paragraphs are
# separated by '\r'; a score belonging to a later ability could then be
# picked up. Confirm against the real report layout before tightening.
_ABILITY_PATTERNS = {}
for _ability in ABILITIES:
    for _rater in _RATERS:
        _ABILITY_PATTERNS[f'{_ability}_{_rater}'] = re.compile(
            rf'{_ability}[能力]*.*?{_rater}[评分为]*{_COLON}*\s*(\d+)\s*分'
        )


def extract_scores_from_text(text: str) -> dict:
    """Extract the score fields from the plain text of one report.

    Args:
        text: Plain text of a report.

    Returns:
        A dict holding only the fields that were found:
        ``studentId`` (str), ``name`` (str), ``totalScore`` (int),
        ``<ability>_学校`` / ``<ability>_企业`` (int) for each entry of
        ``ABILITIES``, and ``学生自评`` / ``学生互评`` (int).
        An empty dict is returned when nothing matches.
    """
    scores = {}

    student_id_match = _STUDENT_ID_RE.search(text)
    if student_id_match:
        # Kept as a string so leading zeros are preserved.
        scores['studentId'] = student_id_match.group(1)

    name_match = _NAME_RE.search(text)
    if name_match:
        scores['name'] = name_match.group(1)

    total_match = _TOTAL_RE.search(text)
    if total_match:
        scores['totalScore'] = int(total_match.group(1))

    # Per-ability scores given by the school and by the company.
    for key, pattern in _ABILITY_PATTERNS.items():
        match = pattern.search(text)
        if match:
            scores[key] = int(match.group(1))

    # Student self-evaluation and peer evaluation.
    self_eval_match = _SELF_EVAL_RE.search(text)
    if self_eval_match:
        scores['学生自评'] = int(self_eval_match.group(1))

    peer_eval_match = _PEER_EVAL_RE.search(text)
    if peer_eval_match:
        scores['学生互评'] = int(peer_eval_match.group(1))

    return scores


def _report_filenames(course_path):
    """Return the sorted ``.doc`` file names in *course_path*.

    Names starting with ``~$`` are skipped: those are the owner/lock files
    Word creates next to an open document, not reports.
    """
    return [
        name
        for name in sorted(os.listdir(course_path))
        if name.endswith('.doc') and not name.startswith('~$')
    ]


def _read_document_text(word, doc_path):
    """Open *doc_path* in the given Word application and return its text.

    The document is always closed again, even when reading the text fails.
    """
    # Positional arguments: FileName, ConfirmConversions=False, ReadOnly=True.
    doc = word.Documents.Open(doc_path, False, True)
    try:
        return doc.Content.Text
    finally:
        # SaveChanges=False: never prompt to save in the hidden Word window.
        doc.Close(False)


def _record_scores(all_scores, course_dir, scores):
    """Store *scores* under its student id; return False if it has no id."""
    student_id = scores.get('studentId')
    if not student_id:
        return False
    entry = all_scores.setdefault(
        student_id,
        {'name': scores.get('name', ''), 'courses': {}},
    )
    entry['courses'][course_dir] = scores
    return True


def parse_all_reports(base_dir='分析报告'):
    """Parse every report below *base_dir* and collect the scores per student.

    Each sub-directory of *base_dir* is treated as one course; every ``.doc``
    file inside it is read through Word and passed to
    ``extract_scores_from_text``. A file that cannot be read is reported on
    stdout and skipped, as is a file in which no student id is found.

    Args:
        base_dir: Directory containing one sub-directory per course.

    Returns:
        ``{student_id: {'name': str, 'courses': {course_dir: scores}}}``.
        ``name`` is taken from the first report seen for that student.

    Raises:
        OSError: If *base_dir* or a course directory cannot be listed.
    """
    # Imported here rather than at module level so the text-parsing helpers
    # stay importable on machines without pywin32 / Microsoft Word.
    import win32com.client

    word = win32com.client.Dispatch('Word.Application')
    all_scores = {}
    try:
        word.Visible = False

        # Sorted so the processing order (and which report supplies a
        # student's name) does not depend on the file system's listing order.
        for course_dir in sorted(os.listdir(base_dir)):
            course_path = os.path.join(base_dir, course_dir)
            if not os.path.isdir(course_path):
                continue

            print(f'\n处理课程: {course_dir}')

            for filename in _report_filenames(course_path):
                # Word resolves relative paths against its own working
                # directory, so hand it an absolute path.
                doc_path = os.path.abspath(os.path.join(course_path, filename))
                try:
                    text = _read_document_text(word, doc_path)
                    scores = extract_scores_from_text(text)
                except Exception as e:
                    # Best effort: one unreadable report must not abort the run.
                    print(f' ✗ 错误 {filename}: {e}')
                    continue

                if _record_scores(all_scores, course_dir, scores):
                    print(f' ✓ {filename}: {scores.get("name", "")} - {scores.get("totalScore", 0)}分')
                else:
                    print(f' ✗ 未找到学号 {filename}')
    finally:
        # Always shut the hidden Word instance down, otherwise a failure
        # above would leave an invisible Word process running.
        word.Quit()

    return all_scores


def main():
    """Parse all reports and save the collected scores as JSON."""
    print('开始解析Word文档...\n')

    scores = parse_all_reports()

    # Save as JSON; ensure_ascii=False keeps the Chinese text readable.
    output_file = '分析报告/extracted_scores.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(scores, f, ensure_ascii=False, indent=2)

    print('\n\n解析完成!')
    print(f'共处理 {len(scores)} 名学生的数据')
    print(f'结果已保存至: {output_file}')


if __name__ == '__main__':
    main()