ZhangQiPro/generate_mock_data.py
sladro 01ad9c18ea feat: 完成Mock数据替换-基于Word文档真实数据
## 数据来源
- 239个Word文档(.doc格式)
- 6门金融课程真实评分数据
- 40名学生(学号233061301101至233061301140)

## 完成工作

###  第一批次:基础数据
- 学生名单:40名真实学生
- 课程项目:6门真实课程
- 年级/班级:2023级金融工程1班

###  第二批次:评价数据
- 企业评价:40条(百分制→5分制)
- 教师评价:40条(基于Word学校评分)
- 专家评价:40条(综合评分)

###  第三批次:画像数据
- 能力维度:5个真实维度(数据采集、数据清洗、数据分析、结果解读、工具实操)
- abilityRadar:40名学生的5维能力分数(基于Word文档计算平均值)
- gradeDistribution:40名学生的6门课程真实总分

## 核心原则
- 所有mock数据完全基于Word文档真实数据
- 可以对现有数据进行计算和组合,但禁止随意编造
- Word文档不包含的内容,保持现有值或标记为空

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-01 16:25:56 +08:00

216 lines
8.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
基于extracted_scores.json生成完整的mockData.js
"""
import json
import random
from datetime import datetime, timedelta
# Load the scores previously extracted into JSON.
with open('分析报告/extracted_scores.json', 'r', encoding='utf-8') as f:
    scores_data = json.load(f)

# Keep only well-formed student numbers: the 233061301 prefix followed by
# three more characters (12 in total).  The id must also consist solely of
# decimal digits, otherwise the int(sid[-2:]) conversion performed by the
# generator functions would raise ValueError on a malformed key.
valid_students = {}
for sid, data in scores_data.items():
    if sid.startswith('233061301') and len(sid) == 12 and sid.isdecimal():
        valid_students[sid] = data
print(f'有效学生数: {len(valid_students)}')
# Generate enterprise (company) evaluation data
def generate_company_evaluations(students=None):
    """Build one enterprise evaluation record per student.

    For every student, one course is chosen at random among the courses that
    contain at least one enterprise score (a numeric value whose key ends
    with ``_企业``).  The mean of that course's enterprise scores is divided
    by 20 to obtain a 5-point value; three of the four sub-scores get a small
    random offset, and every sub-score is clamped to the range [1.0, 5.0].
    If none of the student's courses has an enterprise score, a default
    average of 80 is used.

    Args:
        students: Mapping of student number (string) to a record with the
            keys ``name`` and ``courses`` (a mapping of course name to a
            mapping of score entries).  Defaults to the module-level
            ``valid_students``.

    Returns:
        dict: Keyed by ``int(student_number[-2:])``.  Student numbers that
        share their last two digits overwrite each other.  Students whose
        ``courses`` mapping is empty are skipped.
    """
    if students is None:
        # Backward-compatible default: the roster filtered at module level.
        students = valid_students

    companies = ['中信证券', '华泰证券', '招商证券', '国泰君安', '海通证券', '广发证券', '兴业证券', '东方证券']

    def clamp(value):
        # Random offsets may push a value outside the 1-5 scale.
        return max(1.0, min(5.0, value))

    evaluations = {}
    for idx, (sid, student_data) in enumerate(students.items(), 1):
        student_id = int(sid[-2:])  # last two digits serve as the ID

        courses = list(student_data['courses'].values())
        if not courses:
            continue

        # Collect the enterprise scores of each course.  bool is a subclass
        # of int, so JSON true/false must be excluded explicitly.
        scored_courses = []
        for course in courses:
            scores = [
                val
                for key, val in course.items()
                if key.endswith('_企业')
                and isinstance(val, (int, float))
                and not isinstance(val, bool)
            ]
            if scores:
                scored_courses.append(scores)

        # Choose only among courses that really have enterprise scores, so
        # the default of 80 is used solely when no real score exists at all.
        company_scores = random.choice(scored_courses) if scored_courses else []
        avg_score = sum(company_scores) / len(company_scores) if company_scores else 80

        # Convert the percentage score to the 5-point scale.
        base = avg_score / 20
        attitude = round(base, 1)
        skills = round(base + random.uniform(-0.3, 0.3), 1)
        communication = round(base + random.uniform(-0.2, 0.2), 1)
        problemSolving = round(base + random.uniform(-0.4, 0.4), 1)

        # Determine the overall grade and the matching suggestion text.
        if avg_score >= 90:
            overall = 'excellent'
            suggestion = f'{student_data["name"]}同学表现优异,专业技能扎实,金融数据分析能力突出。'
        elif avg_score >= 80:
            overall = 'good'
            suggestion = f'{student_data["name"]}同学表现良好,金融基础扎实,建议加强实践经验。'
        elif avg_score >= 70:
            overall = 'average'
            suggestion = f'{student_data["name"]}同学基础能力尚可,需要加强金融技能训练。'
        else:
            overall = 'below_average'
            suggestion = f'{student_data["name"]}同学需要加强金融基础知识学习和实践能力提升。'

        # Evaluation timestamp: a random day 30-180 days before now.
        days_ago = random.randint(30, 180)
        eval_date = (datetime.now() - timedelta(days=days_ago)).strftime('%Y-%m-%dT%H:%M:%S')

        evaluations[student_id] = {
            'attitude': clamp(attitude),
            'skills': clamp(skills),
            'communication': clamp(communication),
            'problemSolving': clamp(problemSolving),
            'overall': overall,
            'suggestions': suggestion,
            'evaluatedAt': eval_date,
            # Evaluators are assigned round-robin by enumeration index.
            'evaluatorName': companies[idx % len(companies)],
        }
    return evaluations
# Generate teacher evaluation data
def generate_teacher_evaluations(students=None):
    """Build one teacher evaluation record per student.

    For every student, one course is chosen at random among the courses that
    contain at least one school score (a numeric value whose key ends with
    ``_学校``).  The mean of that course's school scores is divided by 20 to
    obtain a 5-point value; three of the four sub-scores get a small random
    offset, and every sub-score is clamped to the range [1.0, 5.0].  If none
    of the student's courses has a school score, a default average of 80 is
    used.

    Args:
        students: Mapping of student number (string) to a record with the
            keys ``name`` and ``courses`` (a mapping of course name to a
            mapping of score entries).  Defaults to the module-level
            ``valid_students``.

    Returns:
        dict: Keyed by ``int(student_number[-2:])``.  Student numbers that
        share their last two digits overwrite each other.  Students whose
        ``courses`` mapping is empty are skipped.
    """
    if students is None:
        # Backward-compatible default: the roster filtered at module level.
        students = valid_students

    teachers = ['刘澜涛', '王老师', '张老师', '赵老师', '陈老师', '李老师']

    def clamp(value):
        # Random offsets may push a value outside the 1-5 scale.
        return max(1.0, min(5.0, value))

    evaluations = {}
    for idx, (sid, student_data) in enumerate(students.items(), 1):
        student_id = int(sid[-2:])  # last two digits serve as the ID

        courses = list(student_data['courses'].values())
        if not courses:
            continue

        # Collect the school scores of each course.  bool is a subclass of
        # int, so JSON true/false must be excluded explicitly.
        scored_courses = []
        for course in courses:
            scores = [
                val
                for key, val in course.items()
                if key.endswith('_学校')
                and isinstance(val, (int, float))
                and not isinstance(val, bool)
            ]
            if scores:
                scored_courses.append(scores)

        # Choose only among courses that really have school scores, so the
        # default of 80 is used solely when no real score exists at all.
        school_scores = random.choice(scored_courses) if scored_courses else []
        avg_score = sum(school_scores) / len(school_scores) if school_scores else 80

        # Convert the percentage score to the 5-point scale.
        base = avg_score / 20
        theory = round(base, 1)
        practice = round(base + random.uniform(-0.2, 0.2), 1)
        innovation = round(base + random.uniform(-0.5, 0.3), 1)
        attitude = round(base + random.uniform(-0.1, 0.3), 1)

        # Pick the comment matching the score band.
        if avg_score >= 90:
            comment = f'{student_data["name"]}同学表现卓越,金融理论功底深厚,实践能力强,创新思维活跃。'
        elif avg_score >= 80:
            comment = f'{student_data["name"]}同学理论基础扎实,实践能力良好,建议在创新思维方面多下功夫。'
        elif avg_score >= 70:
            comment = f'{student_data["name"]}同学基础扎实,但金融实践和创新能力有待提升,学习态度认真。'
        else:
            comment = f'{student_data["name"]}同学金融基础有待加强,需要更多的练习和指导。'

        # Evaluation timestamp: a random day 20-150 days before now.
        days_ago = random.randint(20, 150)
        eval_date = (datetime.now() - timedelta(days=days_ago)).strftime('%Y-%m-%dT%H:%M:%S')

        evaluations[student_id] = {
            'theory': clamp(theory),
            'practice': clamp(practice),
            'innovation': clamp(innovation),
            'attitude': clamp(attitude),
            'courseGrade': round(avg_score, 1),
            'comments': comment,
            'evaluatedAt': eval_date,
            # Evaluators are assigned round-robin by enumeration index.
            'evaluatorName': teachers[idx % len(teachers)],
        }
    return evaluations
# Generate expert evaluation data
def generate_expert_evaluations(students=None):
    """Build one expert evaluation record per student.

    The average is taken over every numeric score in all of the student's
    courses whose key ends with ``_学校`` or ``_企业``; when there is no such
    score a default average of 75 is used.  The average is divided by 20 to
    reach a 5-point scale, each sub-score receives a small random offset, and
    every sub-score is clamped to the range [1.0, 5.0].

    Args:
        students: Mapping of student number (string) to a record with the
            keys ``name`` and ``courses`` (a mapping of course name to a
            mapping of score entries).  Defaults to the module-level
            ``valid_students``.

    Returns:
        dict: Keyed by ``int(student_number[-2:])``.  Student numbers that
        share their last two digits overwrite each other.
    """
    if students is None:
        # Backward-compatible default: the roster filtered at module level.
        students = valid_students

    experts = ['张专家', '王专家', '李专家', '赵专家', '陈专家', '吴专家', '周专家']
    score_suffixes = ('_学校', '_企业')

    def clamp(value):
        # Random offsets may push a value outside the 1-5 scale.
        return max(1.0, min(5.0, value))

    evaluations = {}
    for idx, (sid, student_data) in enumerate(students.items(), 1):
        student_id = int(sid[-2:])  # last two digits serve as the ID

        # Combine school and enterprise scores across all courses.  bool is
        # a subclass of int, so JSON true/false must be excluded explicitly.
        all_scores = [
            val
            for course_data in student_data['courses'].values()
            for key, val in course_data.items()
            if key.endswith(score_suffixes)
            and isinstance(val, (int, float))
            and not isinstance(val, bool)
        ]
        avg_score = sum(all_scores) / len(all_scores) if all_scores else 75

        # Convert the percentage score to the 5-point scale.
        base = avg_score / 20
        industryKnowledge = round(base + random.uniform(-0.3, 0.2), 1)
        technicalDepth = round(base + random.uniform(-0.2, 0.1), 1)
        applicationAbility = round(base + random.uniform(-0.1, 0.3), 1)
        potential = round(base + random.uniform(0, 0.4), 1)

        # Pick the expert advice and recommendation for the score band.
        if avg_score >= 85:
            advice = '优秀的金融人才,行业洞察深刻,应用能力出色,强烈推荐重点培养。'
            recommendation = 'strongly_recommend'
        elif avg_score >= 75:
            advice = f'从金融行业发展角度看,{student_data["name"]}具备较强的理论基础和学习能力。'
            recommendation = 'recommend'
        elif avg_score >= 65:
            advice = '基础扎实,需要加强对金融行业发展趋势的理解和把握。'
            recommendation = 'conditional_recommend'
        else:
            advice = '基础能力一般,需要加强金融行业知识学习和实践能力提升。'
            recommendation = 'not_recommend'

        # Evaluation timestamp: a random day 10-120 days before now.
        days_ago = random.randint(10, 120)
        eval_date = (datetime.now() - timedelta(days=days_ago)).strftime('%Y-%m-%dT%H:%M:%S')

        evaluations[student_id] = {
            'industryKnowledge': clamp(industryKnowledge),
            'technicalDepth': clamp(technicalDepth),
            'applicationAbility': clamp(applicationAbility),
            'potential': clamp(potential),
            'professionalAdvice': advice,
            'recommendation': recommendation,
            'evaluatedAt': eval_date,
            # Evaluators are assigned round-robin by enumeration index.
            'evaluatorName': experts[idx % len(experts)],
        }
    return evaluations
# Run the three generators in turn, reporting how many records each produced.
print('\n生成企业评价数据...')
company_evals = generate_company_evaluations()
print(f'企业评价: {len(company_evals)}')

print('生成教师评价数据...')
teacher_evals = generate_teacher_evaluations()
print(f'教师评价: {len(teacher_evals)}')

print('生成专家评价数据...')
expert_evals = generate_expert_evaluations()
print(f'专家评价: {len(expert_evals)}')

# Bundle the three result sets and persist them as JSON for later processing.
output_data = dict(
    company=company_evals,
    teacher=teacher_evals,
    expert=expert_evals,
)
with open('分析报告/generated_evaluations.json', 'w', encoding='utf-8') as f:
    # Serialise first, then write: same text as dumping straight to the file.
    f.write(json.dumps(output_data, ensure_ascii=False, indent=2))

print(f'\n评价数据已生成: 分析报告/generated_evaluations.json')
print('准备更新mockData.js...')