# Listing metadata from the original code viewer: Python, 102 lines, 3.4 KiB.
"""Convert the plain-text Q&A document into the static JSON knowledge base.

The input is scanned line by line and every line is classified as one of:

* a top-level section heading (starts with ``一、`` .. ``六、``) which selects
  the category for the questions that follow;
* a sub-section heading (starts with ``(一)`` .. ``(三)``);
* a numbered question (``12.`` or ``三.`` followed by the question text);
* an answer line (starts with ``•答案:``).

Lines that match none of these shapes are ignored.  A question is emitted
only once it has a non-empty answer.

Run as a script to convert ``qa_file`` into ``output_file``; import the module
to reuse ``parse_qa_text`` / ``convert`` without touching the filesystem.
"""
import json
import re

# Default input/output locations used when the module is run as a script.
qa_file = '/Users/tianjianyong/apps/Company/kangda-robot-backend/ruoyi-fastapi-backend/doc/qa.txt'
output_file = '/Users/tianjianyong/apps/Company/kangda-robot-backend/ruoyi-fastapi-backend/config/static_qa.json'

# Line prefixes that identify the structural elements of the document.
_SECTION_PREFIXES = ('一、', '二、', '三、', '四、', '五、', '六、')
_SUBSECTION_PREFIXES = ('(一)', '(二)', '(三)')
_ANSWER_PREFIX = '•答案:'

# Category names looked up (in this order) inside a section heading; a heading
# containing none of them falls back to _DEFAULT_CATEGORY.
_KNOWN_CATEGORIES = (
    '公司概况',
    '历史发展',
    '生产基地与研发中心',
    '财务数据',
    '业务介绍',
    '日常聊天',
)
_DEFAULT_CATEGORY = '其他'

# Question numbering: digits or Chinese numerals followed by an ASCII dot.
# Compiled once so the loop does not rebuild/look up the pattern per line.
_QUESTION_NUMBER_RE = re.compile(r'^\d+\.|^[一二三四五六七八九十]+\.')


def _build_entry(question, answer, category):
    """Return one Q&A record in the shape written to the JSON file."""
    return {
        "question": question,
        "answer": answer,
        "category": category,
        "priority": 10,
        "sub_questions": [],
        "variations": [],
    }


def _flush(qa_pairs, question, answer, category):
    """Append the pending Q&A to *qa_pairs* if both parts are non-empty."""
    if question and answer:
        qa_pairs.append(_build_entry(question, answer, category))


def parse_qa_text(content):
    """Parse the Q&A document text into a list of Q&A records.

    Args:
        content: Full text of the Q&A document.

    Returns:
        A list of dicts with the keys ``question``, ``answer``, ``category``,
        ``priority``, ``sub_questions`` and ``variations``, in document order.
        ``category`` is ``None`` for questions that appear before any section
        heading.
    """
    # A UTF-8 BOM is not whitespace, so ``strip()`` would leave it glued to the
    # first line and the first section heading would go unrecognised.
    if content.startswith('\ufeff'):
        content = content[1:]

    qa_pairs = []
    category = None
    question = None
    answer = None

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith(_SECTION_PREFIXES):
            _flush(qa_pairs, question, answer, category)
            # Reset unconditionally so an answer line that follows a heading
            # can never attach to a question left over from the old section.
            question = None
            answer = None
            category = next(
                (name for name in _KNOWN_CATEGORIES if name in line),
                _DEFAULT_CATEGORY,
            )
            continue

        if line.startswith(_SUBSECTION_PREFIXES):
            # Sub-section headings end the pending Q&A but keep the category.
            _flush(qa_pairs, question, answer, category)
            question = None
            answer = None
            continue

        number = _QUESTION_NUMBER_RE.match(line)
        if number:
            _flush(qa_pairs, question, answer, category)
            question = line[number.end():].strip()
            answer = None
            continue

        # An answer is only meaningful while a question is pending; a later
        # answer line for the same question replaces the earlier one.
        if line.startswith(_ANSWER_PREFIX) and question:
            answer = line[len(_ANSWER_PREFIX):].strip()

    # The last question in the document has no following marker to flush it.
    _flush(qa_pairs, question, answer, category)
    return qa_pairs


def convert(source_path=qa_file, target_path=output_file):
    """Read *source_path*, parse it and write the JSON file at *target_path*.

    Args:
        source_path: UTF-8 text file holding the Q&A document.
        target_path: Destination of the generated JSON; overwritten if present.

    Returns:
        The list of Q&A records that was written under the ``qa_pairs`` key.

    Raises:
        OSError: If the source cannot be read or the target cannot be written.
    """
    with open(source_path, 'r', encoding='utf-8') as f:
        content = f.read()

    qa_pairs = parse_qa_text(content)

    with open(target_path, 'w', encoding='utf-8') as f:
        json.dump({"qa_pairs": qa_pairs}, f, ensure_ascii=False, indent=2)

    return qa_pairs


def main():
    """Convert the default input file and report how many pairs were written."""
    qa_pairs = convert()
    print(f"成功转换 {len(qa_pairs)} 个问答对到 {output_file}")


if __name__ == '__main__':
    main()