feat: 初始化BidMaster-CLI项目
- 实现三层架构(CLI/Agent/Tools) - 完成招标文件解析器(支持Excel/CSV/Word) - 实现AI智能表格识别和分类 - 支持商务/技术评分项智能分离 - 实现RAG知识库管理 - 完成专业目录结构生成 - 修复编码规范违规问题 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
commit
47fc26f3f0
33
.env.example
Normal file
33
.env.example
Normal file
@ -0,0 +1,33 @@
|
||||
# 环境变量示例文件
|
||||
# 复制为.env并填写实际值
|
||||
|
||||
# LLM配置
|
||||
BIDMASTER_LLM_PROVIDER=openai
|
||||
BIDMASTER_API_KEY=your_api_key_here
|
||||
BIDMASTER_MODEL_NAME=gpt-4
|
||||
# BIDMASTER_BASE_URL=https://api.openai.com/v1 # 可选,不设置将使用默认值
|
||||
|
||||
# Azure OpenAI 配置(仅在使用azure_openai时需要)
|
||||
# BIDMASTER_AZURE_API_VERSION=2024-02-15-preview
|
||||
# BIDMASTER_AZURE_DEPLOYMENT=your-deployment-name
|
||||
|
||||
# Ollama 配置(仅在使用ollama时需要)
|
||||
# BIDMASTER_OLLAMA_HOST=http://localhost:11434
|
||||
|
||||
# 其他提供商示例:
|
||||
# 智谱AI: BIDMASTER_LLM_PROVIDER=zhipu BIDMASTER_API_KEY=your_zhipu_key
|
||||
# 通义千问: BIDMASTER_LLM_PROVIDER=qwen BIDMASTER_API_KEY=your_qwen_key
|
||||
# DeepSeek: BIDMASTER_LLM_PROVIDER=deepseek BIDMASTER_API_KEY=your_deepseek_key
|
||||
# Anthropic: BIDMASTER_LLM_PROVIDER=anthropic BIDMASTER_API_KEY=your_anthropic_key
|
||||
|
||||
# 向量数据库配置
|
||||
BIDMASTER_CHROMA_PATH=./data/kb
|
||||
BIDMASTER_EMBEDDING_MODEL=text-embedding-3-small
|
||||
|
||||
# 性能配置
|
||||
BIDMASTER_MAX_WORKERS=4
|
||||
BIDMASTER_CHUNK_SIZE=1000
|
||||
BIDMASTER_MAX_TOKENS=8000
|
||||
|
||||
# 日志配置
|
||||
BIDMASTER_LOG_LEVEL=INFO
|
||||
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
# Python-generated files
|
||||
__pycache__/
|
||||
*.py[oc]
|
||||
build/
|
||||
dist/
|
||||
wheels/
|
||||
*.egg-info
|
||||
|
||||
# Virtual environments
|
||||
.venv
|
||||
25
.pre-commit-config.yaml
Normal file
25
.pre-commit-config.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
# Pre-commit钩子配置
|
||||
repos:
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 25.9.0
|
||||
hooks:
|
||||
- id: black
|
||||
language_version: python3.11
|
||||
|
||||
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
||||
rev: v0.13.1
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --exit-non-zero-on-fix]
|
||||
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 6.0.1
|
||||
hooks:
|
||||
- id: isort
|
||||
args: ["--profile", "black"]
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v1.5.0
|
||||
hooks:
|
||||
- id: mypy
|
||||
additional_dependencies: [types-all]
|
||||
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
||||
3.11
|
||||
253
ARCHITECTURE.md
Normal file
253
ARCHITECTURE.md
Normal file
@ -0,0 +1,253 @@
|
||||
# BidMaster-CLI 架构与编码规范
|
||||
|
||||
## 一、项目架构设计
|
||||
|
||||
### 1.1 分层架构
|
||||
```
|
||||
项目采用三层架构,严格分离关注点:
|
||||
|
||||
1. CLI层 (Interface Layer)
|
||||
- 负责命令解析和用户交互
|
||||
- 使用Click框架构建
|
||||
|
||||
2. Agent层 (Orchestration Layer)
|
||||
- 使用LangGraph编排三个Agent
|
||||
- Analysis Agent: 文档解析
|
||||
- Generation Agent: 内容生成
|
||||
- Assembly Agent: 文档组装
|
||||
|
||||
3. 工具层 (Tooling Layer)
|
||||
- 原子化工具函数
|
||||
- RAG检索、文档处理、表格生成
|
||||
```
|
||||
|
||||
### 1.2 目录结构规范
|
||||
```
|
||||
bidmaster-cli/
|
||||
├── src/
|
||||
│ └── bidmaster/
|
||||
│ ├── cli/ # 命令行接口 (≤8个文件)
|
||||
│ ├── agents/ # Agent逻辑 (≤8个文件)
|
||||
│ ├── tools/ # 工具函数 (≤8个文件)
|
||||
│ ├── models/ # 数据模型
|
||||
│ ├── config/ # 配置管理
|
||||
│ └── utils/ # 公共工具
|
||||
├── tests/ # 测试文件
|
||||
│ ├── unit/
|
||||
│ └── integration/
|
||||
├── templates/ # Word模板文件
|
||||
├── data/ # 数据存储
|
||||
└── config/ # 配置文件
|
||||
```
|
||||
|
||||
## 二、编码规范
|
||||
|
||||
### 2.1 代码风格
|
||||
```python
|
||||
# 强制使用工具链
|
||||
- Black: 代码格式化 (line-length=88)
|
||||
- Ruff: 代码检查 (E, F, I, N, UP规则)
|
||||
- isort: 导入排序 (profile=black)
|
||||
- mypy: 类型检查 (strict模式)
|
||||
```
|
||||
|
||||
### 2.2 命名规范
|
||||
```python
|
||||
# 类名: PascalCase
|
||||
class WordProcessor:
|
||||
pass
|
||||
|
||||
# 函数/变量: snake_case
|
||||
def parse_document(file_path: Path) -> dict:
|
||||
result_data = {}
|
||||
|
||||
# 常量: UPPER_CASE
|
||||
MAX_RETRY_COUNT = 3
|
||||
DEFAULT_TIMEOUT = 30
|
||||
|
||||
# 私有成员: 单下划线前缀
|
||||
def _internal_method():
|
||||
pass
|
||||
```
|
||||
|
||||
### 2.3 类型注解
|
||||
```python
|
||||
# 100%类型覆盖,使用Python 3.11+语法
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
def process_file(
|
||||
file_path: Path,
|
||||
encoding: str = "utf-8"
|
||||
) -> dict[str, Any]:
|
||||
"""所有公共函数必须有类型注解"""
|
||||
pass
|
||||
```
|
||||
|
||||
## 三、核心开发原则
|
||||
|
||||
### 3.1 错误处理
|
||||
```python
|
||||
# 立即失败原则,不使用静默处理或后备方案
|
||||
class BidMasterError(Exception):
|
||||
"""基础异常类"""
|
||||
|
||||
# 错误必须明确抛出
|
||||
if not file_path.exists():
|
||||
raise FileNotFoundError(f"文件不存在: {file_path}")
|
||||
|
||||
# 禁止吞没异常
|
||||
# 错误的做法:
|
||||
try:
|
||||
process()
|
||||
except:
|
||||
pass # 禁止!
|
||||
|
||||
# 正确的做法:
|
||||
try:
|
||||
process()
|
||||
except SpecificError as e:
|
||||
logger.error(f"处理失败: {e}")
|
||||
raise # 重新抛出
|
||||
```
|
||||
|
||||
### 3.2 配置管理
|
||||
```python
|
||||
# 使用Pydantic Settings
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
class Settings(BaseSettings):
|
||||
# 配置分三层:默认值、配置文件、环境变量
|
||||
api_key: str # 敏感信息只从环境变量读取
|
||||
model_name: str = "gpt-4"
|
||||
chunk_size: int = 1000
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
env_prefix = "BIDMASTER_"
|
||||
|
||||
# 单例模式
|
||||
settings = Settings()
|
||||
```
|
||||
|
||||
### 3.3 日志规范
|
||||
```python
|
||||
import logging
|
||||
|
||||
# 分级日志
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 统一日志格式
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
|
||||
# 正确使用日志级别
|
||||
logger.debug("调试信息")
|
||||
logger.info("正常流程")
|
||||
logger.warning("警告信息")
|
||||
logger.error("错误信息")
|
||||
```
|
||||
|
||||
## 四、代码质量保证
|
||||
|
||||
### 4.1 Pre-commit配置
|
||||
```yaml
|
||||
# .pre-commit-config.yaml
|
||||
repos:
|
||||
- repo: https://github.com/psf/black
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
||||
hooks:
|
||||
- id: ruff
|
||||
- repo: https://github.com/pycqa/isort
|
||||
hooks:
|
||||
- id: isort
|
||||
```
|
||||
|
||||
### 4.2 测试规范
|
||||
```python
|
||||
# 使用pytest
|
||||
import pytest
|
||||
|
||||
# 测试文件命名: test_*.py
|
||||
# 测试函数命名: test_*
|
||||
|
||||
@pytest.fixture
|
||||
def sample_data():
|
||||
"""测试固件"""
|
||||
return {"key": "value"}
|
||||
|
||||
def test_parse_document(sample_data):
|
||||
"""测试用例必须有明确断言"""
|
||||
result = parse(sample_data)
|
||||
assert result is not None
|
||||
assert "key" in result
|
||||
```
|
||||
|
||||
### 4.3 文档规范
|
||||
```python
|
||||
def calculate_score(
|
||||
data: dict[str, float],
|
||||
weights: dict[str, float]
|
||||
) -> float:
|
||||
"""计算加权分数
|
||||
|
||||
Args:
|
||||
data: 原始数据字典
|
||||
weights: 权重字典
|
||||
|
||||
Returns:
|
||||
加权后的总分
|
||||
|
||||
Raises:
|
||||
ValueError: 当数据和权重键不匹配时
|
||||
"""
|
||||
pass
|
||||
```
|
||||
|
||||
## 五、依赖管理
|
||||
|
||||
### 5.1 使用uv管理依赖
|
||||
```toml
|
||||
# pyproject.toml
|
||||
[project]
|
||||
name = "bidmaster-cli"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
# PEP 621: dependencies 是 [project] 下的数组,而不是 [project.dependencies] 表
dependencies = [
    # 只包含必要依赖
    "chromadb>=1.1.0",
    "click>=8.3.0",
    "langchain>=0.3.27",
    "langgraph>=0.6.7",
    "pydantic-settings>=2.10.1",
    "python-docx>=1.2.0",
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"black>=25.9.0",
|
||||
"ruff>=0.13.1",
|
||||
"pytest>=8.4.2",
|
||||
"mypy>=1.5.0"
|
||||
]
|
||||
```
|
||||
|
||||
## 六、运维考虑
|
||||
|
||||
### 6.1 版本管理
|
||||
- 使用语义化版本号 (MAJOR.MINOR.PATCH)
|
||||
- Git分支策略: main + develop + feature/*
|
||||
|
||||
### 6.2 性能监控
|
||||
- 关键操作添加耗时日志
|
||||
- 内存使用监控
|
||||
- 向量数据库定期维护
|
||||
|
||||
### 6.3 数据安全
|
||||
- API密钥等敏感信息环境变量管理
|
||||
- 定期备份向量数据库
|
||||
- 日志不记录敏感信息
|
||||
|
||||
这套规范确保代码质量、可维护性和团队协作效率。
|
||||
88
CLAUDE.md
Normal file
88
CLAUDE.md
Normal file
@ -0,0 +1,88 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## 项目概述
|
||||
|
||||
这是一个AI标书撰写助手项目 (BidMaster-CLI),旨在通过智能化标书制作流程来提升投标效率和中标率。
|
||||
|
||||
## 项目架构
|
||||
|
||||
采用三阶段Agent驱动的分层架构:
|
||||
|
||||
### 架构层级
|
||||
1. **Interface Layer (CLI)**: 命令行接口,负责命令解析和用户交互
|
||||
2. **Orchestration Layer (LangGraph)**: 由三个协同Agent构成的核心逻辑层
|
||||
- Phase 1: Analysis Agent - 解析招标文件,生成投标任务清单和文档框架
|
||||
- Phase 2: Generation Agent - 基于任务清单执行RAG内容生成
|
||||
- Phase 3: Assembly Agent - 生成响应表和偏离表
|
||||
3. **Tooling Layer**: 原子化工具集
|
||||
- RAG Tool - 检索增强生成
|
||||
- Bid Parser - 招标文件解析器
|
||||
- Word Processor - Word文档处理器
|
||||
- Table Generator - 表格生成器
|
||||
|
||||
### 关键技术栈
|
||||
- **Word文档处理**: 统一使用 python-docx 库
|
||||
- **依赖管理**: uv(pyproject.toml 使用 [dependency-groups])
|
||||
- **配置管理**: Pydantic-Settings
|
||||
- **代码质量**: Black, Flake8/Ruff, isort, pre-commit
|
||||
- **测试**: pytest
|
||||
|
||||
## 核心CLI命令
|
||||
|
||||
### 知识库管理 (kb)
|
||||
```bash
|
||||
kb init # 初始化向量数据库
|
||||
kb add <path> # 添加文件到知识库
|
||||
kb list # 列出已索引文档
|
||||
kb reset # 清空数据库 [危险操作]
|
||||
kb reindex # 重新索引
|
||||
kb status # 显示知识库统计信息
|
||||
```
|
||||
|
||||
### 标书项目管理 (project)
|
||||
```bash
|
||||
project new # 核心命令:解析招标文件,生成任务清单和Word框架
|
||||
project status # 显示项目任务清单及完成状态
|
||||
```
|
||||
|
||||
### 内容生成 (generate)
|
||||
```bash
|
||||
generate task <id> # 为特定任务ID生成内容
|
||||
generate full # 自动为所有待处理任务生成内容
|
||||
```
|
||||
|
||||
### 合规组装 (assemble)
|
||||
```bash
|
||||
assemble tables # 根据任务清单在Word文档中生成响应表和偏离表
|
||||
```
|
||||
|
||||
## 开发规范
|
||||
|
||||
### 核心原则
|
||||
- 采用模板驱动的内容填充模式
|
||||
- AI Agent输出结构化JSON数据
|
||||
- Tooling Layer负责将JSON填充到Word模板
|
||||
- 严格的模块分离:cli, agent, tools
|
||||
|
||||
### Word文档处理工作流
|
||||
1. 人工制作带占位符 (如 {{chapter_content}}) 的Word模板
|
||||
2. Agent生成与占位符对应的JSON内容
|
||||
3. Word Processor和Table Generator调用python-docx完成填充
|
||||
|
||||
### 版本路线图
|
||||
- **V1.0 (MVP)**: 核心kb、project new、generate task功能
|
||||
- **V1.1 (自动化)**: generate full、assemble tables完整闭环
|
||||
- **V1.2 (增强)**: Agent自我修正、CLI体验优化
|
||||
|
||||
## 性能要求
|
||||
- 启动新项目(解析+框架生成) < 3分钟
|
||||
- 生成3000字章节 < 60秒
|
||||
|
||||
## 配置文件
|
||||
- config.yaml: 支持LLM、Embedding模型、数据库路径等核心参数配置
|
||||
- 敏感信息通过环境变量管理
|
||||
|
||||
## 功能边界
|
||||
系统不负责自动更新Word文档中的动态域(目录、页码),用户需在最终审阅时手动刷新(Ctrl+A -> F9)。
|
||||
42
config/config.yaml
Normal file
42
config/config.yaml
Normal file
@ -0,0 +1,42 @@
|
||||
# BidMaster-CLI 默认配置
|
||||
|
||||
# LLM设置
|
||||
llm:
|
||||
# 提供商: openai, azure_openai, anthropic, zhipu, qwen, deepseek, ollama
|
||||
llm_provider: "openai"
|
||||
model_name: "gpt-4"
|
||||
temperature: 0.1
|
||||
max_tokens: 4000
|
||||
|
||||
# Azure OpenAI 特定配置(使用azure_openai时需要)
|
||||
# azure_api_version: "2024-02-15-preview"
|
||||
# azure_deployment: "your-deployment-name"
|
||||
|
||||
# Ollama 配置(使用ollama时需要)
|
||||
# ollama_host: "http://localhost:11434"
|
||||
|
||||
# 向量数据库设置
|
||||
vector_db:
|
||||
persist_directory: "./data/kb"
|
||||
collection_name: "bidmaster_kb"
|
||||
|
||||
# 嵌入模型设置
|
||||
embedding:
|
||||
model_name: "text-embedding-3-small"
|
||||
chunk_size: 1000
|
||||
chunk_overlap: 200
|
||||
|
||||
# 文档处理设置
|
||||
document:
|
||||
max_file_size: 50MB
|
||||
supported_formats: [".pdf", ".docx", ".txt", ".md"]
|
||||
|
||||
# 性能设置
|
||||
performance:
|
||||
max_workers: 4
|
||||
timeout: 300
|
||||
|
||||
# 日志设置
|
||||
logging:
|
||||
level: INFO
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
12
main.py
Normal file
12
main.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""BidMaster CLI 入口点"""
|
||||
|
||||
from src.bidmaster.cli.main import cli
|
||||
|
||||
|
||||
def main():
|
||||
"""主入口函数"""
|
||||
cli()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
30
pyproject.toml
Normal file
30
pyproject.toml
Normal file
@ -0,0 +1,30 @@
|
||||
[project]
|
||||
name = "bidmaster-cli"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"chromadb>=1.1.0",
|
||||
"click>=8.3.0",
|
||||
"langchain>=0.3.27",
|
||||
"langchain-community>=0.3.29",
|
||||
"langgraph>=0.6.7",
|
||||
"openai>=1.109.1",
|
||||
"openpyxl>=3.1.5",
|
||||
"pandas>=2.3.2",
|
||||
"pydantic-settings>=2.10.1",
|
||||
"python-docx>=1.2.0",
|
||||
"requests>=2.32.5",
|
||||
"rich>=14.1.0",
|
||||
"sentence-transformers>=5.1.1",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"black>=25.9.0",
|
||||
"isort>=6.0.1",
|
||||
"pre-commit>=4.3.0",
|
||||
"pytest>=8.4.2",
|
||||
"ruff>=0.13.1",
|
||||
]
|
||||
1
src/bidmaster/__init__.py
Normal file
1
src/bidmaster/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# BidMaster-CLI 主包
|
||||
1
src/bidmaster/agents/__init__.py
Normal file
1
src/bidmaster/agents/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Agent层 - LangGraph编排
|
||||
1
src/bidmaster/agents/analysis.py
Normal file
1
src/bidmaster/agents/analysis.py
Normal file
@ -0,0 +1 @@
|
||||
# Phase 1: 分析Agent - 文档解析
|
||||
1
src/bidmaster/agents/assembly.py
Normal file
1
src/bidmaster/agents/assembly.py
Normal file
@ -0,0 +1 @@
|
||||
# Phase 3: 组装Agent - 文档组装
|
||||
1
src/bidmaster/agents/generation.py
Normal file
1
src/bidmaster/agents/generation.py
Normal file
@ -0,0 +1 @@
|
||||
# Phase 2: 生成Agent - RAG内容生成
|
||||
1
src/bidmaster/agents/graph.py
Normal file
1
src/bidmaster/agents/graph.py
Normal file
@ -0,0 +1 @@
|
||||
# LangGraph工作流编排
|
||||
1
src/bidmaster/cli/__init__.py
Normal file
1
src/bidmaster/cli/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# CLI层 - 命令行接口
|
||||
1
src/bidmaster/cli/assemble.py
Normal file
1
src/bidmaster/cli/assemble.py
Normal file
@ -0,0 +1 @@
|
||||
# 合规组装命令
|
||||
1
src/bidmaster/cli/generate.py
Normal file
1
src/bidmaster/cli/generate.py
Normal file
@ -0,0 +1 @@
|
||||
# 内容生成命令
|
||||
163
src/bidmaster/cli/kb.py
Normal file
163
src/bidmaster/cli/kb.py
Normal file
@ -0,0 +1,163 @@
|
||||
"""知识库管理命令
|
||||
|
||||
提供知识库的初始化、添加文档、查询等功能。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from ..tools.rag import RAGTool
|
||||
|
||||
console = Console()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@click.group()
def kb():
    """知识库管理命令"""
|
||||
|
||||
|
||||
@kb.command()
def init():
    """初始化知识库"""
    try:
        tool = RAGTool()
        console.print("✅ 知识库初始化成功", style="green")

        # Report where the database lives and which collection is in use.
        console.print(f"📁 数据库路径: {tool.chroma_path}")
        console.print(f"📚 集合名称: {tool.settings.collection_name}")

    except Exception as e:
        console.print(f"❌ 初始化失败: {e}", style="red")
|
||||
|
||||
|
||||
@kb.command()
@click.argument("file_path", type=click.Path(exists=True))
def add(file_path: str):
    """添加文档到知识库"""
    try:
        tool = RAGTool()

        # Ingest the document while showing a spinner.
        with console.status("正在处理文档..."):
            added = tool.add_document(file_path)

        if not added:
            console.print(f"❌ 添加文档失败: {file_path}", style="red")
        else:
            console.print(f"✅ 文档已添加: {file_path}", style="green")

    except Exception as e:
        console.print(f"❌ 处理失败: {e}", style="red")
|
||||
|
||||
|
||||
@kb.command()
def status():
    """显示知识库状态"""
    try:
        tool = RAGTool()
        kb_stats = tool.get_stats()

        # Headline counters.
        console.print("📊 知识库统计信息", style="bold blue")
        console.print(f"📄 文档数量: {kb_stats['total_files']}")
        console.print(f"📝 文档块数: {kb_stats['total_chunks']}")

        indexed = kb_stats['files']
        if not indexed:
            console.print("\n💡 知识库为空,使用 'kb add <文件路径>' 添加文档")
        else:
            console.print("\n📁 已索引文档:")
            for file in indexed:
                console.print(f" • {Path(file).name}")

    except Exception as e:
        console.print(f"❌ 获取状态失败: {e}", style="red")
|
||||
|
||||
|
||||
@kb.command()
@click.argument("query")
@click.option("--limit", "-l", default=5, help="返回结果数量")
def search(query: str, limit: int):
    """搜索知识库内容"""
    try:
        tool = RAGTool()

        with console.status("正在搜索..."):
            hits = tool.search(query, k=limit)

        # Nothing matched: say so and stop.
        if not hits:
            console.print("🔍 未找到相关内容", style="yellow")
            return

        console.print(f"🔍 搜索结果 (查询: '{query}')", style="bold blue")

        for rank, hit in enumerate(hits, 1):
            console.print(f"\n[bold]结果 {rank}[/bold] (相似度: {hit['score']:.3f})")

            # Show the source file name when the chunk metadata carries one.
            meta = hit["metadata"]
            if "source" in meta:
                source = Path(meta["source"]).name
                console.print(f"📄 来源: {source}")

            # Truncate long chunks for terminal display.
            content = hit["content"]
            if len(content) > 300:
                content = content[:300] + "..."

            console.print(f"💭 内容: {content}")
            console.print("─" * 80)

    except Exception as e:
        console.print(f"❌ 搜索失败: {e}", style="red")
|
||||
|
||||
|
||||
@kb.command(name="list")
def list_docs():
    """列出所有已索引文档"""
    # The function is named list_docs (not `list`) so it does not shadow the
    # builtin; the explicit name= keeps the CLI command spelled `kb list`.
    try:
        rag = RAGTool()
        stats = rag.get_stats()

        if not stats['files']:
            console.print("📂 知识库为空", style="yellow")
            return

        # One row per indexed file: name plus its parent directory.
        table = Table(title="📚 已索引文档")
        table.add_column("文件名", style="cyan")
        table.add_column("路径", style="dim")

        for file_path in stats['files']:
            path_obj = Path(file_path)
            table.add_row(path_obj.name, str(path_obj.parent))

        console.print(table)

    except Exception as e:
        console.print(f"❌ 获取文档列表失败: {e}", style="red")
|
||||
|
||||
|
||||
@kb.command()
@click.confirmation_option(prompt="确定要重置知识库吗?这将删除所有数据!")
def reset():
    """重置知识库(危险操作)"""
    try:
        tool = RAGTool()

        # Destructive: wipes the whole vector store (user already confirmed).
        with console.status("正在重置知识库..."):
            wiped = tool.reset_database()

        if not wiped:
            console.print("❌ 重置失败", style="red")
        else:
            console.print("✅ 知识库已重置", style="green")

    except Exception as e:
        console.print(f"❌ 重置失败: {e}", style="red")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
kb()
|
||||
48
src/bidmaster/cli/main.py
Normal file
48
src/bidmaster/cli/main.py
Normal file
@ -0,0 +1,48 @@
|
||||
"""BidMaster CLI主入口
|
||||
|
||||
提供标书制作的完整命令行界面。
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
|
||||
from .kb import kb
|
||||
from .project import project
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
@click.group()
@click.version_option(version="0.1.0", prog_name="bidmaster")
def cli():
    """BidMaster - AI标书撰写助手

    智能化标书制作工具,提供文档分析、内容生成、表格组装等功能。
    """
    # Configure root logging once, before any sub-command runs.
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )
|
||||
|
||||
|
||||
# Register the sub-command groups on the root CLI group.
cli.add_command(kb, name="kb")
cli.add_command(project, name="project")
|
||||
|
||||
|
||||
@cli.command()
def info():
    """显示系统信息"""
    console.print("🚀 BidMaster CLI v0.1.0", style="bold blue")
    console.print("AI标书撰写助手 - 智能化标书制作工具")
    console.print("\n📋 可用命令:")
    # One line per top-level command.
    for line in (
        " kb - 知识库管理",
        " project - 项目管理和解析器测试",
        " info - 显示系统信息",
    ):
        console.print(line)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
214
src/bidmaster/cli/project.py
Normal file
214
src/bidmaster/cli/project.py
Normal file
@ -0,0 +1,214 @@
|
||||
"""项目管理命令
|
||||
|
||||
提供项目创建、状态查看、解析器测试等功能。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.panel import Panel
|
||||
|
||||
from ..tools.parser import BidParser
|
||||
|
||||
console = Console()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@click.group()
def project():
    """项目管理命令"""
|
||||
|
||||
|
||||
@project.command()
@click.argument("scoring_file", type=click.Path(exists=True))
@click.option("--deviation-file", "-d", type=click.Path(exists=True), help="偏离表文件(可选)")
@click.option("--template-file", "-t", type=click.Path(exists=True), help="模板文件(可选)")
def parse(scoring_file: str, deviation_file: str | None, template_file: str | None):
    """测试解析器功能"""
    # Parses the tender scoring file (plus optional deviation/template files)
    # and prints the resulting structure: scoring table, deviation items,
    # chapter tree and a summary panel. Errors are reported, not re-raised.
    try:
        console.print("🔍 开始解析招标文件...", style="blue")

        parser = BidParser()

        # Run the parser over the provided files.
        with console.status("正在解析文件..."):
            bid_structure = parser.parse_bid_requirements(
                scoring_file=scoring_file,
                deviation_file=deviation_file,
                template_file=template_file
            )

        console.print("✅ 解析完成!", style="green")

        # --- Scoring criteria table ---
        if bid_structure.scoring_criteria:
            console.print("\n📊 评分标准:")
            table = Table(title="评分标准解析结果")
            table.add_column("技术类别", style="green")
            table.add_column("评分项", style="yellow")
            table.add_column("分值", justify="right", style="magenta")
            table.add_column("章节ID", style="cyan")
            table.add_column("描述", style="dim")

            # Loop-invariant label mapping, hoisted out of the row loop
            # (the original rebuilt this dict on every iteration).
            category_labels = {
                "technical_solution": "技术方案",
                "equipment_spec": "设备配置",
                "implementation": "实施方案",
                "quality_safety": "质量安全",
                "after_sales": "售后服务",
                "compliance": "技术合规",
                "commercial": "商务条件",
                "other": "其他"
            }

            for criteria in bid_structure.scoring_criteria:
                category_name = category_labels.get(criteria.category.value, "其他")

                # Truncate long descriptions for terminal display.
                description = criteria.description
                if len(description) > 40:
                    description = description[:40] + "..."

                table.add_row(
                    category_name,
                    criteria.item_name,
                    str(criteria.max_score),
                    criteria.chapter_id,
                    description
                )

            console.print(table)

        # --- Deviation items (first five only) ---
        if bid_structure.deviation_items:
            console.print("\n📋 偏离表项目:")
            for i, item in enumerate(bid_structure.deviation_items[:5], 1):  # 只显示前5项
                console.print(f" {i}. [{item.chapter_id}] {item.requirement[:60]}...")

            if len(bid_structure.deviation_items) > 5:
                console.print(f" ... 还有{len(bid_structure.deviation_items) - 5}项")

        # --- Template chapters (if a template was given) ---
        if bid_structure.chapters:
            console.print("\n📚 模板章节:")
            for chapter in bid_structure.chapters:
                indent = " " * chapter.level
                console.print(f"{indent}• {chapter.title}")

        # --- Summary panel ---
        stats_panel = Panel(
            f"📄 评分项数量: {len(bid_structure.scoring_criteria)}\n"
            f"📋 偏离项数量: {len(bid_structure.deviation_items)}\n"
            f"📚 章节数量: {len(bid_structure.chapters)}",
            title="解析统计",
            title_align="left"
        )
        console.print("\n", stats_panel)

    except Exception as e:
        console.print(f"❌ 解析失败: {e}", style="red")
|
||||
|
||||
|
||||
@project.command()
@click.argument("word_file", type=click.Path(exists=True))
def smart_parse(word_file: str):
    """智能解析Word文档(自动识别评分表和偏离表)"""
    # Single-document workflow: identifies scoring/deviation tables inside
    # one .docx, splits technical from commercial scoring items, and builds
    # a chapter tree from the technical items only.
    try:
        console.print("🔍 开始智能解析Word文档...", style="blue")

        # Only .docx is supported; fail fast on anything else.
        if not word_file.lower().endswith('.docx'):
            console.print("❌ 只支持Word(.docx)格式文件", style="red")
            return

        parser = BidParser()

        # Smart-parse the document, separating technical vs commercial items.
        with console.status("正在智能识别和解析表格..."):
            technical_criteria, commercial_criteria, deviation_items = parser.parse_word_with_filter(word_file)

        if not technical_criteria and not commercial_criteria and not deviation_items:
            console.print("❌ 未找到有效的评分表或偏离表", style="red")
            return

        # Build the bid structure from the technical portion only.
        from ..tools.parser import BidStructure
        bid_structure = BidStructure()
        bid_structure.scoring_criteria = technical_criteria  # 只使用技术部分
        bid_structure.deviation_items = deviation_items

        # Generate the professional chapter tree from the technical criteria.
        bid_structure.chapters = parser._generate_professional_chapters(technical_criteria)
        parser._map_criteria_to_chapters(bid_structure)

        console.print("✅ 智能解析完成!", style="green")

        # Commercial items are reported but excluded from the chapter tree.
        if commercial_criteria:
            console.print(f"\n💰 识别到商务评分项(共{len(commercial_criteria)}项,已排除):", style="yellow")
            for item in commercial_criteria[:3]:
                console.print(f" • {item.item_name} ({item.max_score}分)")
            if len(commercial_criteria) > 3:
                console.print(f" • ... 还有{len(commercial_criteria) - 3}项")

        # --- Technical scoring criteria table ---
        if bid_structure.scoring_criteria:
            console.print(f"\n📊 技术评分标准(共{len(technical_criteria)}项,用于目录生成):")
            table = Table(title="技术评分标准解析结果")
            table.add_column("技术类别", style="green")
            table.add_column("评分项", style="yellow")
            table.add_column("分值", justify="right", style="magenta")
            table.add_column("章节ID", style="cyan")

            # Loop-invariant label mapping, hoisted out of the row loop.
            category_labels = {
                "technical_solution": "技术方案",
                "equipment_spec": "设备配置",
                "implementation": "实施方案",
                "quality_safety": "质量安全",
                "after_sales": "售后服务",
                "compliance": "技术合规",
                "commercial": "商务条件",
                "other": "其他"
            }

            for criteria in bid_structure.scoring_criteria:
                table.add_row(
                    category_labels.get(criteria.category.value, "其他"),
                    criteria.item_name,
                    str(criteria.max_score),
                    criteria.chapter_id
                )

            console.print(table)

        # --- Deviation items (first three only) ---
        if bid_structure.deviation_items:
            console.print(f"\n📋 识别到的偏离表项目: {len(bid_structure.deviation_items)}项")
            for i, item in enumerate(bid_structure.deviation_items[:3], 1):
                console.print(f" {i}. {item.requirement[:50]}..." + f" [{item.response_type}]")

            if len(bid_structure.deviation_items) > 3:
                console.print(f" ... 还有{len(bid_structure.deviation_items) - 3}项")

        # --- Generated chapter tree ---
        console.print("\n📚 生成的目录结构:")
        for chapter in bid_structure.chapters:
            console.print(f" {chapter.title}")

        # --- Summary panel ---
        # Bug fix: the original used the two-character sequence "\\n" here,
        # printing a literal backslash-n instead of a newline (the sibling
        # `parse` command uses real "\n").
        stats_panel = Panel(
            f"📄 评分项数量: {len(bid_structure.scoring_criteria)}\n"
            f"📋 偏离项数量: {len(bid_structure.deviation_items)}\n"
            f"📚 章节数量: {len(bid_structure.chapters)}",
            title="智能解析统计",
            title_align="left"
        )
        console.print("\n", stats_panel)

    except Exception as e:
        console.print(f"❌ 智能解析失败: {e}", style="red")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
project()
|
||||
5
src/bidmaster/config/__init__.py
Normal file
5
src/bidmaster/config/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""配置模块"""
|
||||
|
||||
from .settings import Settings, get_settings
|
||||
|
||||
__all__ = ["Settings", "get_settings"]
|
||||
163
src/bidmaster/config/settings.py
Normal file
163
src/bidmaster/config/settings.py
Normal file
@ -0,0 +1,163 @@
|
||||
"""BidMaster-CLI配置管理模块
|
||||
|
||||
使用Pydantic Settings管理配置,支持三层配置:
|
||||
1. 默认值(代码中定义)
|
||||
2. 配置文件(config.yaml)
|
||||
3. 环境变量(优先级最高)
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
from enum import Enum
|
||||
|
||||
import yaml
|
||||
from pydantic import Field, validator
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class LLMProvider(str, Enum):
    """Supported LLM providers (string values match config/env spelling)."""
    OPENAI = "openai"
    AZURE_OPENAI = "azure_openai"
    ANTHROPIC = "anthropic"
    ZHIPU = "zhipu"
    QWEN = "qwen"
    DEEPSEEK = "deepseek"
    OLLAMA = "ollama"
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """BidMaster configuration.

    Values resolve in three layers: code defaults, config.yaml (via
    ``load_from_yaml``), then environment variables with the ``BIDMASTER_``
    prefix (env vars win, handled by BaseSettings).

    NOTE(review): the v1-style ``@validator`` and inner ``Config`` class are
    deprecated under pydantic v2 (which pydantic-settings>=2.10 pulls in) —
    consider migrating to ``@field_validator`` / ``SettingsConfigDict``.
    """

    # --- LLM settings ---
    llm_provider: LLMProvider = Field(default=LLMProvider.OPENAI, description="LLM提供商")
    api_key: str = Field(..., description="LLM API密钥")  # required; no default
    base_url: str | None = Field(default=None, description="API基础URL,None时使用默认")
    model_name: str = Field(default="gpt-4", description="LLM模型名称")
    temperature: float = Field(default=0.1, description="生成温度")
    max_tokens: int = Field(default=4000, description="最大token数")

    # --- Azure OpenAI specific ---
    azure_api_version: str | None = Field(default=None, description="Azure API版本")
    azure_deployment: str | None = Field(default=None, description="Azure部署名称")

    # --- Local model (Ollama) ---
    ollama_host: str = Field(default="http://localhost:11434", description="Ollama服务地址")

    # --- Vector database ---
    chroma_path: str = Field(default="./data/kb", description="ChromaDB存储路径")
    collection_name: str = Field(default="bidmaster_kb", description="集合名称")

    # --- Embedding ---
    embedding_model: str = Field(default="text-embedding-3-small", description="嵌入模型")
    chunk_size: int = Field(default=1000, description="文档块大小")
    chunk_overlap: int = Field(default=200, description="块重叠大小")

    # --- Performance ---
    max_workers: int = Field(default=4, description="最大工作线程数")
    timeout: int = Field(default=300, description="超时时间(秒)")

    # --- Logging ---
    log_level: str = Field(default="INFO", description="日志级别")
    log_format: str = Field(
        default="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        description="日志格式"
    )

    # --- Document processing ---
    max_file_size: str = Field(default="50MB", description="最大文件大小")
    supported_formats: list[str] = Field(
        default=[".pdf", ".docx", ".txt", ".md"],
        description="支持的文档格式"
    )

    class Config:
        # Read a local .env file and BIDMASTER_-prefixed env vars;
        # silently ignore unknown keys.
        env_file = ".env"
        env_prefix = "BIDMASTER_"
        extra = "ignore"

    @validator("base_url", always=True)
    def validate_base_url(cls, v: str | None, values: dict) -> str:
        """Fill in the provider's default API base URL when none is set."""
        if v is not None:
            return v

        provider = values.get("llm_provider", LLMProvider.OPENAI)
        default_urls = {
            LLMProvider.OPENAI: "https://api.openai.com/v1",
            LLMProvider.ANTHROPIC: "https://api.anthropic.com",
            LLMProvider.ZHIPU: "https://open.bigmodel.cn/api/paas/v4",
            LLMProvider.QWEN: "https://dashscope.aliyuncs.com/compatible-mode/v1",
            LLMProvider.DEEPSEEK: "https://api.deepseek.com",
            LLMProvider.OLLAMA: "http://localhost:11434/v1",
        }

        # AZURE_OPENAI has no entry here, so it falls back to the OpenAI URL.
        return default_urls.get(provider, "https://api.openai.com/v1")

    @validator("model_name", always=True)
    def validate_model_name(cls, v: str, values: dict) -> str:
        """Pick a provider-appropriate default model name.

        NOTE(review): the literal "gpt-4" is treated as "not user-specified",
        so an explicit user choice of gpt-4 is indistinguishable from the
        default — confirm this is intended.
        """
        if v != "gpt-4":  # user specified a model explicitly
            return v

        provider = values.get("llm_provider", LLMProvider.OPENAI)
        default_models = {
            LLMProvider.OPENAI: "gpt-4",
            LLMProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
            LLMProvider.ZHIPU: "glm-4",
            LLMProvider.QWEN: "qwen-max",
            LLMProvider.DEEPSEEK: "deepseek-chat",
            LLMProvider.OLLAMA: "llama3.1:8b",
        }

        return default_models.get(provider, "gpt-4")

    @classmethod
    def load_from_yaml(cls, config_path: Path | str = "config/config.yaml") -> "Settings":
        """Load settings from a YAML file, flattening nested sections.

        Raises:
            FileNotFoundError: if ``config_path`` does not exist.
        """
        config_path = Path(config_path)

        if not config_path.exists():
            raise FileNotFoundError(f"配置文件不存在: {config_path}")

        with open(config_path, encoding="utf-8") as f:
            yaml_config = yaml.safe_load(f)

        # Flatten nested sections, e.g. llm.model_name -> "llm_model_name".
        # NOTE(review): flattened keys carry the section prefix
        # ("llm_model_name"), but field names here do not ("model_name"),
        # and extra="ignore" would silently drop mismatches — verify that
        # config.yaml values actually reach the fields.
        flat_config = _flatten_dict(yaml_config)

        # Env vars still take precedence via BaseSettings.
        return cls(**flat_config)
|
||||
|
||||
|
||||
def _flatten_dict(nested_dict: dict[str, Any], prefix: str = "") -> dict[str, Any]:
|
||||
"""展平嵌套字典"""
|
||||
flat = {}
|
||||
|
||||
for key, value in nested_dict.items():
|
||||
new_key = f"{prefix}_{key}" if prefix else key
|
||||
|
||||
if isinstance(value, dict):
|
||||
flat.update(_flatten_dict(value, new_key))
|
||||
else:
|
||||
flat[new_key] = value
|
||||
|
||||
return flat
|
||||
|
||||
|
||||
# Global configuration instance (lazily created singleton; see get_settings).
_settings_instance: Settings | None = None
|
||||
|
||||
|
||||
def get_settings() -> Settings:
    """Return the process-wide Settings singleton.

    The first call tries to load ``config/config.yaml``; when no config
    file exists, configuration comes from environment variables alone.
    Subsequent calls return the cached instance.
    """
    global _settings_instance

    if _settings_instance is not None:
        return _settings_instance

    try:
        _settings_instance = Settings.load_from_yaml()
    except FileNotFoundError:
        # No config file on disk — fall back to environment variables only.
        _settings_instance = Settings()

    return _settings_instance
|
||||
13
src/bidmaster/models/__init__.py
Normal file
13
src/bidmaster/models/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
"""数据模型模块"""
|
||||
|
||||
from .task import Task, TaskStatus, TaskType
|
||||
from .project import Project, ProjectStatus, DocumentChapter
|
||||
|
||||
__all__ = [
|
||||
"Task",
|
||||
"TaskStatus",
|
||||
"TaskType",
|
||||
"Project",
|
||||
"ProjectStatus",
|
||||
"DocumentChapter",
|
||||
]
|
||||
81
src/bidmaster/models/project.py
Normal file
81
src/bidmaster/models/project.py
Normal file
@ -0,0 +1,81 @@
|
||||
"""项目数据模型
|
||||
|
||||
定义标书项目的数据结构,包括项目信息、任务清单、文档结构等。
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .task import Task
|
||||
|
||||
|
||||
class ProjectStatus(str, Enum):
    """Lifecycle states of a bid project."""
    CREATED = "created"        # project created, nothing processed yet
    ANALYZING = "analyzing"    # tender documents are being parsed
    GENERATING = "generating"  # bid content is being generated
    COMPLETED = "completed"    # all work finished
||||
|
||||
|
||||
class DocumentChapter(BaseModel):
    """One chapter of the generated bid document (structure + generation state)."""

    id: str = Field(..., description="章节ID")
    title: str = Field(..., description="章节标题")
    level: int = Field(..., description="章节层级")
    requirements: str = Field(default="", description="章节内容要求")
    word_count: int = Field(default=0, description="预期字数")
    is_generated: bool = Field(default=False, description="是否已生成内容")
|
||||
|
||||
|
||||
class Project(BaseModel):
    """A bid project: source tender file, document structure and work items.

    Note on status checks: ``use_enum_values = True`` makes pydantic store
    enum-typed fields as their plain string values during validation, while
    direct attribute assignment (e.g. ``Task.mark_in_progress``) stores enum
    members. Status comparisons below therefore compare against the string
    value, which matches both forms because ``TaskStatus`` is a ``str``
    mix-in enum. (The previous ``task.status.value`` raised AttributeError
    on plain-string statuses.)
    """

    id: UUID = Field(default_factory=uuid4, description="项目唯一标识")
    name: str = Field(..., description="项目名称")
    description: str = Field(default="", description="项目描述")
    status: ProjectStatus = Field(default=ProjectStatus.CREATED, description="项目状态")

    # File locations
    source_file: str = Field(..., description="招标文件路径")
    work_dir: str = Field(..., description="工作目录")

    # Document structure and work breakdown
    chapters: list[DocumentChapter] = Field(default_factory=list, description="文档章节结构")
    tasks: list[Task] = Field(default_factory=list, description="任务清单")

    # Timestamps
    created_at: datetime = Field(default_factory=datetime.now, description="创建时间")
    updated_at: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        # Serialize enum fields as their plain string values.
        use_enum_values = True

    def add_task(self, task: Task) -> None:
        """Append a task and refresh the modification timestamp."""
        self.tasks.append(task)
        self.updated_at = datetime.now()

    def get_task_by_id(self, task_id: UUID) -> Task | None:
        """Return the task with the given id, or None when absent."""
        for task in self.tasks:
            if task.id == task_id:
                return task
        return None

    def get_pending_tasks(self) -> list[Task]:
        """Return tasks that have not been started yet."""
        # Compare against the string value: works whether status is a plain
        # str (after validation) or a TaskStatus member (after assignment).
        return [task for task in self.tasks if task.status == "pending"]

    @property
    def progress(self) -> float:
        """Fraction of completed tasks, in [0, 1]; 0.0 when there are no tasks."""
        if not self.tasks:
            return 0.0
        completed = len([t for t in self.tasks if t.status == "completed"])
        return completed / len(self.tasks)
|
||||
70
src/bidmaster/models/task.py
Normal file
70
src/bidmaster/models/task.py
Normal file
@ -0,0 +1,70 @@
|
||||
"""任务数据模型
|
||||
|
||||
定义标书制作过程中的任务结构,包括任务状态、内容要求等。
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TaskStatus(str, Enum):
    """Execution states of a bid-production task."""
    PENDING = "pending"            # created, not yet started
    IN_PROGRESS = "in_progress"    # currently being executed
    COMPLETED = "completed"        # finished successfully
    FAILED = "failed"              # finished with an error (see error_message)
|
||||
|
||||
|
||||
class TaskType(str, Enum):
    """Kinds of work performed during bid production."""
    ANALYSIS = "analysis"  # document analysis
    CONTENT = "content"    # content generation
    TABLE = "table"        # table generation
    ASSEMBLY = "assembly"  # document assembly
|
||||
|
||||
|
||||
class Task(BaseModel):
    """A single unit of work in the bid-production pipeline.

    Tracks status transitions via the ``mark_*`` helpers, which also
    refresh ``updated_at``.
    """

    id: UUID = Field(default_factory=uuid4, description="任务唯一标识")
    title: str = Field(..., description="任务标题")
    description: str = Field(default="", description="任务描述")
    task_type: TaskType = Field(..., description="任务类型")
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="任务状态")

    # What the task must produce, and which chapter it belongs to (if any).
    requirements: dict[str, Any] = Field(default_factory=dict, description="任务要求")
    chapter_id: str | None = Field(default=None, description="关联章节ID")

    # Execution results
    output: dict[str, Any] = Field(default_factory=dict, description="任务输出")
    error_message: str | None = Field(default=None, description="错误信息")

    # Timestamps
    created_at: datetime = Field(default_factory=datetime.now, description="创建时间")
    updated_at: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        # Serialize enum fields as their plain string values.
        use_enum_values = True

    def mark_in_progress(self) -> None:
        """Transition the task to IN_PROGRESS and refresh the timestamp."""
        self.status = TaskStatus.IN_PROGRESS
        self.updated_at = datetime.now()

    def mark_completed(self, output: dict[str, Any] | None = None) -> None:
        """Transition to COMPLETED, optionally merging results into ``output``."""
        self.status = TaskStatus.COMPLETED
        self.updated_at = datetime.now()
        if output:
            self.output.update(output)

    def mark_failed(self, error_message: str) -> None:
        """Transition to FAILED and record the failure reason."""
        self.status = TaskStatus.FAILED
        self.error_message = error_message
        self.updated_at = datetime.now()
|
||||
1
src/bidmaster/tools/__init__.py
Normal file
1
src/bidmaster/tools/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# 工具层 - 原子化工具集
|
||||
751
src/bidmaster/tools/parser.py
Normal file
751
src/bidmaster/tools/parser.py
Normal file
@ -0,0 +1,751 @@
|
||||
"""招标文件解析器
|
||||
|
||||
解析评分要求表格、偏离表要求,结合Word模板生成标书结构。
|
||||
支持Excel、CSV、Word表格格式。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, List
|
||||
from enum import Enum
|
||||
|
||||
import pandas as pd
|
||||
from docx import Document
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..config import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TechnicalCategory(str, Enum):
    """Classification of scoring items, used to route them into chapters."""
    TECHNICAL_SOLUTION = "technical_solution"  # completeness of the technical solution
    EQUIPMENT_SPEC = "equipment_spec"          # equipment specifications and reliability
    IMPLEMENTATION = "implementation"          # implementation / delivery plan
    QUALITY_SAFETY = "quality_safety"          # quality and safety systems
    AFTER_SALES = "after_sales"                # after-sales service
    COMPLIANCE = "compliance"                  # compliance responses
    COMMERCIAL = "commercial"                  # commercial terms (price, qualifications, etc.)
    OTHER = "other"                            # anything not classifiable above
|
||||
|
||||
|
||||
class ScoringCriteria(BaseModel):
    """One scoring item from the tender's evaluation table."""

    item_name: str = Field(..., description="评分项名称")
    max_score: float = Field(..., description="最高分值")
    description: str = Field(default="", description="评分要求描述")
    category: TechnicalCategory = Field(default=TechnicalCategory.OTHER, description="技术类别")
    chapter_id: str = Field(..., description="对应章节ID")
|
||||
|
||||
|
||||
class DeviationItem(BaseModel):
    """One row from the tender's deviation (compliance response) table."""

    requirement: str = Field(..., description="招标要求")
    response_type: str = Field(default="正偏离", description="响应类型")
    chapter_id: str = Field(..., description="对应章节ID")
|
||||
|
||||
|
||||
class DocumentChapter(BaseModel):
    """A chapter slot in the generated bid document structure."""

    id: str = Field(..., description="章节ID")
    title: str = Field(..., description="章节标题")
    level: int = Field(..., description="章节层级")
    template_placeholder: str | None = Field(default=None, description="模板占位符")
|
||||
|
||||
|
||||
class ChapterTemplate:
    """Catalog of standard bid chapters and their category routing.

    ``STANDARD_CHAPTERS`` defines the canonical chapter set; each entry
    carries the chapter id, Chinese title, heading level and the template
    placeholder substituted during document assembly.
    """

    STANDARD_CHAPTERS = {
        "evaluation_index": {
            "id": "eval_index",
            "title": "1. 评标索引表(技术评分完全对应)",
            "level": 1,
            "placeholder": "{{evaluation_index_content}}"
        },
        "compliance_response": {
            "id": "compliance",
            "title": "2. 实质性响应/星号条款偏离表",
            "level": 1,
            "placeholder": "{{compliance_response_content}}"
        },
        "technical_solution": {
            "id": "tech_solution",
            "title": "3. 总体技术方案",
            "level": 1,
            "placeholder": "{{technical_solution_content}}"
        },
        "equipment_spec": {
            "id": "equipment",
            "title": "4. 关键设备规格书及检测报告",
            "level": 1,
            "placeholder": "{{equipment_spec_content}}"
        },
        "implementation": {
            "id": "implementation",
            "title": "5. 项目实施与交付计划",
            "level": 1,
            "placeholder": "{{implementation_content}}"
        },
        "quality_system": {
            "id": "quality",
            "title": "6. 质量、安全、环境体系",
            "level": 1,
            "placeholder": "{{quality_system_content}}"
        },
        "after_sales": {
            "id": "after_sales",
            "title": "7. 运维服务及备品备件",
            "level": 1,
            "placeholder": "{{after_sales_content}}"
        },
        "contract_delivery": {
            "id": "delivery",
            "title": "8. 验收与绩效考核对应表",
            "level": 1,
            "placeholder": "{{contract_delivery_content}}"
        }
    }

    @classmethod
    def get_category_chapter_mapping(cls) -> dict[TechnicalCategory, str]:
        """Map each scoring category to its STANDARD_CHAPTERS key.

        OTHER (and COMMERCIAL, which is absent here) fall back to the
        technical-solution chapter via the callers' ``.get`` defaults.
        """
        return {
            TechnicalCategory.TECHNICAL_SOLUTION: "technical_solution",
            TechnicalCategory.EQUIPMENT_SPEC: "equipment_spec",
            TechnicalCategory.IMPLEMENTATION: "implementation",
            TechnicalCategory.QUALITY_SAFETY: "quality_system",
            TechnicalCategory.AFTER_SALES: "after_sales",
            TechnicalCategory.COMPLIANCE: "compliance_response",
            TechnicalCategory.OTHER: "technical_solution"
        }
|
||||
|
||||
|
||||
class BidStructure(BaseModel):
    """Aggregate parsing result: scoring items, deviations and chapter plan."""

    project_name: str = Field(default="", description="项目名称")
    scoring_criteria: List[ScoringCriteria] = Field(default_factory=list, description="评分标准")
    deviation_items: List[DeviationItem] = Field(default_factory=list, description="偏离项")
    chapters: List[DocumentChapter] = Field(default_factory=list, description="文档章节")

    # Source file locations (kept for traceability)
    scoring_file: str = Field(default="", description="评分要求文件路径")
    deviation_file: str = Field(default="", description="偏离表文件路径")
    template_file: str = Field(default="", description="模板文件路径")
|
||||
|
||||
|
||||
class BidParser:
    """Parser for tender documents: scoring tables, deviation tables and Word templates."""

    def __init__(self) -> None:
        # Global application settings (LLM credentials, model name, etc.).
        self.settings = get_settings()
|
||||
|
||||
def parse_bid_requirements(
    self,
    scoring_file: str,
    deviation_file: str | None = None,
    template_file: str | None = None
) -> BidStructure:
    """Parse tender requirements and build the bid document structure.

    Args:
        scoring_file: Path to the scoring-requirements table (required).
        deviation_file: Optional path to the deviation table.
        template_file: Optional Word template whose headings define chapters;
            when absent, a standard professional chapter set is generated.

    Returns:
        A populated BidStructure with scoring criteria mapped to chapters.

    Raises:
        FileNotFoundError: when a given file does not exist.
        ValueError: for unsupported file formats.
    """
    try:
        # Fail fast on missing inputs.
        self._validate_files(scoring_file, deviation_file, template_file)

        bid_structure = BidStructure(
            scoring_file=scoring_file,
            deviation_file=deviation_file or "",
            template_file=template_file or ""
        )

        # Scoring requirements (mandatory input).
        bid_structure.scoring_criteria = self._parse_scoring_file(scoring_file)

        # Deviation table (optional).
        if deviation_file:
            bid_structure.deviation_items = self._parse_deviation_file(deviation_file)

        # Chapter plan: from the Word template when given, otherwise the
        # standard professional chapter set derived from scoring categories.
        if template_file:
            bid_structure.chapters = self._parse_template_file(template_file)
        else:
            bid_structure.chapters = self._generate_professional_chapters(bid_structure.scoring_criteria)

        # Re-map each scoring item onto its chapter.
        self._map_criteria_to_chapters(bid_structure)


        return bid_structure

    except Exception as e:
        logger.error(f"解析招标要求失败: {e}")
        raise
|
||||
|
||||
def _validate_files(self, scoring_file: str, deviation_file: str | None, template_file: str | None) -> None:
|
||||
"""验证文件存在"""
|
||||
# 评分文件是必需的
|
||||
if not Path(scoring_file).exists():
|
||||
raise FileNotFoundError(f"评分要求文件不存在: {scoring_file}")
|
||||
|
||||
# 偏离表和模板文件是可选的
|
||||
if deviation_file and not Path(deviation_file).exists():
|
||||
raise FileNotFoundError(f"偏离表文件不存在: {deviation_file}")
|
||||
|
||||
if template_file and not Path(template_file).exists():
|
||||
raise FileNotFoundError(f"模板文件不存在: {template_file}")
|
||||
|
||||
def _parse_scoring_file(self, file_path: str) -> List[ScoringCriteria]:
    """Dispatch scoring-file parsing based on the file extension.

    Supports Excel (.xlsx/.xls), CSV and Word (.docx); Word tables are
    parsed with LLM assistance.

    Raises:
        ValueError: for unsupported extensions.
    """
    try:
        file_path_obj = Path(file_path)
        suffix = file_path_obj.suffix.lower()

        if suffix in ['.xlsx', '.xls']:
            return self._parse_excel_scoring(file_path)
        elif suffix == '.csv':
            return self._parse_csv_scoring(file_path)
        elif suffix == '.docx':
            return self._parse_word_table_scoring(file_path)
        else:
            raise ValueError(f"不支持的评分文件格式: {suffix}")

    except Exception as e:
        logger.error(f"解析评分文件失败: {e}")
        raise
|
||||
|
||||
def _parse_excel_scoring(self, file_path: str) -> List[ScoringCriteria]:
    """Load an Excel scoring sheet and delegate to the shared DataFrame parser."""
    return self._parse_dataframe_scoring(pd.read_excel(file_path))
|
||||
|
||||
def _parse_csv_scoring(self, file_path: str) -> List[ScoringCriteria]:
    """Load a CSV scoring sheet (BOM-tolerant) and delegate to the shared DataFrame parser."""
    return self._parse_dataframe_scoring(pd.read_csv(file_path, encoding='utf-8-sig'))
|
||||
|
||||
def _parse_dataframe_scoring(self, df: pd.DataFrame) -> List[ScoringCriteria]:
    """Extract scoring criteria from a tabular file loaded into a DataFrame.

    Columns are matched by Chinese header substrings (item name and score
    are mandatory, description is optional). Rows with an empty item name
    are skipped.

    Fixes over the original implementation:
    - NaN descriptions no longer stringify to the literal "nan".
    - Row numbering for chapter_id uses the positional row number rather
      than the index label, which could be non-numeric.

    Raises:
        ValueError: when the mandatory columns are missing.
    """
    # Candidate column headers (Chinese) mapped to model field names.
    column_mapping = {
        '评分项': 'item_name',
        '评分项目': 'item_name',
        '项目': 'item_name',
        '分值': 'max_score',
        '最高分': 'max_score',
        '满分': 'max_score',
        '描述': 'description',
        '要求': 'description',
        '评分要求': 'description'
    }

    # Resolve actual DataFrame columns by substring match on headers.
    columns: dict[str, Any] = {}
    for col in df.columns:
        for key, value in column_mapping.items():
            if key in str(col):
                columns[value] = col
                break

    if 'item_name' not in columns or 'max_score' not in columns:
        raise ValueError("文件缺少必要的列:评分项、分值")

    criteria = []
    desc_col = columns.get('description')
    for i, (_, row) in enumerate(df.iterrows()):
        if pd.isna(row[columns['item_name']]):
            continue

        # Guard against NaN leaking in as the string "nan".
        description = ""
        if desc_col is not None and not pd.isna(row[desc_col]):
            description = str(row[desc_col]).strip()

        criterion = ScoringCriteria(
            item_name=str(row[columns['item_name']]).strip(),
            max_score=float(row[columns['max_score']]),
            description=description,
            category=TechnicalCategory.OTHER,  # table parsing defaults to OTHER; AI reclassifies later
            chapter_id=f"chapter_{i+1:02d}"
        )
        criteria.append(criterion)

    return criteria
|
||||
|
||||
def _parse_word_table_scoring(self, file_path: str) -> List[ScoringCriteria]:
    """Extract scoring criteria from tables in a Word document via the LLM.

    Each table is rendered to text, classified (scoring / deviation /
    other), and only scoring tables are parsed into criteria.
    """
    doc = Document(file_path)
    criteria = []

    for table in doc.tables:
        # A table needs at least a header row plus one data row.
        if len(table.rows) < 2:
            continue

        # Render the table to tab-separated text for the LLM prompt.
        table_text = self._extract_table_text(table)

        # Classify the table before attempting to parse it.
        table_type = self._identify_table_type(table_text)

        if table_type == "scoring":
            # LLM-assisted extraction of the scoring rows.
            ai_results = self._ai_parse_scoring_table(table_text)
            if ai_results:
                criteria.extend(ai_results)

    return criteria
|
||||
|
||||
def _extract_table_text(self, table) -> str:
|
||||
"""提取表格内容为文本格式"""
|
||||
lines = []
|
||||
|
||||
for i, row in enumerate(table.rows):
|
||||
cells = [cell.text.strip() for cell in row.cells]
|
||||
# 使用制表符分隔,便于AI理解
|
||||
line = "\t".join(cells)
|
||||
lines.append(f"行{i+1}: {line}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _ai_parse_scoring_table(self, table_text: str) -> List[ScoringCriteria]:
    """Parse a scoring table's text into criteria using the LLM.

    The prompt asks the model for strict JSON with item name, score,
    short description and a category label; unknown category labels fall
    back to OTHER. Any failure (no response, bad JSON) is logged and an
    empty list is returned, so callers can continue with other tables.
    """
    try:
        prompt = f"""
请提取表格中的评分项和分值,并智能分类,返回JSON。

表格内容:
{table_text}

要求:
1. 提取评分项名称和分值
2. 描述字段用简短概括(不超过30字)
3. 根据评分项内容智能分类,重点区分商务和技术:

**技术类别:**
- technical_solution: 技术方案、技术完整性、技术先进性、技术路线
- equipment_spec: 设备规格、产品参数、设备可靠性、技术指标
- implementation: 项目实施、施工方案、进度计划、实施能力
- quality_safety: 质量管理、安全管理、环境管理、质量体系
- after_sales: 售后服务、维保服务、培训服务、技术支持
- compliance: 技术资质、认证证书、技术合规性

**商务类别:**
- commercial: 价格评分、报价、商务条件、企业资质、财务状况、业绩、投标保证金、商务合规性

**其他:**
- other: 无法明确分类的项目

格式:
{{
    "scoring_criteria": [
        {{"item_name": "报价", "max_score": 30, "description": "价格评分标准", "category": "commercial"}},
        {{"item_name": "技术方案", "max_score": 40, "description": "技术方案评分", "category": "technical_solution"}},
        {{"item_name": "企业资质", "max_score": 10, "description": "企业资质证明", "category": "commercial"}},
        {{"item_name": "设备参数", "max_score": 20, "description": "设备技术指标", "category": "equipment_spec"}}
    ]
}}

只返回JSON,无其他文字:"""

        # Single LLM round-trip; returns None on API failure.
        response = self._call_llm_api(prompt)

        if not response:
            raise ValueError("AI解析表格失败:无响应")

        # Parse the model's reply.
        try:
            # Parse the JSON directly; any malformed reply raises and is
            # handled below (no attempt to salvage partial output).
            result_data = json.loads(response)

            scoring_data = result_data.get("scoring_criteria", [])

            criteria = []
            for i, item in enumerate(scoring_data):
                # Validate the category label; unknown values fall back to OTHER.
                category = item.get("category", "other")
                try:
                    category_enum = TechnicalCategory(category)
                except ValueError:
                    category_enum = TechnicalCategory.OTHER

                criterion = ScoringCriteria(
                    item_name=item.get("item_name", ""),
                    max_score=float(item.get("max_score", 0)),
                    description=item.get("description", ""),
                    category=category_enum,
                    chapter_id=f"chapter_{i+1:02d}"
                )
                criteria.append(criterion)

            return criteria

        except (json.JSONDecodeError, ValueError, KeyError) as e:
            logger.error(f"解析AI响应失败: {e}")
            return []

    except Exception as e:
        logger.error(f"AI解析表格失败: {e}")
        return []
|
||||
|
||||
def _call_llm_api(self, prompt: str) -> str | None:
    """Send a single-user-message chat completion and return its text.

    Uses the OpenAI-compatible SDK against the configured base_url, so it
    works with any OpenAI-compatible provider (e.g. DeepSeek).
    Returns None on any API failure (logged), letting callers degrade
    gracefully.

    NOTE(review): relies on ``self.settings.temperature`` existing on the
    Settings model — confirm it is defined (not visible here).
    """
    try:
        # OpenAI-compatible client pointed at the configured provider endpoint.
        client = OpenAI(
            api_key=self.settings.api_key,
            base_url=self.settings.base_url
        )

        response = client.chat.completions.create(
            model=self.settings.model_name,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=self.settings.temperature,
            max_tokens=self.settings.max_tokens
        )

        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"LLM API调用异常: {e}")
        return None
|
||||
|
||||
|
||||
def _identify_table_type(self, table_text: str) -> str:
    """Classify a table as "scoring", "deviation" or "other" via the LLM.

    Raises:
        ValueError: when the LLM gives no response or an unexpected label.
    """
    prompt = f"""
分析表格内容,判断这是什么类型的表格。

表格内容:
{table_text}

请判断这个表格属于以下哪种类型:
1. scoring - 评分表:包含评分项、分值、评分标准等
2. deviation - 偏离表:包含技术要求、响应类型、偏离说明等
3. other - 其他表格:不是评分表也不是偏离表

只返回一个单词:scoring 或 deviation 或 other"""

    response = self._call_llm_api(prompt)
    if not response:
        raise ValueError("AI识别表格类型失败:无响应")

    # Normalize and validate the single-word answer.
    result = response.strip().lower()
    if result not in ["scoring", "deviation", "other"]:
        raise ValueError(f"AI返回了无效的表格类型: {result}")

    return result
|
||||
|
||||
def parse_single_word_document(self, file_path: str) -> tuple[List[ScoringCriteria], List[DeviationItem]]:
    """Parse one Word document containing both scoring and deviation tables.

    Each table is classified by the LLM and routed to the matching
    parser. On any failure the error is logged and two empty lists are
    returned (best-effort behavior).

    Returns:
        (scoring_criteria, deviation_items)
    """
    try:
        doc = Document(file_path)
        scoring_criteria = []
        deviation_items = []

        for table in doc.tables:
            # Need at least a header row plus one data row.
            if len(table.rows) < 2:
                continue

            # Render the table to text for classification and parsing.
            table_text = self._extract_table_text(table)

            # Route the table by its classified type.
            table_type = self._identify_table_type(table_text)

            if table_type == "scoring":
                # Scoring table → criteria.
                ai_results = self._ai_parse_scoring_table(table_text)
                if ai_results:
                    scoring_criteria.extend(ai_results)

            elif table_type == "deviation":
                # Deviation table → deviation items.
                deviation_results = self._ai_parse_deviation_table(table_text)
                if deviation_results:
                    deviation_items.extend(deviation_results)

        return scoring_criteria, deviation_items

    except Exception as e:
        logger.error(f"解析Word文档失败: {e}")
        return [], []
|
||||
|
||||
def parse_word_with_filter(self, file_path: str) -> tuple[List[ScoringCriteria], List[ScoringCriteria], List[DeviationItem]]:
    """Parse a Word document and split results into technical vs commercial.

    Same table routing as ``parse_single_word_document``, but scoring
    criteria are additionally partitioned: COMMERCIAL items go into the
    commercial list, everything else counts as technical. Failures are
    logged and three empty lists are returned.

    Returns:
        (technical_criteria, commercial_criteria, deviation_items)
    """
    try:
        doc = Document(file_path)
        all_criteria = []
        deviation_items = []

        for table in doc.tables:
            # Need at least a header row plus one data row.
            if len(table.rows) < 2:
                continue

            # Render the table to text for classification and parsing.
            table_text = self._extract_table_text(table)

            # Route the table by its classified type.
            table_type = self._identify_table_type(table_text)

            if table_type == "scoring":
                # Scoring table → criteria.
                ai_results = self._ai_parse_scoring_table(table_text)
                if ai_results:
                    all_criteria.extend(ai_results)

            elif table_type == "deviation":
                # Deviation table → deviation items.
                deviation_results = self._ai_parse_deviation_table(table_text)
                if deviation_results:
                    deviation_items.extend(deviation_results)

        # Partition criteria into technical vs commercial.
        technical_criteria = []
        commercial_criteria = []

        for criteria in all_criteria:
            if criteria.category == TechnicalCategory.COMMERCIAL:
                commercial_criteria.append(criteria)
            else:
                technical_criteria.append(criteria)

        return technical_criteria, commercial_criteria, deviation_items

    except Exception as e:
        logger.error(f"解析Word文档失败: {e}")
        return [], [], []
|
||||
|
||||
def _ai_parse_deviation_table(self, table_text: str) -> List[DeviationItem]:
    """Parse a deviation table's text into DeviationItems using the LLM.

    The prompt asks for strict JSON with requirement text and response
    type. Any failure (no response, bad JSON) is logged and an empty
    list is returned.
    """
    try:
        prompt = f"""
请提取表格中的偏离项,返回JSON。

表格内容:
{table_text}

要求:
1. 提取技术要求和响应类型
2. 响应类型如:正偏离、负偏离、无偏离等
3. 忽略序号和表头

格式:
{{
    "deviation_items": [
        {{"requirement": "设备需符合国标", "response_type": "正偏离"}},
        {{"requirement": "技术指标要求", "response_type": "无偏离"}}
    ]
}}

只返回JSON,无其他文字:"""

        response = self._call_llm_api(prompt)
        if not response:
            raise ValueError("AI解析偏离表失败:无响应")

        # Parse the model's reply.
        try:
            result_data = json.loads(response)
            deviation_data = result_data.get("deviation_items", [])

            items = []
            for i, item in enumerate(deviation_data):
                deviation_item = DeviationItem(
                    requirement=item.get("requirement", ""),
                    response_type=item.get("response_type", "正偏离"),
                    chapter_id=f"deviation_{i+1:02d}"
                )
                items.append(deviation_item)

            return items

        except (json.JSONDecodeError, ValueError, KeyError) as e:
            logger.error(f"解析偏离表AI响应失败: {e}")
            return []

    except Exception as e:
        logger.error(f"AI解析偏离表失败: {e}")
        return []
|
||||
|
||||
def _parse_deviation_file(self, file_path: str) -> List[DeviationItem]:
    """Dispatch deviation-table parsing based on the file extension.

    Excel and CSV are loaded into a DataFrame and parsed generically;
    Word tables are handled by the dedicated Word parser.

    Raises:
        ValueError: for unsupported extensions.
    """
    try:
        file_path_obj = Path(file_path)
        suffix = file_path_obj.suffix.lower()

        if suffix in ['.xlsx', '.xls']:
            df = pd.read_excel(file_path)
        elif suffix == '.csv':
            df = pd.read_csv(file_path, encoding='utf-8-sig')
        elif suffix == '.docx':
            return self._parse_word_table_deviation(file_path)
        else:
            raise ValueError(f"不支持的偏离文件格式: {suffix}")

        return self._parse_deviation_from_df(df)

    except Exception as e:
        logger.error(f"解析偏离文件失败: {e}")
        raise
|
||||
|
||||
def _parse_deviation_from_df(self, df: pd.DataFrame) -> List[DeviationItem]:
    """Extract deviation items from a tabular file loaded into a DataFrame.

    Columns are matched by Chinese header substrings; the requirement
    column is mandatory, the response-type column optional. Rows with an
    empty requirement are skipped.

    Fixes over the original implementation:
    - A NaN response cell no longer stringifies to the literal "nan";
      it falls back to the "正偏离" default instead.
    - Row numbering uses the positional row number rather than the index
      label, which could be non-numeric.

    Raises:
        ValueError: when the requirement column is missing.
    """
    # Candidate column headers (Chinese) mapped to model field names.
    column_mapping = {
        '要求': 'requirement',
        '招标要求': 'requirement',
        '技术要求': 'requirement',
        '响应': 'response_type',
        '类型': 'response_type'
    }

    # Resolve actual DataFrame columns by substring match on headers.
    columns: dict[str, Any] = {}
    for col in df.columns:
        for key, value in column_mapping.items():
            if key in str(col):
                columns[value] = col
                break

    if 'requirement' not in columns:
        raise ValueError("偏离表缺少必要的列:要求")

    items = []
    resp_col = columns.get('response_type')
    for i, (_, row) in enumerate(df.iterrows()):
        if pd.isna(row[columns['requirement']]):
            continue

        # Default to "正偏离" when the response column is absent or the
        # cell is NaN, instead of stringifying NaN.
        response_type = '正偏离'
        if resp_col is not None and not pd.isna(row[resp_col]):
            response_type = str(row[resp_col]).strip()

        item = DeviationItem(
            requirement=str(row[columns['requirement']]).strip(),
            response_type=response_type,
            chapter_id=f"deviation_{i+1:02d}"
        )
        items.append(item)

    return items
|
||||
|
||||
def _parse_word_table_deviation(self, file_path: str) -> List[DeviationItem]:
    """Extract deviation items from tables in a Word document (no LLM).

    Header cells are matched by Chinese substrings to locate the
    requirement and response-type columns; tables without a requirement
    column are skipped.

    Fix over the original implementation: the response column test used
    a truthiness check (``resp_col and ...``) that wrongly treated column
    index 0 as "no column"; it now tests ``resp_col is not None``.
    """
    doc = Document(file_path)
    items = []

    for table in doc.tables:
        # Need at least a header row plus one data row.
        if len(table.rows) < 2:
            continue

        headers = [cell.text.strip() for cell in table.rows[0].cells]
        req_col = resp_col = None

        # Locate relevant columns by header substring.
        for i, header in enumerate(headers):
            if '要求' in header:
                req_col = i
            elif '响应' in header or '类型' in header:
                resp_col = i

        if req_col is None:
            continue

        for j, row in enumerate(table.rows[1:], 1):
            cells = [cell.text.strip() for cell in row.cells]

            # Skip rows too short to contain the requirement cell.
            if len(cells) <= req_col:
                continue

            # `is not None` so that a response column at index 0 is honored.
            has_response = resp_col is not None and len(cells) > resp_col
            item = DeviationItem(
                requirement=cells[req_col],
                response_type=cells[resp_col] if has_response else "正偏离",
                chapter_id=f"deviation_{j:02d}"
            )
            items.append(item)

    return items
|
||||
|
||||
def _parse_template_file(self, file_path: str) -> List[DocumentChapter]:
    """Extract the chapter outline from a Word template's heading paragraphs.

    Paragraphs whose style name starts with "Heading" become chapters;
    the heading level is taken from the style name's trailing digit
    (defaulting to 1 when absent). Each chapter gets a positional
    placeholder like ``{{chapter_01_content}}``.
    """
    doc = Document(file_path)
    chapters = []

    for i, paragraph in enumerate(doc.paragraphs):
        if paragraph.style.name.startswith('Heading'):
            # "Heading 2" → level 2; styles without a trailing number → level 1.
            level = int(paragraph.style.name.split()[-1]) if paragraph.style.name.split()[-1].isdigit() else 1

            chapter = DocumentChapter(
                id=f"template_chapter_{i+1:02d}",
                title=paragraph.text.strip(),
                level=level,
                template_placeholder=f"{{{{chapter_{i+1:02d}_content}}}}"
            )
            chapters.append(chapter)

    return chapters
|
||||
|
||||
def _generate_professional_chapters(self, scoring_criteria: List[ScoringCriteria]) -> List[DocumentChapter]:
    """Build the standard professional chapter set from the scoring items.

    The evaluation index, compliance/deviation table, technical solution
    (fallback chapter) and acceptance table are always included; the
    remaining chapters are added only when a scoring item of the matching
    category exists.

    Fix over the original implementation: the compliance chapter was
    appended in both branches of an if/else (dead conditional); it is now
    added unconditionally, which is the same behavior.
    """
    chapters = []

    # Categories actually present in the scoring items.
    categories_used = set()
    for criteria in scoring_criteria:
        categories_used.add(criteria.category.value)

    # 1. Evaluation index table (always included).
    chapters.append(self._create_standard_chapter("evaluation_index"))

    # 2. Compliance / deviation table — always included, as standard
    #    tendering practice requires it regardless of scoring categories.
    chapters.append(self._create_standard_chapter("compliance_response"))

    # 3. Category-driven chapters, in canonical order.
    category_order = [
        "technical_solution",
        "equipment_spec",
        "implementation",
        "quality_system",
        "after_sales"
    ]

    for category_key in category_order:
        if category_key in categories_used or category_key == "technical_solution":
            # The technical-solution chapter is always included (fallback chapter).
            chapters.append(self._create_standard_chapter(category_key))

    # 4. Acceptance & performance-assessment table (standard contractual requirement).
    chapters.append(self._create_standard_chapter("contract_delivery"))

    return chapters
|
||||
|
||||
def _create_standard_chapter(self, chapter_key: str) -> DocumentChapter:
    """Instantiate a DocumentChapter from a STANDARD_CHAPTERS entry.

    Raises:
        KeyError: when ``chapter_key`` is not a known standard chapter.
    """
    template = ChapterTemplate.STANDARD_CHAPTERS[chapter_key]
    return DocumentChapter(
        id=template["id"],
        title=template["title"],
        level=template["level"],
        template_placeholder=template["placeholder"]
    )
|
||||
|
||||
def _map_criteria_to_chapters(self, bid_structure: BidStructure) -> None:
    """Rewrite each scoring item's chapter_id to point at its routed chapter.

    Mutates ``bid_structure.scoring_criteria`` in place. Items whose
    category (or routed chapter) is missing fall back to the
    technical-solution chapter ("tech_solution").
    """
    category_mapping = ChapterTemplate.get_category_chapter_mapping()

    # Build a category-key → actual chapter-id table from the chapters present.
    chapter_id_map = {}
    for chapter in bid_structure.chapters:
        # Reverse lookup: find which STANDARD_CHAPTERS key produced this chapter.
        for category_key, template_data in ChapterTemplate.STANDARD_CHAPTERS.items():
            if template_data["id"] == chapter.id:
                chapter_id_map[category_key] = chapter.id
                break

    # Route every scoring item to its chapter (fallback: technical solution).
    for criteria in bid_structure.scoring_criteria:
        category_chapter_key = category_mapping.get(criteria.category, "technical_solution")
        criteria.chapter_id = chapter_id_map.get(category_chapter_key, "tech_solution")
|
||||
225
src/bidmaster/tools/rag.py
Normal file
225
src/bidmaster/tools/rag.py
Normal file
@ -0,0 +1,225 @@
|
||||
"""RAG检索增强生成工具
|
||||
|
||||
基于ChromaDB的文档检索系统,支持文档索引、相似度搜索和内容检索。
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import chromadb
|
||||
from chromadb.config import Settings as ChromaSettings
|
||||
from chromadb.utils import embedding_functions
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from langchain_community.document_loaders import (
|
||||
PyPDFLoader,
|
||||
TextLoader,
|
||||
UnstructuredWordDocumentLoader,
|
||||
)
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from ..config import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RAGTool:
    """ChromaDB-backed retrieval tool: document indexing and similarity search."""

    def __init__(self) -> None:
        self.settings = get_settings()
        # Ensure the on-disk vector store directory exists.
        self.chroma_path = Path(self.settings.chroma_path)
        self.chroma_path.mkdir(parents=True, exist_ok=True)

        # Persistent ChromaDB client (telemetry disabled).
        self.client = chromadb.PersistentClient(
            path=str(self.chroma_path),
            settings=ChromaSettings(anonymized_telemetry=False)
        )

        # Embedding function used for both indexing and querying.
        self.embedding_function = self._get_embedding_function()

        # Get or create the knowledge-base collection.
        self.collection = self.client.get_or_create_collection(
            name=self.settings.collection_name,
            embedding_function=self.embedding_function,
            metadata={"description": "BidMaster知识库"}
        )

        # Character-based splitter for chunking documents before indexing.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.settings.chunk_size,
            chunk_overlap=self.settings.chunk_overlap,
            length_function=len,
        )
|
||||
|
||||
def add_document(self, file_path: str) -> bool:
    """Load, chunk and index a document into the knowledge base.

    Returns:
        True on success; False when the file yields no content or any
        step fails (failures are logged, not raised).

    Raises:
        Nothing — the FileNotFoundError raised internally is caught by
        the blanket handler and converted to a False return.
    """
    try:
        file_path_obj = Path(file_path)

        if not file_path_obj.exists():
            raise FileNotFoundError(f"文件不存在: {file_path}")

        # Load the raw document via the format-specific loader.
        documents = self._load_document(file_path_obj)
        if not documents:
            logger.warning(f"未能从文件中提取内容: {file_path}")
            return False

        # Split into overlapping chunks for embedding.
        chunks = self.text_splitter.split_documents(documents)

        # Persist chunks into the vector store.
        self._add_chunks_to_db(chunks, file_path)

        logger.info(f"成功添加文档: {file_path} ({len(chunks)}个块)")
        return True

    except Exception as e:
        logger.error(f"添加文档失败 {file_path}: {e}")
        return False
|
||||
|
||||
def search(self, query: str, k: int = 5) -> list[dict[str, Any]]:
|
||||
"""搜索相关内容"""
|
||||
try:
|
||||
results = self.collection.query(
|
||||
query_texts=[query],
|
||||
n_results=k,
|
||||
include=["documents", "metadatas", "distances"]
|
||||
)
|
||||
|
||||
# 格式化结果
|
||||
formatted_results = []
|
||||
if results["documents"] and results["documents"][0]:
|
||||
for i, doc in enumerate(results["documents"][0]):
|
||||
result = {
|
||||
"content": doc,
|
||||
"metadata": results["metadatas"][0][i] if results["metadatas"] else {},
|
||||
"score": 1 - results["distances"][0][i] if results["distances"] else 0.0
|
||||
}
|
||||
formatted_results.append(result)
|
||||
|
||||
return formatted_results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"搜索失败: {e}")
|
||||
return []
|
||||
|
||||
def get_stats(self) -> dict[str, Any]:
|
||||
"""获取知识库统计信息"""
|
||||
try:
|
||||
count = self.collection.count()
|
||||
files = set()
|
||||
|
||||
# 获取所有文档的文件路径
|
||||
if count > 0:
|
||||
all_data = self.collection.get(include=["metadatas"])
|
||||
for metadata in all_data["metadatas"]:
|
||||
if "source" in metadata:
|
||||
files.add(metadata["source"])
|
||||
|
||||
return {
|
||||
"total_chunks": count,
|
||||
"total_files": len(files),
|
||||
"files": list(files)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取统计信息失败: {e}")
|
||||
return {"total_chunks": 0, "total_files": 0, "files": []}
|
||||
|
||||
def reset_database(self) -> bool:
|
||||
"""重置数据库"""
|
||||
try:
|
||||
# 删除集合
|
||||
self.client.delete_collection(name=self.settings.collection_name)
|
||||
|
||||
# 重新创建集合
|
||||
self.collection = self.client.get_or_create_collection(
|
||||
name=self.settings.collection_name,
|
||||
metadata={"description": "BidMaster知识库"}
|
||||
)
|
||||
|
||||
logger.info("数据库已重置")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"重置数据库失败: {e}")
|
||||
return False
|
||||
|
||||
def _load_document(self, file_path: Path) -> list[Document]:
|
||||
"""根据文件类型加载文档"""
|
||||
suffix = file_path.suffix.lower()
|
||||
|
||||
loaders = {
|
||||
".pdf": PyPDFLoader,
|
||||
".txt": TextLoader,
|
||||
".md": TextLoader,
|
||||
".docx": UnstructuredWordDocumentLoader,
|
||||
}
|
||||
|
||||
loader_class = loaders.get(suffix)
|
||||
if not loader_class:
|
||||
raise ValueError(f"不支持的文件格式: {suffix}")
|
||||
|
||||
# 使用encoding参数处理文本文件
|
||||
if suffix in [".txt", ".md"]:
|
||||
loader = loader_class(str(file_path), encoding="utf-8")
|
||||
else:
|
||||
loader = loader_class(str(file_path))
|
||||
|
||||
return loader.load()
|
||||
|
||||
def _add_chunks_to_db(self, chunks: list[Document], source_file: str) -> None:
|
||||
"""将文档块添加到数据库"""
|
||||
if not chunks:
|
||||
return
|
||||
|
||||
documents = []
|
||||
metadatas = []
|
||||
ids = []
|
||||
|
||||
for i, chunk in enumerate(chunks):
|
||||
# 生成唯一ID
|
||||
chunk_id = self._generate_chunk_id(source_file, i, chunk.page_content)
|
||||
|
||||
documents.append(chunk.page_content)
|
||||
metadatas.append({
|
||||
"source": source_file,
|
||||
"chunk_index": i,
|
||||
"chunk_size": len(chunk.page_content),
|
||||
**chunk.metadata
|
||||
})
|
||||
ids.append(chunk_id)
|
||||
|
||||
# 批量添加到ChromaDB
|
||||
self.collection.add(
|
||||
documents=documents,
|
||||
metadatas=metadatas,
|
||||
ids=ids
|
||||
)
|
||||
|
||||
def _generate_chunk_id(self, source_file: str, chunk_index: int, content: str) -> str:
|
||||
"""生成块的唯一ID"""
|
||||
content_hash = hashlib.md5(content.encode()).hexdigest()[:8]
|
||||
return f"{Path(source_file).stem}_{chunk_index}_{content_hash}"
|
||||
|
||||
def _get_embedding_function(self):
|
||||
"""获取嵌入函数"""
|
||||
embedding_model = self.settings.embedding_model
|
||||
|
||||
if embedding_model.startswith("text-embedding-"):
|
||||
# OpenAI嵌入模型
|
||||
return embedding_functions.OpenAIEmbeddingFunction(
|
||||
api_key=self.settings.api_key,
|
||||
model_name=embedding_model
|
||||
)
|
||||
else:
|
||||
# 本地sentence-transformers模型
|
||||
return embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||
model_name=embedding_model
|
||||
)
|
||||
1
src/bidmaster/tools/table.py
Normal file
1
src/bidmaster/tools/table.py
Normal file
@ -0,0 +1 @@
|
||||
# 表格生成器
|
||||
1
src/bidmaster/tools/word.py
Normal file
1
src/bidmaster/tools/word.py
Normal file
@ -0,0 +1 @@
|
||||
# Word文档处理器
|
||||
1
src/bidmaster/utils/__init__.py
Normal file
1
src/bidmaster/utils/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# 公共工具模块
|
||||
1
src/bidmaster/utils/logger.py
Normal file
1
src/bidmaster/utils/logger.py
Normal file
@ -0,0 +1 @@
|
||||
# 日志配置
|
||||
6
templates/README.md
Normal file
6
templates/README.md
Normal file
@ -0,0 +1,6 @@
|
||||
# Word模板文件目录
|
||||
|
||||
存放带占位符的Word模板文件,如:
|
||||
- 标书模板.docx
|
||||
- 技术方案模板.docx
|
||||
- 响应表模板.docx
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# 测试包
|
||||
1
tests/fixtures/__init__.py
vendored
Normal file
1
tests/fixtures/__init__.py
vendored
Normal file
@ -0,0 +1 @@
|
||||
# 测试数据固件
|
||||
1
tests/integration/__init__.py
Normal file
1
tests/integration/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# 集成测试
|
||||
1
tests/integration/test_workflow.py
Normal file
1
tests/integration/test_workflow.py
Normal file
@ -0,0 +1 @@
|
||||
# 工作流集成测试
|
||||
1
tests/unit/__init__.py
Normal file
1
tests/unit/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# 单元测试
|
||||
1
tests/unit/test_parser.py
Normal file
1
tests/unit/test_parser.py
Normal file
@ -0,0 +1 @@
|
||||
# 文档解析器测试
|
||||
1
tests/unit/test_rag.py
Normal file
1
tests/unit/test_rag.py
Normal file
@ -0,0 +1 @@
|
||||
# RAG工具测试
|
||||
1
tests/unit/test_word.py
Normal file
1
tests/unit/test_word.py
Normal file
@ -0,0 +1 @@
|
||||
# Word处理器测试
|
||||
Loading…
Reference in New Issue
Block a user