This commit is contained in:
sladro 2026-02-02 22:56:07 +08:00
parent dfc4d1cfa7
commit 1719bd5463

View File

@ -160,33 +160,38 @@ async def converse_with_chat_assistant(
logger.warning(f"意图分类失败使用RAG服务: {e}")
# ========== 3. RAG历史缓存查找 ==========
logger.info(f'[SemanticCache] 准备执行RAG历史缓存查找 | redis={redis is not None} | chat_id={converse_params.chat_id}')
if redis:
lookup_hash = get_question_hash(converse_params.question)
logger.info(f'[SemanticCache] 开始查找 | chat_id={converse_params.chat_id} | question={converse_params.question} | hash={lookup_hash} | threshold=0.60')
service = get_semantic_cache_service()
logger.info(f'[SemanticCache] service实例: {service}')
cache_result = await service.lookup(
converse_params.chat_id,
converse_params.question,
redis
)
logger.info(f'[SemanticCache] 查找结果 | found={cache_result is not None}')
if cache_result:
cached_answer, cache_similarity = cache_result
logger.info(f'[RAG_SOURCE] 命中RAG会话历史 | chat_id={converse_params.chat_id} | question={converse_params.question} | similarity={cache_similarity:.2f} | answer_length={len(cached_answer)}')
logger.info(f'[SemanticCache] 流式响应使用RAG历史缓存答案chat_id={converse_params.chat_id}')
return StreamingResponse(
stream_cached_response(cached_answer, converse_params.chat_id, start_time, cache_source='rag_history'),
media_type='text/event-stream',
headers={
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'X-Accel-Buffering': 'no',
'Transfer-Encoding': 'chunked'
}
# 注意:当 KB_PROVIDER=es_bm25 时,内部KB答案应直接来源于 ES 检索结果,
# 避免 rag:semantic:cache 抢答,导致“curl 与 API 不一致”。
if KBConfig.KB_PROVIDER != 'es_bm25':
logger.info(f'[SemanticCache] 准备执行RAG历史缓存查找 | redis={redis is not None} | chat_id={converse_params.chat_id}')
if redis:
lookup_hash = get_question_hash(converse_params.question)
logger.info(f'[SemanticCache] 开始查找 | chat_id={converse_params.chat_id} | question={converse_params.question} | hash={lookup_hash} | threshold=0.60')
service = get_semantic_cache_service()
logger.info(f'[SemanticCache] service实例: {service}')
cache_result = await service.lookup(
converse_params.chat_id,
converse_params.question,
redis
)
logger.info(f'[SemanticCache] 查找结果 | found={cache_result is not None}')
if cache_result:
cached_answer, cache_similarity = cache_result
logger.info(f'[RAG_SOURCE] 命中RAG会话历史 | chat_id={converse_params.chat_id} | question={converse_params.question} | similarity={cache_similarity:.2f} | answer_length={len(cached_answer)}')
logger.info(f'[SemanticCache] 流式响应使用RAG历史缓存答案chat_id={converse_params.chat_id}')
return StreamingResponse(
stream_cached_response(cached_answer, converse_params.chat_id, start_time, cache_source='rag_history'),
media_type='text/event-stream',
headers={
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'X-Accel-Buffering': 'no',
'Transfer-Encoding': 'chunked'
}
)
else:
logger.info(f"[SemanticCache] skip lookup because KB_PROVIDER=es_bm25 | chat_id={converse_params.chat_id}")
# ========== 4. RAG服务调用(在这里根据 KB_PROVIDER 选择 ragflow / es_bm25) ==========
try:
@ -205,9 +210,8 @@ async def converse_with_chat_assistant(
)
if KBESService.is_confident(metrics):
# Callback that persists a finished answer for later cache lookups.
# It awaits _async_store_qa with the chat id, the question, the answer and the
# redis handle, but only when redis is truthy and the answer, once stripped of
# surrounding whitespace, is at least 10 characters long; otherwise it does
# nothing.
async def cache_store_func(answer: str):
if redis and answer and len(answer.strip()) >= 10:
await _async_store_qa(converse_params.chat_id, converse_params.question, answer, redis)
# KB_PROVIDER=es_bm25 时不写入 rag:semantic:cache,避免污染后续命中
cache_store_func = None
return StreamingResponse(
stream_kb_es_response(