# unsloth/008_ollama_example.py  (original name: 008测试ollama示例.py — "ollama test example")
# 2025-07-18 09:43:17 +08:00
# 59 lines, 1.7 KiB, Python
import json
import requests
# Local Ollama server endpoint; one Session is reused for connection pooling.
base_url = 'http://localhost:11434'
session = requests.Session()
# # 获取ollama模型列表
# response_model_list = session.get(f'{base_url}/api/tags')
# response_model_list.raise_for_status()
# response_model_list = response_model_list.json()
# print("-"*50)
# print(response_model_list)
# # 生成任务--response
# prompt = "介绍一下中国."
# stream =True
# data = {
# 'model': 'Qwen3-8B:latest',
# 'prompt': f"""请直接回答以下问题,不要显示思考过程、分析步骤或推理过程:{prompt}注意:请直接给出答案,不要包含"让我想想"、"分析一下"、"首先"、"然后"等思考过程的表述。""",
# 'stream': stream,
# 'temperature':0.7,
# 'max_tokens': 200
# }
# response_generate = session.post(f'{base_url}/api/generate', json=data, stream=stream)
# response_generate.raise_for_status()
# print("-"*50)
# if stream:
# for line in response_generate.iter_lines():
# print(json.loads(line.decode('utf-8')))
# else:
# response_generate = response_generate.json()
# print(response_generate)
# 对话模式--message--content
messages = [{"role": "user", "content": "请问你有什么能帮我的吗?"}]
stream = True
enable_thinking = True
data = {
'model': 'Qwen3-8B-q8:latest',
'messages' : messages,
'stream': stream,
'temperature':0.7,
'max_tokens': 200,
# 'enable_thinking':enable_thinking
}
# Send the chat request; stream=True keeps the HTTP connection open so the
# NDJSON response chunks can be consumed as they arrive.
response_chat = session.post(f'{base_url}/api/chat', json=data, stream=stream)
# FIX: raise_for_status() was missing here (the generate example above has it);
# without it an HTTP error would surface as a confusing JSON decode failure.
response_chat.raise_for_status()
print("-" * 50)
if stream:
    # Each non-empty line is one JSON object: {"message": {"content": ...}, ...}
    for line in response_chat.iter_lines():
        if not line:
            continue  # FIX: iter_lines() yields keep-alive empty lines; json.loads(b'') raises
        chunk = json.loads(line.decode('utf-8'))
        # FIX: the final "done" chunk may lack "message" — use .get() defaults
        # instead of chaining .get("message").get("content") (AttributeError on None).
        print(chunk.get("message", {}).get("content", ""), end="", flush=True)
    print("\n")
else:
    response_chat = response_chat.json()
    print(response_chat)