# 59 lines, 1.7 KiB, Python — file-listing header left over from extraction
import json
|
|
import requests
|
|
|
|
# Base URL of the local Ollama server (default port is 11434).
base_url = 'http://localhost:11434'

# One shared Session gives connection pooling across the requests below.
# requests.Session() is the documented class; requests.session() is a
# legacy lowercase alias.
session = requests.Session()
|
|
|
|
# # Fetch the list of models installed in Ollama
|
|
# response_model_list = session.get(f'{base_url}/api/tags')
|
|
# response_model_list.raise_for_status()
|
|
# response_model_list = response_model_list.json()
|
|
# print("-"*50)
|
|
# print(response_model_list)
|
|
|
|
|
|
# # Generate task (/api/generate) -- single-prompt response
|
|
# prompt = "介绍一下中国."
|
|
# stream =True
|
|
# data = {
|
|
# 'model': 'Qwen3-8B:latest',
|
|
# 'prompt': f"""请直接回答以下问题,不要显示思考过程、分析步骤或推理过程:{prompt}注意:请直接给出答案,不要包含"让我想想"、"分析一下"、"首先"、"然后"等思考过程的表述。""",
|
|
# 'stream': stream,
|
|
# 'temperature':0.7,
|
|
# 'max_tokens': 200
|
|
# }
|
|
|
|
# response_generate = session.post(f'{base_url}/api/generate', json=data, stream=stream)
|
|
# response_generate.raise_for_status()
|
|
# print("-"*50)
|
|
# if stream:
|
|
# for line in response_generate.iter_lines():
|
|
# print(json.loads(line.decode('utf-8')))
|
|
# else:
|
|
# response_generate = response_generate.json()
|
|
# print(response_generate)
|
|
|
|
|
|
# Chat mode: send a message list to /api/chat and stream the assistant's
# reply token-by-token to stdout.
messages = [{"role": "user", "content": "请问你有什么能帮我的吗?"}]
stream = True
enable_thinking = True

data = {
    'model': 'Qwen3-8B-q8:latest',
    'messages': messages,
    'stream': stream,
    # Generation parameters must be nested under 'options' for the Ollama
    # API; top-level 'temperature'/'max_tokens' keys are silently ignored.
    # Ollama's name for the max-token limit is 'num_predict'.
    'options': {
        'temperature': 0.7,
        'num_predict': 200,
    },
    # 'enable_thinking': enable_thinking,  # NOTE(review): newer Ollama uses a top-level 'think' flag — confirm before enabling
}

response_chat = session.post(f'{base_url}/api/chat', json=data, stream=stream)
# Fail fast on HTTP errors (matches the commented-out sections above,
# which call raise_for_status(); it was missing here).
response_chat.raise_for_status()

print("-" * 50)
if stream:
    # Each streamed line is one JSON chunk; the final chunk carries stats
    # and may have an empty message.
    for line in response_chat.iter_lines():
        if not line:
            # iter_lines() can yield empty keep-alive lines; json.loads
            # would raise on them.
            continue
        chunk = json.loads(line.decode('utf-8'))
        # 'message' may be absent on error chunks — default to empty text
        # instead of raising AttributeError on None.
        print(chunk.get("message", {}).get("content", ""), end="", flush=True)
    print("\n")
else:
    response_chat = response_chat.json()
    print(response_chat)