1.修改微调模型 2.保存模型时添加限制使用GPU显存参数,以确保不会爆显存
This commit is contained in:
parent
4e914bb7b3
commit
f318eedac5
@ -25,7 +25,9 @@ dataset = dataset.map(to_chatml)
|
||||
|
||||
# 加载预训练模型
|
||||
model, tokenizer = FastLanguageModel.from_pretrained(
|
||||
model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit",
|
||||
# model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit",
|
||||
model_name = "unsloth/Qwen3-8B-unsloth-bnb-4bit",
|
||||
# model_name = "unsloth/Qwen3-4B-unsloth-bnb-4bit",
|
||||
max_seq_length = 2048, # Context length - can be longer, but uses more memory
|
||||
load_in_4bit = True, # 4bit uses much less memory , 启用QLoRA
|
||||
load_in_8bit = False, # A bit more accurate, uses 2x memory
|
||||
@ -93,8 +95,11 @@ _ = model.generate(
|
||||
streamer = TextStreamer(tokenizer, skip_prompt = True),
|
||||
)
|
||||
|
||||
# model.cpu()
|
||||
|
||||
model.save_pretrained_gguf(
|
||||
"ollama_model",
|
||||
"Qwen3-8B",
|
||||
tokenizer,
|
||||
# quantization_method="q4_k_m" # 或 "q8_0" # 量化模式--默认 q8_0, 可选f16, "q4_k_m", "q8_0", "q5_k_m",
|
||||
# quantization_method="q4_k_m", # 或 "q8_0" # 量化模式--默认 q8_0, 可选f16, "q4_k_m", "q8_0", "q5_k_m",
|
||||
maximum_memory_usage=0.7 # 限制使用 GPU 显存为总容量的 70%
|
||||
)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user