diff --git a/003加载自己的数据集微调.py b/003加载自己的数据集微调.py index 9f31477..f3efe39 100644 --- a/003加载自己的数据集微调.py +++ b/003加载自己的数据集微调.py @@ -25,7 +25,9 @@ dataset = dataset.map(to_chatml) # 加载预训练模型 model, tokenizer = FastLanguageModel.from_pretrained( - model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit", + # model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit", + model_name = "unsloth/Qwen3-8B-unsloth-bnb-4bit", + # model_name = "unsloth/Qwen3-4B-unsloth-bnb-4bit", max_seq_length = 2048, # Context length - can be longer, but uses more memory load_in_4bit = True, # 4bit uses much less memory , 启用QLoRA load_in_8bit = False, # A bit more accurate, uses 2x memory @@ -93,8 +95,11 @@ _ = model.generate( streamer = TextStreamer(tokenizer, skip_prompt = True), ) +# model.cpu() + model.save_pretrained_gguf( - "ollama_model", + "Qwen3-8B", tokenizer, - # quantization_method="q4_k_m" # 或 "q8_0" # 量化模式--默认 q8_0, 可选f16, "q4_k_m", "q8_0", "q5_k_m", + # quantization_method="q4_k_m", # 或 "q8_0" # 量化模式--默认 q8_0, 可选f16, "q4_k_m", "q8_0", "q5_k_m", + maximum_memory_usage=0.7 # 限制使用 GPU 显存为总容量的 70%