diff --git a/003加载自己的数据集微调.py b/003加载自己的数据集微调.py index 9f31477..f3efe39 100644 --- a/003加载自己的数据集微调.py +++ b/003加载自己的数据集微调.py @@ -25,7 +25,9 @@ dataset = dataset.map(to_chatml) # 加载预训练模型 model, tokenizer = FastLanguageModel.from_pretrained( - model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit", + # model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit", + model_name = "unsloth/Qwen3-8B-unsloth-bnb-4bit", + # model_name = "unsloth/Qwen3-4B-unsloth-bnb-4bit", max_seq_length = 2048, # Context length - can be longer, but uses more memory load_in_4bit = True, # 4bit uses much less memory , 启用QLoRA load_in_8bit = False, # A bit more accurate, uses 2x memory @@ -93,8 +95,11 @@ _ = model.generate( streamer = TextStreamer(tokenizer, skip_prompt = True), ) +# model.cpu() + model.save_pretrained_gguf( - "ollama_model", + "Qwen3-8B", tokenizer, - # quantization_method="q4_k_m" # 或 "q8_0" # 量化模式--默认 q8_0, 可选f16, "q4_k_m", "q8_0", "q5_k_m", + # quantization_method="q4_k_m", # 或 "q8_0" # 量化模式--默认 q8_0, 可选f16, "q4_k_m", "q8_0", "q5_k_m", + maximum_memory_usage=0.7 # 限制使用 GPU 显存为总容量的 70%