# 导入必要的库 from sklearn.datasets import fetch_california_housing from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsRegressor from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score import matplotlib.pyplot as plt # 1. 加载数据集(使用加州房价数据集) data = fetch_california_housing(data_home="/home/admin-root/haotian/ML/dataset") X = data.data # 特征数据 y = data.target # 目标数据(房价) # 2. 划分数据集(80%训练,20%测试) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # 3. 特征标准化(KNN对特征尺度敏感) scaler = StandardScaler() X_train = scaler.fit_transform(X_train) X_test = scaler.transform(X_test) # 4. 构建KNN回归模型(这里K=5) knn = KNeighborsRegressor(n_neighbors=5) # 5. 训练模型 knn.fit(X_train, y_train) # 6. 预测 y_pred = knn.predict(X_test) # 7. 评估模型性能 mae = mean_absolute_error(y_test, y_pred) mse = mean_squared_error(y_test, y_pred) r2 = r2_score(y_test, y_pred) # 打印评估指标 print(f"Mean Absolute Error: {mae:.4f}") print(f"Mean Squared Error: {mse:.4f}") print(f"R^2 Score: {r2:.4f}") # 可视化预测结果 vs 实际值(仅在二维数据时更直观) plt.scatter(y_test, y_pred, c='blue', edgecolors='k', alpha=0.7) plt.xlabel("True Values") plt.ylabel("Predictions") plt.title("KNN Regression - True vs Predicted") plt.savefig("./output/knn_r.png")