"""Unit tests for the data cleaning, feature engineering and splitting helpers."""

import unittest

import numpy as np
import pandas as pd

from data_processor import DataProcessor
from data_cleaner import DataCleaner
from feature_engineer import FeatureEngineer
from data_splitter import DataSplitter


class TestDataProcessor(unittest.TestCase):
    """Smoke tests for DataCleaner, FeatureEngineer and DataSplitter.

    Every test works on its own copy of the small fixture frame built in
    ``setUp``, so the fixture itself is never handed to the code under test.
    """

    def setUp(self):
        """Build the five-row fixture frame shared by all tests."""
        # Create test data: a numeric column containing one missing value,
        # a string column with three distinct labels, and a 0/1 target.
        self.test_data = pd.DataFrame({
            'feature1': [1, 2, np.nan, 4, 5],
            'feature2': ['A', 'B', 'A', 'B', 'C'],
            'target': [0, 1, 0, 1, 0],
        })

    def test_data_cleaner(self):
        """The result of ``handle_missing_values`` must contain no nulls."""
        cleaner = DataCleaner()
        cleaned_data = cleaner.handle_missing_values(self.test_data.copy())
        # isnull().any() reduces per column; the second any() reduces
        # across columns to a single flag.
        self.assertFalse(cleaned_data.isnull().any().any())

    def test_feature_engineer(self):
        """The result of ``scale_features`` must still have a 'feature1' column."""
        engineer = FeatureEngineer()
        scaled_data = engineer.scale_features(self.test_data.copy())
        # assertIn reports the actual column index on failure, unlike
        # assertTrue(... in ...), which only reports "False is not true".
        self.assertIn('feature1', scaled_data.columns)

    def test_data_splitter(self):
        """The three returned splits must together have as many rows as the input."""
        splitter = DataSplitter()
        # NOTE(review): 'target' is presumably the name of the label column;
        # confirm against DataSplitter.train_val_test_split.
        train, val, test = splitter.train_val_test_split(
            self.test_data.copy(), 'target'
        )
        self.assertEqual(
            len(train) + len(val) + len(test),
            len(self.test_data),
        )


if __name__ == '__main__':
    unittest.main()