"""Unit tests for the DataCleaner, FeatureEngineer and DataSplitter classes."""

import unittest

import numpy as np
import pandas as pd

from data_cleaner import DataCleaner
from data_processor import DataProcessor
from data_splitter import DataSplitter
from feature_engineer import FeatureEngineer


class TestDataProcessor(unittest.TestCase):
    """Tests for the cleaning, scaling and splitting stages.

    Every test hands the stage under test a copy of the fixture frame
    built in ``setUp``.
    """

    def setUp(self):
        """Build the five-row fixture frame used by every test."""
        # Test data: a numeric column containing one NaN, a string column,
        # and a 0/1 target column.
        self.test_data = pd.DataFrame({
            'feature1': [1, 2, np.nan, 4, 5],
            'feature2': ['A', 'B', 'A', 'B', 'C'],
            'target': [0, 1, 0, 1, 0],
        })

    def test_data_cleaner(self):
        """handle_missing_values must return a frame with no null cells."""
        cleaner = DataCleaner()
        cleaned_data = cleaner.handle_missing_values(self.test_data.copy())
        self.assertFalse(cleaned_data.isnull().any().any())

    def test_feature_engineer(self):
        """scale_features must keep the 'feature1' column in its output."""
        engineer = FeatureEngineer()
        scaled_data = engineer.scale_features(self.test_data.copy())
        # assertIn shows the actual columns in the failure message, unlike
        # assertTrue(x in y), which only reports "False is not true".
        self.assertIn('feature1', scaled_data.columns)

    def test_data_splitter(self):
        """The train/val/test parts must together contain every input row."""
        splitter = DataSplitter()
        train, val, test = splitter.train_val_test_split(
            self.test_data.copy(), 'target'
        )
        total_rows = len(train) + len(val) + len(test)
        self.assertEqual(total_rows, len(self.test_data))


if __name__ == '__main__':
    # Run the test suite when this file is executed directly.
    unittest.main()