# MLPlatform/function_old/data_process/test_data_processor.py
#
# 34 lines
# 1.2 KiB
# Python

import unittest
import pandas as pd
import numpy as np
from data_processor import DataProcessor
from data_cleaner import DataCleaner
from feature_engineer import FeatureEngineer
from data_splitter import DataSplitter
class TestDataProcessor(unittest.TestCase):
    """Smoke tests for DataCleaner, FeatureEngineer and DataSplitter.

    Each test runs one helper against the small in-memory frame built in
    ``setUp`` and checks a single property of the result.
    """

    def setUp(self):
        """Build a five-row fixture with one missing value in 'feature1'."""
        # Create test data: a numeric column containing a NaN, a string
        # column, and a 0/1 'target' column.
        self.test_data = pd.DataFrame({
            'feature1': [1, 2, np.nan, 4, 5],
            'feature2': ['A', 'B', 'A', 'B', 'C'],
            'target': [0, 1, 0, 1, 0],
        })

    def test_data_cleaner(self):
        """handle_missing_values should return a frame with no null cells."""
        cleaner = DataCleaner()
        # Hand over a copy so the call cannot mutate self.test_data.
        cleaned_data = cleaner.handle_missing_values(self.test_data.copy())
        self.assertFalse(cleaned_data.isnull().any().any())

    def test_feature_engineer(self):
        """scale_features should keep a 'feature1' column in its output."""
        engineer = FeatureEngineer()
        scaled_data = engineer.scale_features(self.test_data.copy())
        # assertIn reports the member and the container on failure, whereas
        # assertTrue(x in y) only reports "False is not true".
        self.assertIn('feature1', scaled_data.columns)

    def test_data_splitter(self):
        """The three split sizes should sum to the original row count."""
        splitter = DataSplitter()
        # NOTE(review): 'target' is presumably the label column name expected
        # by train_val_test_split -- confirm against DataSplitter.
        train, val, test = splitter.train_val_test_split(
            self.test_data.copy(), 'target'
        )
        self.assertEqual(
            len(train) + len(val) + len(test),
            len(self.test_data),
        )
# Run the tests in this module via unittest's command-line entry point when
# the file is executed directly as a script (not when it is imported).
if __name__ == '__main__':
    unittest.main()