Changeset 10710:1083062348a9 in orange


Ignore:
Timestamp:
04/03/12 10:08:35 (2 years ago)
Author:
anze <anze.staric@…>
Branch:
default
Message:

Improved pca unittests.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • Orange/testing/unit/tests/test_projection_linear.py

    r10645 r10710  
    1010from Orange.projection import linear 
    1111 
     12random.seed(0) 
     13 
    1214def normalize(a): 
    1315    a = a if isinstance(a, np.ndarray) else np.array(a) 
     
    2729    return domain, d 
    2830 
     31def create_base(ncomponents, m): 
     32    vector = np.random.randint(0,5,m) 
     33    while not vector.any(): 
     34        vector = np.random.randint(0,5,m) 
     35    principal_components = np.zeros((ncomponents, m)) 
     36    k = float(m) / ncomponents 
     37    to = 0 
     38    for i in range(1,ncomponents): 
     39        from_, to = int(k*(i-1)), int(k*i) 
     40        principal_components[i-1,from_:to] = vector[from_:to] 
     41    principal_components[ncomponents-1,to:m] = normalize(vector[to:m]) 
     42    return principal_components 
    2943 
    3044 
    3145class TestPca(unittest.TestCase): 
    32     def create_normal_dataset(self): 
    33         self.principal_component = normalize([random.randint(0, 5) for _ in range(10)]) 
    34         self.dataset = data.Table(*prepare_dataset(components=[self.principal_component])) 
    35  
    36     def create_wide_dataset(self): 
    37         self.principal_component = normalize([random.randint(0, 5) for _ in range(250)]) 
    38         self.dataset = data.Table(*prepare_dataset(components=[self.principal_component])) 
     46    def create_dataset(self, ncomponents=3, m=10): 
     47        self.principal_components = create_base(ncomponents, m) 
     48        self.dataset = data.Table(*prepare_dataset(components=self.principal_components)) 
    3949 
    4050    def create_empty_dataset(self): 
     
    5464 
    5565 
    56     def test_pca_on_normal_data(self): 
    57         self.create_normal_dataset() 
    58  
    59         pca = linear.Pca(standardize=False)(self.dataset) 
    60         self.assertIsInstance(pca, linear.PcaProjector) 
    61  
    62         absolute_error = (np.abs(pca.projection[0]) - np.abs(self.principal_component)).sum() 
    63         self.assertAlmostEqual(absolute_error, 0.) 
    64  
    65     def test_pca_on_wide_data(self): 
    66         self.create_wide_dataset() 
    67  
    68         pca = linear.Pca(standardize=False)(self.dataset) 
    69         self.assertIsInstance(pca, linear.PcaProjector) 
    70  
    71         absolute_error = (np.abs(pca.projection[0]) - np.abs(self.principal_component)).sum() 
    72         self.assertAlmostEqual(absolute_error, 0., 1) 
     66    def test_pca(self): 
     67        for m in (10, 250): 
     68            self.create_dataset(m=m) 
     69 
     70            pca = linear.Pca(standardize=False)(self.dataset) 
     71 
     72            self.assertInCorrectSpace(pca.projection[pca.variances > 0.01, :]) 
     73            for v in pca.projection: 
     74                # projection vectors should be normalized 
     75                self.assertAlmostEqual(np.linalg.norm(v), 1.) 
     76 
     77            # Components should have decreasing variances 
     78            self.assertListEqual(pca.variances.tolist(), sorted(pca.variances, reverse=True)) 
    7379 
    7480    def test_pca_with_standardization(self): 
    75         self.create_normal_dataset() 
     81        self.create_dataset(ncomponents=1) 
    7682 
    7783        pca = linear.Pca(standardize=True)(self.dataset) 
    78         eigen_vector = pca.projection[0] 
    79         non_zero_elements = eigen_vector[eigen_vector.nonzero()] 
     84        projection = pca.projection[0] 
     85        non_zero_elements = projection[projection.nonzero()] 
    8086 
    8187        # since values in all dimensions are normally distributed, dimensions should be treated as equally important 
     
    8389 
    8490    def test_pca_with_variance_covered(self): 
    85         self.create_normal_dataset() 
     91        ncomponents = 3 
     92        self.create_dataset(ncomponents=ncomponents) 
    8693 
    8794        pca = linear.Pca(variance_covered=.99)(self.dataset) 
    88         # all data points lie in one dimension, one component should cover all the variance 
     95 
    8996        nvectors, vector_dimension = pca.projection.shape 
    90         self.assertEqual(nvectors, 1) 
     97        self.assertEqual(nvectors, ncomponents) 
    9198 
    9299    def test_pca_with_max_components(self): 
    93         self.create_normal_dataset() 
    94100        max_components = 3 
     101        self.create_dataset(ncomponents = max_components + 3) 
    95102 
    96103        pca = linear.Pca(max_components=max_components)(self.dataset) 
    97         # all data points lie in one dimension, one component should cover all the variance 
     104 
    98105        nvectors, vector_dimension = pca.projection.shape 
    99106        self.assertEqual(nvectors, max_components) 
     
    102109        self.create_dataset_with_unknowns() 
    103110 
    104         pca = linear.Pca()(self.dataset) 
    105  
    106  
     111        linear.Pca()(self.dataset) 
     112 
     113    def test_total_variance_remains_the_same(self): 
     114        for m in (10, 250): 
     115            self.create_dataset(m=m) 
     116 
     117            pca = linear.Pca()(self.dataset) 
     118 
     119            self.assertAlmostEqual(pca.variance_sum, pca.variances.sum()) 
     120            self.assertAlmostEqual(pca.variance_sum, (self.principal_components != 0).sum()) 
    107121 
    108122    def test_pca_on_empty_data(self): 
     
    117131        with self.assertRaises(ValueError): 
    118132            linear.Pca()(self.dataset) 
     133 
     134    def assertInCorrectSpace(self, vectors): 
     135        vectors = vectors.copy() 
     136        for component in self.principal_components: 
     137            i = component.nonzero()[0][0] 
     138            coef = vectors[:,i] / component[i] 
     139            vectors -= np.dot(coef.reshape(-1, 1), component.reshape(1, -1)) 
     140 
     141        for vector in vectors: 
     142            for value in vector: 
     143                self.assertAlmostEqual(value, 0.) 
    119144 
    120145 
Note: See TracChangeset for help on using the changeset viewer.