source: orange/Orange/testing/unit/tests/test_projection_linear.py @ 10612:85f3705b313d

Revision 10612:85f3705b313d, 5.3 KB checked in by anze <anze.staric@…>, 2 years ago

Updated linear projectors to include class data in projected domain. Also added get_value_from to new features.
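For orientation, the behaviour described above is what the tests below exercise (notably test_pca_keeps_class_vars and test_pca_converts_domain). The following is a minimal usage sketch, not part of the test file, assuming a standard Orange 2.x install with the bundled "iris" dataset; it relies only on the linear.Pca entry point and the domain attributes that appear in this test file.

# Minimal sketch (assumption: Orange 2.x with the bundled "iris" dataset).
from Orange import data
from Orange.projection import linear

iris = data.Table("iris")                       # any table with a class variable

pca = linear.Pca(variance_covered=.99)(iris)    # fit PCA, obtain a PcaProjector
projected = pca(iris)                           # project the data

# The projected domain should keep the original class variable ...
print(iris.domain.class_var in projected.domain)

# ... and the new component features should carry get_value_from,
# so instances can be converted straight from the original domain.
print(all(f.get_value_from is not None for f in projected.domain.features))
converted = data.Table(projected.domain, iris)

The conversion in the last line mirrors what test_pca_converts_domain asserts: converting the original table through the projected domain should yield the same instances as projecting it directly.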

try:
    import unittest2 as unittest
except ImportError:
    import unittest

import numpy as np
import random

from Orange import data, feature
from Orange.projection import linear

def normalize(a):
    """Return a scaled to unit length (L2 norm)."""
    a = a if isinstance(a, np.ndarray) else np.array(a)
    return a / np.linalg.norm(a)

datasets = None

def prepare_dataset(components=((),), n=150):
    """Build a class-less domain of m continuous features and an n x m data
    matrix whose rows are random linear combinations of the given components."""
    components = components if isinstance(components, np.ndarray) else np.array(components)

    ncomponents, m = components.shape
    coefficients = np.random.normal(0., 1., (n, ncomponents))

    d = np.dot(coefficients, components)

    domain = data.Domain([feature.Continuous("A%d" % i) for i in range(m)], False)
    return domain, d


class TestPca(unittest.TestCase):
    def create_normal_dataset(self):
        self.principal_component = normalize([random.randint(0, 5) for _ in range(10)])
        self.dataset = data.Table(*prepare_dataset(components=[self.principal_component]))

    def create_wide_dataset(self):
        self.principal_component = normalize([random.randint(0, 5) for _ in range(250)])
        self.dataset = data.Table(*prepare_dataset(components=[self.principal_component]))

    def create_empty_dataset(self):
        self.dataset = data.Table(*prepare_dataset(components=([0, 0, 0, 0, 0],), n=0))

    def create_constant_dataset(self):
        self.dataset = data.Table(*prepare_dataset(components=([0, 0, 0, 0, 0],)))

    def create_dataset_with_classes(self):
        domain, features = prepare_dataset(components=[[random.randint(0, 5) for _ in range(10)]])
        domain = data.Domain(domain.features,
                             feature.Discrete("C", values=["F", "T"]),
                             class_vars=[feature.Discrete("MC%i" % i, values=["F", "T"]) for i in range(4)])

        self.dataset = data.Table(domain, np.hstack((features, np.random.random((len(features), 5)))))

    def test_pca_on_normal_data(self):
        self.create_normal_dataset()

        pca = linear.Pca(standardize=False)(self.dataset)
        self.assertIsInstance(pca, linear.PcaProjector)

        absolute_error = (np.abs(pca.eigen_vectors[0]) - np.abs(self.principal_component)).sum()
        self.assertAlmostEqual(absolute_error, 0.)

    def test_pca_on_wide_data(self):
        self.create_wide_dataset()

        pca = linear.Pca(standardize=False)(self.dataset)
        self.assertIsInstance(pca, linear.PcaProjector)

        absolute_error = (np.abs(pca.eigen_vectors[0]) - np.abs(self.principal_component)).sum()
        self.assertAlmostEqual(absolute_error, 0., 1)

    def test_pca_with_standardization(self):
        self.create_normal_dataset()

        pca = linear.Pca(standardize=True)(self.dataset)
        eigen_vector = pca.eigen_vectors[0]
        non_zero_elements = eigen_vector[eigen_vector.nonzero()]

        # since values in all dimensions are normally distributed, dimensions should be treated as equally important
        self.assertAlmostEqual(non_zero_elements.min(), non_zero_elements.max())

    def test_pca_with_variance_covered(self):
        self.create_normal_dataset()

        pca = linear.Pca(variance_covered=.99)(self.dataset)
        # all data points lie in one dimension, one component should cover all the variance
        nvectors, vector_dimension = pca.eigen_vectors.shape
        self.assertEqual(nvectors, 1)

    def test_pca_with_max_components(self):
        self.create_normal_dataset()
        max_components = 3

        pca = linear.Pca(max_components=max_components)(self.dataset)
        # even though the data lies in a single dimension, the projector should return exactly max_components eigenvectors
        nvectors, vector_dimension = pca.eigen_vectors.shape
        self.assertEqual(nvectors, max_components)

    def test_pca_converts_domain(self):
        self.create_dataset_with_classes()
        pca = linear.Pca(variance_covered=.99)(self.dataset)

        projected_data = pca(self.dataset)
        converted_data = data.Table(projected_data.domain, self.dataset)

        self.assertItemsEqual(projected_data, converted_data)

    def test_pca_converts_classless_domain(self):
        self.create_normal_dataset()
        pca = linear.Pca(variance_covered=.99)(self.dataset)

        projected_data = pca(self.dataset)
        converted_data = data.Table(projected_data.domain, self.dataset)

        self.assertItemsEqual(projected_data, converted_data)

    def test_pca_keeps_class_vars(self):
        self.create_dataset_with_classes()

        pca = linear.Pca(variance_covered=.99)(self.dataset)
        projected_data = pca(self.dataset)

        self.assertIn(self.dataset.domain.class_var, projected_data.domain)
        for class_ in self.dataset.domain.class_vars:
            self.assertIn(class_, projected_data.domain)
        for ex1, ex2 in zip(self.dataset, projected_data):
            self.assertEqual(ex1.get_class(), ex2.get_class())
            for v1, v2 in zip(ex1.get_classes(), ex2.get_classes()):
                self.assertEqual(v1, v2)

    def test_pca_on_empty_data(self):
        self.create_empty_dataset()

        with self.assertRaises(ValueError):
            linear.Pca()(self.dataset)

    def test_pca_on_only_constant_features(self):
        self.create_constant_dataset()

        with self.assertRaises(ValueError):
            linear.Pca()(self.dataset)


class TestFda(unittest.TestCase):
    pass

if __name__ == '__main__':
    unittest.main()