source: orange/Orange/testing/unit/tests/test_projection_linear.py @ 10644:68e7c096ec40

Revision 10644:68e7c096ec40, 6.8 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Fixed handling of unknown values.

Line 
# Prefer the unittest2 backport when it is installed; otherwise fall back to
# the standard-library unittest.  Only ImportError is caught so that any other
# failure raised while importing unittest2 is not silently hidden.
try:
    import unittest2 as unittest
except ImportError:
    import unittest
5
6import numpy as np
7import random
8
9from Orange import data, feature
10from Orange.projection import linear
11
def normalize(a):
    """Return *a* divided by its norm.

    *a* may be a numpy array or any sequence accepted by ``np.array``.
    The norm is whatever ``np.linalg.norm`` returns for the array with its
    default arguments.  The input is not modified; a new array is returned.
    """
    vector = a
    if not isinstance(vector, np.ndarray):
        vector = np.array(vector)
    length = np.linalg.norm(vector)
    return vector / length
15
# Module-level placeholder; nothing in the visible part of this file reads it
# or assigns it again.
datasets = None
17
def prepare_dataset(components=((),), n=150):
    """Build a random dataset whose rows are linear combinations of *components*.

    components -- 2-D array (or nested sequence convertible by ``np.array``)
                  with one row per component and one column per feature.
    n          -- number of rows to generate.

    Returns a ``(domain, values)`` pair: ``domain`` is a ``data.Domain`` with
    one continuous feature ("A0", "A1", ...) per column of *components*, and
    ``values`` is the ``n`` x ``m`` array of generated numbers.
    """
    if not isinstance(components, np.ndarray):
        components = np.array(components)
    n_components, n_features = components.shape

    # One standard-normal weight per (row, component) pair; each generated row
    # is the weighted sum of the component vectors.
    weights = np.random.normal(0., 1., (n, n_components))
    values = np.dot(weights, components)

    attributes = [feature.Continuous("A%d" % index) for index in range(n_features)]
    # The second argument is passed as False exactly as before
    # (presumably "no class variable" -- confirm against data.Domain).
    return data.Domain(attributes, False), values
28
29
30
class TestPca(unittest.TestCase):
    """Tests for ``linear.Pca`` on synthetic tables built by ``prepare_dataset``."""

    def create_normal_dataset(self):
        """Store a random unit vector and a table of rows that are multiples of it."""
        self.principal_component = normalize([random.randint(0, 5) for _ in range(10)])
        self.dataset = data.Table(*prepare_dataset(components=[self.principal_component]))

    def create_wide_dataset(self):
        """Like the normal dataset, but with 250 features (more than the default 150 rows)."""
        self.principal_component = normalize([random.randint(0, 5) for _ in range(250)])
        self.dataset = data.Table(*prepare_dataset(components=[self.principal_component]))

    def create_empty_dataset(self):
        """Store a table with five features and no rows."""
        self.dataset = data.Table(*prepare_dataset(components=([0, 0, 0, 0, 0],), n=0))

    def create_constant_dataset(self):
        """Store a table generated from an all-zero component, so every feature is constant."""
        self.dataset = data.Table(*prepare_dataset(components=([0, 0, 0, 0, 0],)))

    def create_dataset_with_unknowns(self, percentage=0.05):
        """Store the normal dataset, then overwrite random cells with "?".

        percentage -- probability with which each individual cell is replaced.
        """
        self.create_normal_dataset()

        for ex in self.dataset:
            for i, _ in enumerate(ex):
                if random.random() < percentage:
                    # "?" is presumably Orange's notation for an unknown value.
                    ex[i] = "?"

    def test_pca_on_normal_data(self):
        self.create_normal_dataset()

        pca = linear.Pca(standardize=False)(self.dataset)
        self.assertIsInstance(pca, linear.PcaProjector)

        # Magnitudes are compared, presumably because the sign of an
        # eigenvector is arbitrary.
        # NOTE(review): the differences are summed with their sign, so errors
        # of opposite sign can cancel each other out.
        absolute_error = (np.abs(pca.eigen_vectors[0]) - np.abs(self.principal_component)).sum()
        self.assertAlmostEqual(absolute_error, 0.)

    def test_pca_on_wide_data(self):
        self.create_wide_dataset()

        pca = linear.Pca(standardize=False)(self.dataset)
        self.assertIsInstance(pca, linear.PcaProjector)

        # Same comparison as for normal data, but only to one decimal place.
        absolute_error = (np.abs(pca.eigen_vectors[0]) - np.abs(self.principal_component)).sum()
        self.assertAlmostEqual(absolute_error, 0., 1)

    def test_pca_with_standardization(self):
        self.create_normal_dataset()

        pca = linear.Pca(standardize=True)(self.dataset)
        eigen_vector = pca.eigen_vectors[0]
        non_zero_elements = eigen_vector[eigen_vector.nonzero()]

        # since values in all dimensions are normally distributed, dimensions should be treated as equally important
        self.assertAlmostEqual(non_zero_elements.min(), non_zero_elements.max())

    def test_pca_with_variance_covered(self):
        self.create_normal_dataset()

        pca = linear.Pca(variance_covered=.99)(self.dataset)
        # all data points lie in one dimension, one component should cover all the variance
        nvectors, _ = pca.eigen_vectors.shape
        self.assertEqual(nvectors, 1)

    def test_pca_with_max_components(self):
        self.create_normal_dataset()
        max_components = 3

        pca = linear.Pca(max_components=max_components)(self.dataset)
        # exactly max_components eigenvectors are expected when only
        # max_components is given
        nvectors, _ = pca.eigen_vectors.shape
        self.assertEqual(nvectors, max_components)

    def test_pca_handles_unknowns(self):
        self.create_dataset_with_unknowns()

        # No assertion: the test passes as long as fitting does not raise.
        pca = linear.Pca()(self.dataset)

    def test_pca_on_empty_data(self):
        self.create_empty_dataset()

        with self.assertRaises(ValueError):
            linear.Pca()(self.dataset)

    def test_pca_on_only_constant_features(self):
        self.create_constant_dataset()

        with self.assertRaises(ValueError):
            linear.Pca()(self.dataset)
120
121
class TestProjector(unittest.TestCase):
    """Tests for the projector object returned by calling ``linear.Pca(...)`` on a table."""

    def create_normal_dataset(self):
        """Store a random unit vector and a class-less table of rows that are multiples of it."""
        self.principal_component = normalize([random.randint(0, 5) for _ in range(10)])
        self.dataset = data.Table(*prepare_dataset(components=[self.principal_component]))

    def create_dataset_with_classes(self):
        """Store a table with ten features, a class "C" and four class_vars "MC0".."MC3"."""
        domain, features = prepare_dataset(components=[[random.randint(0, 5) for _ in range(10)]])
        domain = data.Domain(domain.features,
                             feature.Discrete("C", values=["F", "T"]),
                             class_vars=[feature.Discrete("MC%i" % i, values=["F", "T"]) for i in range(4)])

        # Five extra random columns are appended -- presumably one for "C"
        # and one for each of the four class_vars.
        self.dataset = data.Table(domain, np.hstack((features, np.random.random((len(features), 5)))))

    def test_projected_domain_can_convert_data_with_class(self):
        self.create_dataset_with_classes()
        projector = linear.Pca(variance_covered=.99)(self.dataset)

        projected_data = projector(self.dataset)
        converted_data = data.Table(projected_data.domain, self.dataset)

        self.assertItemsEqual(projected_data, converted_data)

    def test_projected_domain_can_convert_data_without_class(self):
        self.create_normal_dataset()
        projector = linear.Pca(variance_covered=.99)(self.dataset)

        projected_data = projector(self.dataset)
        converted_data = data.Table(projected_data.domain, self.dataset)

        self.assertItemsEqual(projected_data, converted_data)

    def test_projected_domain_contains_class_vars(self):
        self.create_dataset_with_classes()

        projector = linear.Pca(variance_covered=.99)(self.dataset)
        projected_data = projector(self.dataset)

        self.assertIn(self.dataset.domain.class_var, projected_data.domain)
        for class_ in self.dataset.domain.class_vars:
            self.assertIn(class_, projected_data.domain)
        for ex1, ex2 in zip(self.dataset, projected_data):
            self.assertEqual(ex1.get_class(), ex2.get_class())
            for v1, v2 in zip(ex1.get_classes(), ex2.get_classes()):
                # Compare the original value with the projected one; the
                # previous code compared v2 with itself and could never fail.
                self.assertEqual(v1, v2)

    def test_projects_example(self):
        self.create_normal_dataset()
        projector = linear.Pca(variance_covered=.99)(self.dataset)

        # No assertion: passes as long as projecting a single example does not raise.
        projector(self.dataset[0])

    def test_projects_data_table(self):
        self.create_normal_dataset()
        projector = linear.Pca(variance_covered=.99)(self.dataset)

        # No assertion: passes as long as projecting a whole table does not raise.
        projector(self.dataset)

    def test_converts_input_domain_if_needed(self):
        self.create_normal_dataset()
        projector = linear.Pca(variance_covered=.99)(self.dataset)

        # The new table uses only the first five of the ten original features.
        new_examples = data.Table(data.Domain(self.dataset.domain.features[:5]), [[1.,2.,3.,4.,5.]])

        projector(new_examples)
188
189
class TestFda(unittest.TestCase):
    """Placeholder test case for FDA; it defines no test methods yet."""
192
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
195
Note: See TracBrowser for help on using the repository browser.