source: orange/Orange/testing/testing.py @ 10964:d65c874966b9

Revision 10964:d65c874966b9, 19.9 KB checked in by Ales Erjavec <ales.erjavec@…>, 21 months ago (diff)

Test that the returned distribution has the proper variable (class_var).

Line 
1"""\
2Orange unit testing
3===================
4
5This module contains some classes in common use by Orange unit testing
6framework. In particular its most useful feature is the BaseTestOnData
7(along with ``test_on_data`` function and ``datasets_driven`` class decorators)
8class for automating data driven tests.
9 
10         
11Example of use ::
12
13    from Orange.testing import testing
14    import unittest
15   
    data = [("one", (1,)),
            ("two", (2,))]
18   
19    # Data driven with data_iter argument
20    # data must be reiterable multiple times if more than one test member defined
21    @data_driven(data_iter=data)
22    class TestDemo(unittest.TestCase):
23        @test_on_data
24        def test_instance_on(self, arg):
25            self.assertIsInstance(arg, int)
26           
27        @test_on_data
28        def test_add(self, arg):
29            res = arg + arg
30           
31    # data_driven without argument
32    @data_driven
33    class TestDemo1(unittest.TestCase):
34        @test_on_data(data_iter=data)
35        def test_instance_on(self, arg):
36            self.assertIsInstance(arg, int)
37           
38        @test_on_data(data_iter=data)
39        def test_add(self, arg):
40            res = arg + arg
41   
42    # data_driven without arg, using a static data_iter method
43    @data_driven
44    class TestDemo1(unittest.TestCase):
45        @test_on_data
46        def test_instance_on(self, arg):
47            self.assertIsInstance(arg, int)
48           
49        @test_on_data
50        def test_add(self, arg):
51            res = arg + arg
52           
53        @staticmethod
54        def data_iter():
55            yield "iris", Orange.data.Table("doc:iris")
56       
    #@data_driven(data_iter=testing.datasets_iter(testing.CLASSIFICATION_DATASETS + testing.CLASSLES_DATASETS))
    @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS +
                     testing.CLASSLES_DATASETS)
60    class TestDefaultLearner(unittest.TestCase):
61        @test_on_data
62        def test_learner_on(self, dataset):
63            import Orange
            Orange.classification.majority.MajorityLearner(dataset)
65           
        # this overrides the data sets given to the class decorator
        @test_on_datasets(datasets=testing.CLASSLES_DATASETS)
        def test_raise_missing_class_on(self, dataset):
            import Orange
            Orange.classification.majority.MajorityLearner(dataset)
71       
72"""
73from __future__ import absolute_import
74try:
75    # on python 2.6
76    import unittest2 as unittest
77    import pickle
78except:
79    import unittest
80    import cPickle as pickle
81
82import os, sys
83from functools import wraps
84import itertools
85from functools import partial
86
87import orange
88from Orange.data import preprocess
89
# Bit flags naming the basic kinds of tests.
TEST_CLASSIFICATION = 1 << 0
TEST_REGRESSION = 1 << 1
TEST_PICKLE = 1 << 2

# All three basic flags combined.
TEST_ALL = TEST_CLASSIFICATION | TEST_REGRESSION | TEST_PICKLE
# Everything except the regression bit.
TEST_ALL_CLASSIFICATION = TEST_ALL & ~TEST_REGRESSION
# NOTE(review): this evaluates to the regression bit alone (the pickle bit
# is cleared as well), unlike TEST_ALL_CLASSIFICATION above -- confirm that
# this asymmetry is intended before relying on it.
TEST_ALL_REGRESSION = TEST_ALL & ~TEST_ALL_CLASSIFICATION

TEST_CLASSLESS = 1 << 3

# Preprocessing flags tested by ``open_data``.
DISCRETIZE_DOMAIN = 1 << 4
CONTINUIZE_DOMAIN = 1 << 5
102
# Directory with the data sets distributed next to the ``orange`` module.
datasetsdir = os.path.join(os.path.dirname(orange.__file__), "doc", "datasets")


def open_data(name, flags=0):
    """ Load the data set `name` from ``datasetsdir`` and return it.

    :param name: Name of the data set, relative to ``datasetsdir``.
    :param flags: Bit mask. With ``CONTINUIZE_DOMAIN`` set the data is
        passed through ``preprocess.Continuize``; otherwise, with
        ``DISCRETIZE_DOMAIN`` set, through ``preprocess.Discretize``
        (the class is left undiscretized). ``CONTINUIZE_DOMAIN`` wins when
        both are set.
    :returns: The loaded table; its ``name`` attribute is set to `name`.

    """
    table = orange.ExampleTable(os.path.join(datasetsdir, name))

    transform = None
    if flags & CONTINUIZE_DOMAIN:
        transform = preprocess.Continuize()
    elif flags & DISCRETIZE_DOMAIN:
        transform = preprocess.Discretize(method=orange.EquiNDiscretization(),
                                          discretize_class=False)

    if transform is not None:
        table = transform(table)

    table.name = name
    return table
118
# Names of the bundled data sets, grouped by the kind of problem they pose.
CLASSIFICATION_DATASETS = ["iris", "brown-selected", "lenses", "monks-1"]
REGRESSION_DATASETS = ["housing", "auto-mpg", "servo"]
CLASSLES_DATASETS = ["water-treatment"]
# Correctly spelled alias of CLASSLES_DATASETS. The misspelled name is kept
# because existing code may import it.
CLASSLESS_DATASETS = CLASSLES_DATASETS
ALL_DATASETS = CLASSIFICATION_DATASETS + REGRESSION_DATASETS + CLASSLES_DATASETS
123
124
125from collections import namedtuple
126ExtraArgs = namedtuple("ExtraArgs", "args kwargs")
127
128
129def _expanded(func, name, extra_args):
130    """ Return an expanded function name and the function itself.
131    """
132    from functools import wraps
133    if isinstance(extra_args, ExtraArgs):
134        extra_args, extra_kwargs = extra_args
135    else:
136        extra_kwargs = {}
137    @wraps(func)
138    def expanded(*fixed_args, **fixed_kwargs):
139        call = partial(partial(func, *fixed_args, **fixed_kwargs), *extra_args, **extra_kwargs)
140        return call()
141#    expanded = partial(func, args, kwargs)
142#    expanded = wraps(func)(expanded)
143    newname = func.__name__ + "_" + name.replace("-", "_")
144    expanded.__name__ = newname
145    expanded.__doc__ = None
146    return newname, expanded
147
148def _expanded_lazy(func, name, args_getter):
149    """ Return an expanded function name and the function itself.
150    arge_getter must return the expanded arguments when called.
151   
152    """
153    from functools import wraps
154    @wraps(func)
155    def expanded(*fixed_args, **kwargs):
156        kwargs = kwargs.copy()
157        extra_args = args_getter()
158        if isinstance(extra_args, ExtraArgs):
159            extra_args, extra_kwargs = extra_args
160        else:
161            extra_kwargs = {}
162        call = partial(partial(func, fixed_args, kwargs), extra_args, extra_kwargs)
163        return call()
164
165    newname = func.__name__ + "_" + name.replace("-", "_")
166    expanded.__name__ = newname
167    expanded.__doc__ = None
168    return newname, expanded
169
170
def _data_driven_cls_decorator(cls, data_iter=None, lazy=False):
    """ A class decorator that expands TestCase subclass
    methods decorated with `test_on_data` or `data_driven`
    decorator.

    :param cls: The class to expand (modified in place).
    :param data_iter: An iterable of ``(name, arguments)`` data cases used
        by members which do not supply their own. If ``None`` the class'
        ``data_iter`` attribute (if any) is called to obtain it.
    :param lazy: If true the arguments from `data_iter` are callables
        returning the actual arguments when a test is run.
    :returns: `cls`
    :raises ValueError: if a member has no data cases, neither its own
        nor from the class.

    """
    if data_iter is None:
        # data_iter should be a staticmethod or classmethod
        data_iter = getattr(cls, "data_iter", None)
        if data_iter is not None:
            data_iter = data_iter()

    if data_iter is not None:
        # Because it needs to be iterated multiple times (once for each
        # member not overriding it)
        data_iter = list(data_iter)

    # `dir` is evaluated once, members added below are not visited.
    for test_name in dir(cls):
        val = getattr(cls, test_name)
        if not hasattr(val, "_data_iter"):
            continue

        member_data_iter = val._data_iter
        if isinstance(member_data_iter, tuple):
            # (data_iter, lazy) as stored by test_on_data(_lazy)
            member_data_iter, lazy_iter = member_data_iter
        else:
            lazy_iter = lazy

        if member_data_iter is None:
            # The member has no data cases of its own, use the class'
            # cases together with the class' laziness.
            member_data_iter, lazy_iter = data_iter, lazy

        if member_data_iter is None:
            raise ValueError("No data cases to expand %r of %r on" %
                             (test_name, cls))

        for name, expand_args in member_data_iter:
            # The member's own flag decides, not the class level default.
            if lazy_iter:
                newname, expanded = _expanded_lazy(val, name, expand_args)
            else:
                newname, expanded = _expanded(val, name, expand_args)

            setattr(cls, newname, expanded)
        # NOTE(review): the template is replaced with None, presumably so
        # that it is not collected as a test itself -- confirm.
        setattr(cls, test_name, None)
    return cls
208
def data_driven(cls=None, data_iter=None):
    """ Class decorator for building data driven test cases.

    :param cls: The class to decorate (when used without arguments).
    :param data_iter: An iterator supplying the names and arguments for
        the expanded test.
    :returns: The decorated class if `cls` is given, otherwise a class
        decorator.

    Example ::

        data_for_tests = [("one", (1, )), ("two", (2, ))]

        @data_driven(data_iter=data_for_tests)
        class MyTestCase(unittest.TestCase):
            @test_on_data
            def test_add_on(self, number):
                number + number

    The tests are then accessible from the command line ::

        python -m unittest MyTestCase.MyTestCase.test_add_on_one

    """
    if cls is None:
        #Used as
        # @data_driven(data_iter=...)   or   @data_driven()
        # class ...
        return partial(_data_driven_cls_decorator, data_iter=data_iter)

    #Used as
    # @data_driven
    # class ...
    # (or called directly with both the class and the data cases)
    return _data_driven_cls_decorator(cls, data_iter=data_iter)
240
241
242
def data_driven_lazy(cls=None, data_iter=None):
    """ Same as :func:`data_driven` except that `data_iter` is
    interpreted as a lazy data iterator, i.e. it supplies callables
    which return the arguments when an expanded test is run.

    :param cls: The class to decorate (when used without arguments).
    :param data_iter: An iterator supplying the names and argument
        getters for the expanded tests.
    :returns: The decorated class if `cls` is given, otherwise a class
        decorator.

    """
    if cls is None:
        #Used as
        # @data_driven_lazy(data_iter= ...)
        # class ...
        return partial(_data_driven_cls_decorator, data_iter=data_iter,
                       lazy=True)

    #Used as
    # @data_driven_lazy
    # class ...
    return _data_driven_cls_decorator(cls, data_iter=data_iter, lazy=True)
254
def test_on_data(test_func=None, data_iter=None):
    """ Decorator for test member of unittest.TestCase, signaling that it
    wants to be expanded (replicated) on each test's data case. This decorator
    accepts an optional parameter (a data case iterator, see
    `Data Iterators`_) which overrides the iterator passed to
    :obj:`data_driven` decorator.

    :param test_func: The test member to mark (when used without
        arguments).
    :param data_iter: Optional data case iterator; must be passed by
        keyword.
    :returns: `test_func` with its ``_data_iter`` attribute set to
        ``(data_iter, False)``, or a decorator doing so if `test_func`
        is not given.

    Example ::

        @data_driven
        class MyTestCase(TestCase):
            @test_on_data(data_iter=datasets_iter())
            def test_on(self, data):
                ''' This will be a separate test case for each data-set
                instance.
                '''
                print(data.name)

    .. note:: The actual expanding is done by `data_driven` class decorator.

    .. note:: Within the unittest framework `test_on` test will be expanded
        to `test_on_iris`, `test_on_lenses` ... for each dataset returned
        by :obj:`datasets_iter`. You can then run individual tests from
        the command line (requires Python 2.7) ::

           python -m unittest mymodule.MyTestCase.test_on_iris

    """
    def set_iter(func):
        # False marks the data cases as eager (not lazy).
        func._data_iter = data_iter, False
        return func

    if test_func is None:
        # Used with arguments (or as ``@test_on_data()``)
        return set_iter
    return set_iter(test_func)
291
292
def test_on_data_lazy(test_func=None, data_iter=None):
    """ Same as :func:`test_on_data` except the ``data_iter`` is
    interpreted as a lazy data iterator (see `Data Iterators`_).

    :param test_func: The test member to mark (when used without
        arguments).
    :param data_iter: Optional lazy data case iterator; must be passed
        by keyword.
    :returns: `test_func` with its ``_data_iter`` attribute set to
        ``(data_iter, True)``, or a decorator doing so if `test_func`
        is not given.

    """
    def set_iter(func):
        # True marks the data cases as lazy.
        func._data_iter = data_iter, True
        return func

    if test_func is None:
        # Used with arguments (or as ``@test_on_data_lazy()``)
        return set_iter
    return set_iter(test_func)
306
307
def datasets_iter(datasets=ALL_DATASETS, preprocess=0):
    """ Iterate over ``(name, (data,))`` data cases for the named data sets.

    :param datasets: Names of the data sets to open.
    :param preprocess: Flags passed on to :func:`open_data`.

    Each data set is opened when its case is reached; dashes in the
    yielded name are replaced with underscores.

    """
    for dataset_name in datasets:
        table = open_data(dataset_name, flags=preprocess)
        yield dataset_name.replace("-", "_"), (table,)
313
314
def datasets_iter_lazy(datasets=ALL_DATASETS, preprocess=0):
    """ Iterate over ``(name, getter)`` data cases for the named data sets.

    :param datasets: Names of the data sets.
    :param preprocess: Flags passed on to :func:`open_data`.

    Unlike :func:`datasets_iter` no data set is opened while iterating;
    calling ``getter()`` opens the data set and returns ``(data,)``.
    Dashes in the yielded name are replaced with underscores.

    """
    for name in datasets:
        # Bind the current values as defaults. A plain closure would look
        # `name` up only when called, by which time the loop has already
        # rebound it (to the underscored or to a later name).
        def load(name=name, flags=preprocess):
            return (open_data(name, flags=flags),)

        yield name.replace("-", "_"), load
320
321
def test_on_datasets(test_func=None, datasets=ALL_DATASETS):
    """ Shorthand for ``test_on_data(data_iter=datasets_iter(datasets))``.

    Returns the marked `test_func` if it is given, otherwise a decorator.

    """
    mark = test_on_data(data_iter=datasets_iter(datasets))
    return mark if test_func is None else mark(test_func)
329
330
def datasets_driven(cls=None, datasets=ALL_DATASETS, preprocess=0):
    """ Shorthand for
    ``data_driven(data_iter=datasets_iter(datasets, preprocess))``.

    Returns the decorated `cls` if it is given, otherwise a class decorator.

    """
    decorate = data_driven(data_iter=datasets_iter(datasets, preprocess))
    return decorate if cls is None else decorate(cls)
338
339
class DataTestCase(unittest.TestCase):
    """ Common base class of the data driven test cases in this module.

    Adds nothing to :class:`unittest.TestCase`.
    """
343
344import Orange
345from Orange.evaluation import testing as _testing
346from Orange.evaluation import scoring as _scoring
347from Orange.core import MakeRandomIndices2 as _MakeRandomIndices2
348
349
class LearnerTestCase(DataTestCase):
    """ A basic test class for orange learner class. Must define
    class variable `LEARNER` in a subclass or define the proper
    setUp method which sets ``self.learner``.

    The ``test_*_on`` members are marked with ``test_on_data``; they
    receive a data set argument and are meant to be expanded by one of the
    class decorators in this module.

    """

    # The learner under test; to be overridden in a subclass.
    LEARNER = None

    def setUp(self):
        """ Set up the learner for the test from the ``LEARNER`` class member.
        """
        self.learner = self.LEARNER

    @test_on_data
    def test_learner_on(self, dataset):
        """ Default test case for Orange learners.

        Builds a classifier on one part of `dataset` and checks, for
        every example of the other part, the types of the predicted value
        and distribution, that the distribution belongs to the classifier's
        class variable and that it sums to (approximately) 1.
        """
        # Split the data in two; stratified when the class is discrete.
        if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
            indices = _MakeRandomIndices2(p0=0.3, stratified=True)(dataset)
        else:
            indices = _MakeRandomIndices2(p0=0.3)(dataset)
        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # The classifier must expose the class variable of the data
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertIs(classifier.class_var, dataset.domain.class_var)

        # The result is unused; the call only has to run without raising.
        res = _testing.test_on_data([classifier], test)

        for ex in test:
            # All three calling conventions must return the proper types
            self.assertIsInstance(classifier(ex, Orange.core.GetValue),
                                  Orange.core.Value)
            self.assertIsInstance(classifier(ex, Orange.core.GetProbabilities),
                                  Orange.core.Distribution)

            value, dist = classifier(ex, Orange.core.GetBoth)

            self.assertIsInstance(value, Orange.core.Value)
            self.assertIsInstance(dist, Orange.core.Distribution)

            # The returned distribution must be over the class variable
            self.assertIs(dist.variable, classifier.class_var)

            # Continuous distributions are summed over their values()
            if isinstance(dist, Orange.core.ContDistribution):
                dist_sum = sum(dist.values())
            else:
                dist_sum = sum(dist)

            # The distribution must be normalized (within 1e-3)
            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # just for fun also test this
#            self.assertLess(abs(dist_sum - dist.abs), 1e-3)
            # not fun because it fails

        # Store classifier for possible use in subclasses
        self.classifier = classifier

    @test_on_data
    def test_pickling_on(self, dataset):
        """ Test learner and classifier pickling.

        The classifier built on `dataset` is pickled and unpickled; the
        clone must predict the same values as the original.
        """
        classifier = self.learner(dataset)

        s = pickle.dumps(classifier)
        classifier_clone = pickle.loads(s)

        # NOTE(review): presumably selects (up to) 20 examples to compare
        # the predictions on -- confirm the meaning of p0=20.
        indices = orange.MakeRandomIndices2(p0=20)(dataset)
        test = dataset.select(indices, 0)

        for ex in test:
            if isinstance(dataset.domain.class_var, Orange.feature.Continuous):
                # Test to third digit after the decimal point
                # (or fewer, if the class variable has fewer decimals)
                self.assertAlmostEqual(classifier(ex, orange.GetValue).native(),
                                       classifier_clone(ex, orange.GetValue).native(),
                                       min(3, dataset.domain.class_var.number_of_decimals),
                                       "Pickled and original classifier return a different value!")
            else:
                self.assertEqual(classifier(ex, orange.GetValue),
                                 classifier_clone(ex, orange.GetValue),
                                 "Pickled and original classifier return a different value!")
434
435
class MeasureAttributeTestCase(DataTestCase):
    """ Test orange MeasureAttribute subclass.

    .. todo:: Test if measures respect `handlesDiscrete`, `handlesContinuous`
        `computesThresholds`, `needs` (raise the appropriate exception). Test
        `thresholdFunction`.
    """
    # MEASURE must be defined in the subclass
    MEASURE = None

    def setUp(self):
        """ Take the measure to test from the ``MEASURE`` class member.
        """
        self.measure = self.MEASURE

    @test_on_data
    def test_measure_attribute_on(self, data):
        """ Score every attribute of `data`; at least one of the scores
        must be positive.
        """
        scores = [self.measure(attr, data)
                  for attr in data.domain.attributes]
        # any scores actually non zero
        has_positive = any(score > 0.0 for score in scores)
        self.assertTrue(has_positive)

    def test_pickle(self):
        """ The measure must survive a pickle round trip.
        """
        pickle.loads(pickle.dumps(self.measure))
        # TODO: make sure the unpickled measure computes the same scores
        # as the original
469
470
class PreprocessorTestCase(DataTestCase):
    """ Test orange.Preprocessor subclass

    ``PREPROCESSOR`` is to be set in a subclass.
    """
    PREPROCESSOR = None

    def setUp(self):
        """ Take the preprocessor to test from the ``PREPROCESSOR`` member.
        """
        self.preprocessor = self.PREPROCESSOR

    @test_on_data
    def test_preprocessor_on(self, dataset):
        """ The preprocessor must be applicable to `dataset`
        (the result is not inspected).
        """
        self.preprocessor(dataset)

    def test_pickle(self):
        """ The preprocessor must survive a pickle round trip.
        """
        if isinstance(self.preprocessor, type):
            # A class was given; test the default constructed instance too
            pickle.loads(pickle.dumps(self.preprocessor()))

        pickle.loads(pickle.dumps(self.preprocessor))
496
497
498from Orange.distance import distance_matrix
499from Orange.utils import member_set
500
501
class DistanceTestCase(DataTestCase):
    """ Test orange.ExamplesDistance/Constructor

    ``DISTANCE_CONSTRUCTOR`` is to be set in a subclass.
    """
    DISTANCE_CONSTRUCTOR = None

    def setUp(self):
        """ Take the constructor to test from ``DISTANCE_CONSTRUCTOR``.
        """
        self.distance_constructor = self.DISTANCE_CONSTRUCTOR

    @test_on_data
    def test_distance_on(self, dataset):
        """ Compute the distance matrix of (a part of) `dataset` and check
        its type, dimension and that no distance is negative; with a class
        variable present the matrix is computed once more with
        ``ignore_class`` switched off.
        """
        import numpy
        # NOTE(review): presumably limits the test to (at most) 20
        # examples -- confirm the semantics of this call.
        indices = orange.MakeRandomIndices2(dataset, min(20, len(dataset)))
        dataset = dataset.select(indices, 0)
        with member_set(self.distance_constructor, "ignore_class", True):
            mat = distance_matrix(dataset, self.distance_constructor)

        self.assertIsInstance(mat, Orange.misc.SymMatrix)
        self.assertEqual(mat.dim, len(dataset))

        m = numpy.array(list(mat))
        self.assertTrue((m >= 0.0).all())

        if dataset.domain.class_var:
            with member_set(self.distance_constructor, "ignore_class", False):
                try:
                    mat = distance_matrix(dataset, self.distance_constructor)
                except orange.KernelException as ex:
                    # Constructors reporting they can not use the class
                    # are excused from the rest of the test.
                    if "not supported" in str(ex):
                        return
                    else:
                        raise
            m1 = numpy.array(list(mat))
            # NOTE(review): ``or dataset`` makes this pass for every non
            # empty data set, so the check is effectively disabled; left
            # as is because enabling it could fail existing test suites.
            self.assertTrue((m1 != m).all() or dataset,
                            "%r does not seem to respect the 'ignore_class' flag"
                            % (self.distance_constructor,))
535
536def test_case_script(path):
537    """ Return a TestCase instance from a script in `path`.
538    The script will be run in the directory it is in.
539   
540    :param path: The path to the script to test
541    :type path: str
542    """
543    dirname = os.path.dirname(os.path.realpath(path))
544    _dir = {}
545    def setUp():
546        _dir["cwd"] = os.path.realpath(os.curdir)
547        os.chdir(dirname)
548    def tearDown():
549        os.chdir(_dir["cwd"])
550
551    def runScript():
552        execfile(path, {})
553
554    runScript.__name__ = "runScript %s" % os.path.basename(path)
555    return unittest.FunctionTestCase(runScript, setUp=setUp, tearDown=tearDown)
556
557
def test_suite_scripts(path):
    """ Return a TestSuite for testing all scripts in a directory `path`

    :param path: Directory path
    :type path: str
    :returns: A :class:`unittest.TestSuite` with one test (see
        :func:`test_case_script`) for every ``*.py`` file directly in
        `path`, ordered by file name. Files whose name starts with a dot
        are skipped and an unreadable directory gives an empty suite.
    """
    import fnmatch
    # Uses only documented functions instead of the undocumented
    # ``glob.glob1`` helper, keeping that helper's behaviour.
    try:
        names = os.listdir(path or os.curdir)
    except OSError:
        names = []
    # Sorted so the scripts run in the same order on every platform.
    scripts = sorted(name for name in fnmatch.filter(names, "*.py")
                     if not name.startswith("."))
    return unittest.TestSuite([test_case_script(os.path.join(path, name))
                               for name in scripts])
566
567
# The original ``run`` method, kept so that the patch can be undone.
_default_run = unittest.TestCase.run


def enable_pdb():
    """ Let exceptions raised during a test propagate out of the test
    run, so that the python pdb postmortem debugger can handle them for
    interactive debugging.

    For example you can examine exceptions in tests from ipython -pdb ::

        In [1]: import Orange.testing.testing as testing
        In [2]: testing.enable_pdb()
        In [3]: run tests/test_preprocessors.py
        ---...
        KernelException...
        ipdb>

    .. warning:: This modifies the unittest.TestCase.run method

    """

    def run(self, result=None):
        outcome = self.defaultTestResult() if result is None else result
        outcome.startTest(self)
        method = getattr(self, self._testMethodName)
        try:
            try:
                self.setUp()
                method()
                outcome.addSuccess(self)
            finally:
                # Nothing is caught here: failures and errors are not
                # recorded in the result but propagate to the caller.
                self.tearDown()
        finally:
            outcome.stopTest(self)

    setattr(unittest.TestCase, "run", run)
606
def disable_pdb():
    """ Undo :func:`enable_pdb`: put back the ``unittest.TestCase.run``
    method that was in place when this module was imported.

    """
    setattr(unittest.TestCase, "run", _default_run)
613
# Enable the debugger hook automatically when the tests are run from an
# ipython session which has pdb switched on.
try:
    # `__IPYTHON__` is defined only when running from ipython (NameError
    # otherwise). The chained getattr keeps the import from failing with an
    # AttributeError when the object has no `shell` attribute.
    if getattr(getattr(__IPYTHON__, "shell", None), "call_pdb", None):
        enable_pdb()
except NameError:
    pass
620
621
622def test_module(module):
623    """ A helper function to run all tests from a module. 
624    """
625    loader = unittest.TestLoader()
626    suite = loader.loadTestsFromModule(module)
627    runner = unittest.TextTestRunner()
628    return runner.run(suite)
Note: See TracBrowser for help on using the repository browser.