source: orange/Orange/misc/testing.py @ 10305:17f7fe8a9393

Revision 10305:17f7fe8a9393, 19.8 KB checked in by mstajdohar, 2 years ago (diff)

Testing pickle on ci server.

Line 
"""\
Orange unit testing
===================

This module contains some classes in common use by the Orange unit testing
framework. In particular its most useful feature is the ``DataTestCase``
class (along with the ``test_on_data`` function decorator and the
``data_driven``/``datasets_driven`` class decorators) for automating data
driven tests.


Example of use ::

    from Orange.misc import testing
    import unittest

    # Each case is a (name, arguments) pair; the arguments are unpacked
    # into the test call, so they must be a sequence.
    data = [("one", (1,)),
            ("two", (2,))]

    # Data driven with data_iter argument
    # data must be reiterable multiple times if more than one test member defined
    @testing.data_driven(data_iter=data)
    class TestDemo(unittest.TestCase):
        @testing.test_on_data
        def test_instance_on(self, arg):
            self.assertIsInstance(arg, int)

        @testing.test_on_data
        def test_add(self, arg):
            res = arg + arg

    # data_driven without argument
    @testing.data_driven
    class TestDemo1(unittest.TestCase):
        @testing.test_on_data(data_iter=data)
        def test_instance_on(self, arg):
            self.assertIsInstance(arg, int)

        @testing.test_on_data(data_iter=data)
        def test_add(self, arg):
            res = arg + arg

    # data_driven without arg, using a static data_iter method
    @testing.data_driven
    class TestDemo2(unittest.TestCase):
        @testing.test_on_data
        def test_instance_on(self, arg):
            self.assertIsInstance(arg, int)

        @testing.test_on_data
        def test_add(self, arg):
            res = arg + arg

        @staticmethod
        def data_iter():
            yield "iris", (Orange.data.Table("doc:iris"),)

    #@testing.data_driven(data_iter=testing.datasets_iter(testing.CLASSIFICATION_DATASETS + testing.CLASSLES_DATASETS))
    @testing.datasets_driven(datasets=testing.CLASSIFICATION_DATASETS +
                             testing.CLASSLES_DATASETS)
    class TestDefaultLearner(unittest.TestCase):
        @testing.test_on_data
        def test_learner_on(self, dataset):
            import Orange
            Orange.classification.majority.MajorityLearner(dataset)

        # this overloads the class decorator's flags
        @testing.test_on_datasets(datasets=testing.CLASSLES_DATASETS)
        def test_raise_missing_class_on(self, dataset):
            import Orange
            Orange.classification.majority.MajorityLearner(dataset)

"""
73from __future__ import absolute_import
74try:
75    # on python 2.6
76    import unittest2 as unittest
77    import pickle
78except:
79    import unittest
80    import cPickle as pickle
81
82import os, sys
83from functools import wraps
84import itertools
85from functools import partial
86
87import orange
88from Orange.preprocess import Preprocessor_discretize, Preprocessor_continuize
89
# Bit flags selecting which kinds of tests apply.
TEST_CLASSIFICATION = 1 << 0
TEST_REGRESSION = 1 << 1
TEST_PICKLE = 1 << 2

# Union of the three basic flags (== 7).
TEST_ALL = TEST_CLASSIFICATION | TEST_REGRESSION | TEST_PICKLE
# Everything in TEST_ALL except the regression bit (== 5).
TEST_ALL_CLASSIFICATION = TEST_ALL & ~TEST_REGRESSION
# Whatever TEST_ALL_CLASSIFICATION leaves over, i.e. the regression bit only (== 2).
# NOTE(review): unlike TEST_ALL_CLASSIFICATION this does not keep the pickle
# bit -- confirm that is intended.
TEST_ALL_REGRESSION = TEST_ALL & ~TEST_ALL_CLASSIFICATION

TEST_CLASSLESS = 1 << 3

# Preprocessing flags understood by ``open_data``.
DISCRETIZE_DOMAIN = 1 << 4
CONTINUIZE_DOMAIN = 1 << 5
102
# Directory with the data sets shipped next to the ``orange`` module.
datasetsdir = os.path.join(
    os.path.dirname(orange.__file__),
    "doc",
    "datasets",
)


def open_data(name, flags=0):
    """ Open the named data set and return it.

    :param name: Name of a data set inside ``datasetsdir``.
    :param flags: Bit mask; ``CONTINUIZE_DOMAIN`` continuizes the data,
        otherwise ``DISCRETIZE_DOMAIN`` discretizes it (class excluded).
        When both bits are set only continuization is applied.
    :returns: The loaded (and possibly preprocessed) table, with its
        ``name`` attribute set to `name`.
    """
    table = orange.ExampleTable(os.path.join(datasetsdir, name))

    # Pick at most one preprocessor; continuization takes precedence.
    transform = None
    if flags & CONTINUIZE_DOMAIN:
        transform = Preprocessor_continuize()
    elif flags & DISCRETIZE_DOMAIN:
        transform = Preprocessor_discretize(
            method=orange.EquiNDiscretization(),
            discretize_class=False)

    if transform is not None:
        table = transform(table)

    table.name = name
    return table
118
# Names of the bundled data sets, grouped by the kind of task they suit.
CLASSIFICATION_DATASETS = [
    "iris",
    "brown-selected",
    "lenses",
    "monks-1",
]
REGRESSION_DATASETS = [
    "housing",
    "auto-mpg",
    "servo",
]
CLASSLES_DATASETS = [
    "water-treatment",
]
# All of the above, in the order classification, regression, classless.
ALL_DATASETS = sum(
    (CLASSIFICATION_DATASETS, REGRESSION_DATASETS, CLASSLES_DATASETS), [])
123
124
125from collections import namedtuple
# Expansion data carrying both positional (``args``) and keyword (``kwargs``)
# arguments; the ``_expanded*`` helpers unpack it into the test call.
ExtraArgs = namedtuple("ExtraArgs", "args kwargs")
127
128
def _expanded(func, name, extra_args):
    """ Build one expanded copy of `func` bound to a data case.

    :param func: The test function to replicate.
    :param name: Case name; appended to ``func.__name__`` with ``-``
        replaced by ``_``.
    :param extra_args: Sequence of extra positional arguments, or an
        ``ExtraArgs`` instance carrying positional and keyword arguments.
    :returns: ``(new_name, new_function)``.
    """
    from functools import wraps

    extra_kwargs = {}
    if isinstance(extra_args, ExtraArgs):
        extra_args, extra_kwargs = extra_args.args, extra_args.kwargs

    def expanded(*fixed_args, **fixed_kwargs):
        # Caller arguments come first; the case's keywords win on a clash.
        keywords = dict(fixed_kwargs)
        keywords.update(extra_kwargs)
        return func(*(fixed_args + tuple(extra_args)), **keywords)

    expanded = wraps(func)(expanded)

    newname = "%s_%s" % (func.__name__, name.replace("-", "_"))
    expanded.__name__ = newname
    # Drop the docstring copied over by ``wraps``.
    expanded.__doc__ = None
    return newname, expanded
147
def _expanded_lazy(func, name, args_getter):
    """ Build one expanded copy of `func` whose arguments are fetched lazily.

    :param func: The test function to replicate.
    :param name: Case name; appended to ``func.__name__`` with ``-``
        replaced by ``_``.
    :param args_getter: Callable taking no arguments. It is called each
        time the expanded function runs and must return either a sequence
        of extra positional arguments or an ``ExtraArgs`` instance.
    :returns: ``(new_name, new_function)``.
    """
    from functools import wraps

    @wraps(func)
    def expanded(*fixed_args, **fixed_kwargs):
        # Resolve the arguments only now, when the test actually runs.
        extra_args = args_getter()
        if isinstance(extra_args, ExtraArgs):
            extra_args, extra_kwargs = extra_args
        else:
            extra_kwargs = {}
        # Unpack both argument sets into the call; passing the tuple and
        # dict themselves would hand `func` the wrong arguments.
        keywords = dict(fixed_kwargs)
        keywords.update(extra_kwargs)
        return func(*(fixed_args + tuple(extra_args)), **keywords)

    newname = func.__name__ + "_" + name.replace("-", "_")
    expanded.__name__ = newname
    # Drop the docstring copied over by ``wraps``.
    expanded.__doc__ = None
    return newname, expanded
169
170
def _data_driven_cls_decorator(cls, data_iter=None, lazy=False):
    """ Expand the data driven test members of a TestCase subclass.

    Every attribute of `cls` carrying a ``_data_iter`` marker (set by
    ``test_on_data`` / ``test_on_data_lazy``) is replaced by one expanded
    method per data case; the original attribute is set to ``None``.

    :param cls: The class to expand (modified in place).
    :param data_iter: Iterable of ``(name, arguments)`` cases used by the
        members that do not supply their own. When ``None`` the class'
        own ``data_iter`` static/class method is called, if it exists.
    :param lazy: Whether the cases of `data_iter` are lazy, i.e. their
        arguments are callables returning the real arguments.
    :returns: `cls`.
    """
    if data_iter is None:
        # data_iter should be a staticmethod or classmethod
        data_iter = getattr(cls, "data_iter", None)
        if data_iter is not None:
            data_iter = data_iter()

    if data_iter is not None:
        # It is iterated once for every member that does not override it.
        data_iter = list(data_iter)

    for test_name in dir(cls):
        val = getattr(cls, test_name)
        if not hasattr(val, "_data_iter"):
            continue

        member_data_iter = val._data_iter
        if isinstance(member_data_iter, tuple):
            member_data_iter, lazy_iter = member_data_iter
        else:
            lazy_iter = lazy
        if member_data_iter is None:
            # No member level iterator (whatever its lazy flag says):
            # use the class level iterator together with its laziness.
            member_data_iter, lazy_iter = data_iter, lazy

        assert member_data_iter is not None, \
            "No data iterator available for %r" % (test_name,)

        # Honour the laziness of the iterator actually in use, not only
        # the class level default.
        expand = _expanded_lazy if lazy_iter else _expanded
        for name, expand_args in member_data_iter:
            newname, expanded = expand(val, name, expand_args)
            setattr(cls, newname, expanded)
        # Hide the template so it is not run as a test itself.
        setattr(cls, test_name, None)
    return cls
208
def data_driven(cls=None, data_iter=None):
    """ Class decorator for building data driven test cases.

    :param data_iter: An iterator supplying the names and arguments for
        the expanded test.

    Example ::

        data_for_tests = [("one", (1, )), ("two", (2, ))]

        @data_driven(data_iter=data_for_tests)
        class MyTestCase(unittest.TestCase):
            @test_on_data
            def test_add_on(self, number):
                number + number

    The tests are then accessible from the command line ::

        python -m unittest MyTestCase.MyTestCase.test_add_on_one

    """
    if data_iter is None:
        # Bare ``@data_driven``: the class is already here, so expand it
        # now. Called with nothing at all there is nothing to do.
        return None if cls is None else _data_driven_cls_decorator(cls)

    # ``@data_driven(data_iter=...)``: return a decorator awaiting the class.
    return partial(_data_driven_cls_decorator, data_iter=data_iter)
240
241
242
def data_driven_lazy(cls=None, data_iter=None):
    """ Same as :func:`data_driven`, except the data cases are lazy: the
    arguments of each case are a callable that returns the real arguments
    when the expanded test runs.

    :param data_iter: An iterator supplying the names and argument
        getters for the expanded tests.
    """
    if data_iter is not None:
        #Used as
        # @data_driven_lazy(data_iter= ...)
        # class ...
        return partial(_data_driven_cls_decorator, data_iter=data_iter, lazy=True)
    elif cls is not None:
        #Used as
        # @data_driven_lazy
        # class ...
        return _data_driven_cls_decorator(cls, lazy=True)
254
def test_on_data(test_func=None, data_iter=None):
    """ Decorator for test member of unittest.TestCase, signaling that it
    wants to be expanded (replicated) on each test's data case. This decorator
    accepts an optional parameter (a data case iterator, see
    `Data Iterators`_) which overrides the iterator passed to
    :obj:`data_driven` decorator.

    It can be applied bare (``@test_on_data``), with empty parentheses
    (``@test_on_data()``), or with the iterator given by keyword or
    position (``@test_on_data(data_iter=cases)``, ``@test_on_data(cases)``).

    Example ::

        @data_driven
        class MyTestCase(TestCase):
            @test_on_data(datasets_iter())
            def test_on(self, data):
                ''' This will be a separate test case for each data-set
                instance.
                '''
                print(data.name)

    .. note:: The actual expanding is done by `data_driven` class decorator.

    .. note:: Within the unittest framework `test_on` test will be expanded
        to `test_on_iris`, `test_on_lenses` ... for each dataset returned
        by :obj:`datasets_iter`. You can then run individual tests from
        the command line (requires Python 2.7) ::

           python -m unittest mymodule.MyTestCase.test_on_iris

    """
    if data_iter is None and test_func is not None and not callable(test_func):
        # ``@test_on_data(cases)``: a non-callable positional argument can
        # only be the data iterator, never the function to decorate.
        test_func, data_iter = None, test_func

    def set_iter(func):
        # The marker is (iterator, lazy flag); ``data_driven`` reads it.
        func._data_iter = data_iter, False
        return func

    if data_iter is not None or test_func is None:
        # Parametrized use (including empty parentheses): hand back the
        # decorator that will receive the test function.
        return set_iter
    return set_iter(test_func)
291
292
def test_on_data_lazy(test_func=None, data_iter=None):
    """ Same as :func:`test_on_data` except the ``data_iter`` is
    interpreted as a lazy data iterator (see `Data Iterators`_).

    Like :func:`test_on_data` it can be applied bare, with empty
    parentheses, or with the iterator given by keyword or position.

    """
    if data_iter is None and test_func is not None and not callable(test_func):
        # A non-callable positional argument can only be the data
        # iterator, never the function to decorate.
        test_func, data_iter = None, test_func

    def set_iter(func):
        # The marker is (iterator, lazy flag); ``data_driven`` reads it.
        func._data_iter = data_iter, True
        return func

    if data_iter is not None or test_func is None:
        # Parametrized use (including empty parentheses): hand back the
        # decorator that will receive the test function.
        return set_iter
    return set_iter(test_func)
306
307
def datasets_iter(datasets=ALL_DATASETS, preprocess=0):
    """ Yield a ``(name, (data,))`` case for every data set in `datasets`.

    Each data set is opened with ``open_data(name, flags=preprocess)`` as
    the generator advances; the yielded name has ``-`` replaced by ``_``.
    """
    for dataset_name in datasets:
        table = open_data(dataset_name, flags=preprocess)
        yield dataset_name.replace("-", "_"), (table,)
313
314
def datasets_iter_lazy(datasets=ALL_DATASETS, preprocess=0):
    """ Lazy variant of :func:`datasets_iter`.

    Yields ``(name, getter)`` cases where ``getter()`` opens the data set
    and returns ``(data,)``; nothing is loaded until the getter is called.
    The yielded name has ``-`` replaced by ``_``.
    """
    for name in datasets:
        # Bind the current values as defaults. A plain closure would read
        # ``name`` only when called, i.e. after it has moved on to a later
        # data set.
        def load(name=name, flags=preprocess):
            return (open_data(name, flags=flags),)
        yield name.replace("-", "_"), load
320
321
def test_on_datasets(test_func=None, datasets=ALL_DATASETS):
    """ Same as ``test_on_data(data_iter=datasets_iter(datasets))``.

    Usable bare (``@test_on_datasets``) or with the data set names given
    by keyword or position (``@test_on_datasets(datasets=names)``,
    ``@test_on_datasets(names)``).
    """
    if test_func is not None and not callable(test_func):
        # ``@test_on_datasets(names)``: a non-callable positional argument
        # is the list of data set names, not the function to decorate.
        test_func, datasets = None, test_func

    decorator = test_on_data(data_iter=datasets_iter(datasets))
    if test_func is None:
        return decorator
    return decorator(test_func)
329
330
def datasets_driven(cls=None, datasets=ALL_DATASETS, preprocess=0):
    """ Same as ``data_driven(data_iter=datasets_iter(datasets, preprocess))``.

    Works both bare (``@datasets_driven``) and with keyword arguments
    (``@datasets_driven(datasets=..., preprocess=...)``).
    """
    decorator = data_driven(data_iter=datasets_iter(datasets, preprocess))
    return decorator if cls is None else decorator(cls)
338
339
class DataTestCase(unittest.TestCase):
    """ Base class for data driven tests.

    It adds nothing to :class:`unittest.TestCase` itself; it only serves
    as the common base of the data driven test case classes in this
    module.
    """
343
344import Orange
345from Orange.evaluation import testing as _testing
346from Orange.evaluation import scoring as _scoring
347from Orange.core import MakeRandomIndices2 as _MakeRandomIndices2
348
349
class LearnerTestCase(DataTestCase):
    """ A basic test class for orange learner class. Must define
    class variable `LEARNER` in a subclass or define the proper
    setUp method which sets ``self.learner``.

    The ``test_*_on`` members are only marked with ``test_on_data`` here;
    they take a `dataset` argument and need a data driven class decorator
    to be expanded into runnable tests.

    """

    # The learner under test; overridden in subclasses.
    LEARNER = None

    def setUp(self):
        """ Set up the learner for the test from the ``LEARNER`` class member.
        """
        self.learner = self.LEARNER

    @test_on_data
    def test_learner_on(self, dataset):
        """ Default test case for Orange learners.

        Trains ``self.learner`` on one part of `dataset`, then checks the
        types returned by the classifier on the other part and that each
        predicted distribution sums to (approximately) one.
        """
        # Random split; stratified only when the class variable is discrete.
        if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
            indices = _MakeRandomIndices2(p0=0.3, stratified=True)(dataset)
        else:
            indices = _MakeRandomIndices2(p0=0.3)(dataset)
        # Rows with index 1 are used for learning, rows with index 0 for testing.
        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # Test for classVar
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertTrue(classifier.class_var is not None)

        # The result is not inspected; the call only has to succeed.
        res = _testing.test_on_data([classifier], test)

        for ex in test:
            # Each result mode must return the matching type.
            self.assertIsInstance(classifier(ex, Orange.core.GetValue), Orange.core.Value)
            self.assertIsInstance(classifier(ex, Orange.core.GetProbabilities), Orange.core.Distribution)

            value, dist = classifier(ex, Orange.core.GetBoth)

            self.assertIsInstance(value, Orange.core.Value)
            self.assertIsInstance(dist, Orange.core.Distribution)

            # Continuous distributions are summed over their values().
            if isinstance(dist, Orange.core.ContDistribution):
                dist_sum = sum(dist.values())
            else:
                dist_sum = sum(dist)

            # The distribution must be non-empty and sum to 1 within 1e-3.
            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # just for fun also test this
#            self.assertLess(abs(dist_sum - dist.abs), 1e-3)
            # not fun because it fails

        # Store classifier for possible use in subclasses
        self.classifier = classifier

    @test_on_data
    def test_pickling_on(self, dataset):
        """ Test learner and classifier pickling.

        The classifier trained on `dataset` is pickled and unpickled, and
        the clone must predict the same values as the original.
        """
        classifier = self.learner(dataset)

        s = pickle.dumps(classifier)
        classifier_clone = pickle.loads(s)

        # Compare predictions on a random subset only (rows with index 0).
        # NOTE(review): p0=20 presumably means 20 instances rather than a
        # proportion -- confirm against the MakeRandomIndices2 documentation.
        indices = orange.MakeRandomIndices2(p0=20)(dataset)
        test = dataset.select(indices, 0)

        for ex in test:
            if isinstance(dataset.domain.class_var, Orange.feature.Continuous):
                # Test to third digit after the decimal point
                # (or fewer, if the class variable has fewer decimals).
                self.assertAlmostEqual(classifier(ex, orange.GetValue).native(),
                                       classifier_clone(ex, orange.GetValue).native(),
                                       min(3, dataset.domain.class_var.number_of_decimals),
                                       "Pickled and original classifier return a different value!")
            else:
                self.assertEqual(classifier(ex, orange.GetValue),
                                 classifier_clone(ex, orange.GetValue),
                                 "Pickled and original classifier return a different value!")
430
431
class MeasureAttributeTestCase(DataTestCase):
    """ Test orange MeasureAttribute subclass.

    .. todo:: Test if measures respect `handlesDiscrete`, `handlesContinuous`
        `computesThresholds`, `needs` (raise the appropriate exception). Test
        `thresholdFunction`.
    """
    #: MEASURE must be defined in the subclass
    MEASURE = None

    def setUp(self):
        """ Take the measure under test from the ``MEASURE`` class member.
        """
        self.measure = self.MEASURE

    @test_on_data
    def test_measure_attribute_on(self, data):
        """ Default test for attribute measures.
        """
        # Score every attribute; individual scores are not checked for
        # sign, only that at least one of them is strictly positive.
        scores = [self.measure(attr, data) for attr in data.domain.attributes]
        self.assertTrue(any(value > 0.0 for value in scores))

    def test_pickle(self):
        """ Test attribute measure pickling support.
        """
        # Only the round trip itself is checked.
        # TODO: make sure the unpickled measure computes the same scores
        pickle.loads(pickle.dumps(self.measure))
465
466
class PreprocessorTestCase(DataTestCase):
    """ Test orange.Preprocessor subclass

    """
    # The preprocessor (class or instance) under test; set in subclasses.
    PREPROCESSOR = None

    def setUp(self):
        """ Take the preprocessor under test from ``PREPROCESSOR``.
        """
        self.preprocessor = self.PREPROCESSOR

    @test_on_data
    def test_preprocessor_on(self, dataset):
        """ Test preprocessor on dataset
        """
        # Only checks that applying the preprocessor does not raise.
        self.preprocessor(dataset)

    def test_pickle(self):
        """ Test preprocessor pickling
        """
        if isinstance(self.preprocessor, type):
            # Given a class: also round-trip a default constructed instance.
            pickle.loads(pickle.dumps(self.preprocessor()))

        pickle.loads(pickle.dumps(self.preprocessor))
492
493
494from Orange.distance import distance_matrix
495from Orange.misc import member_set
496
497
class DistanceTestCase(DataTestCase):
    """ Test orange.ExamplesDistance/Constructor
    """
    # The distance constructor under test; set in subclasses.
    DISTANCE_CONSTRUCTOR = None

    def setUp(self):
        """ Take the constructor under test from ``DISTANCE_CONSTRUCTOR``.
        """
        self.distance_constructor = self.DISTANCE_CONSTRUCTOR

    @test_on_data
    def test_distance_on(self, dataset):
        """ Compute a distance matrix on a sample of `dataset` and check
        its type, dimension and that all distances are non-negative.
        """
        import numpy
        # Work on a random sample of at most 20 instances.
        indices = orange.MakeRandomIndices2(dataset, min(20, len(dataset)))
        dataset = dataset.select(indices, 0)
        with member_set(self.distance_constructor, "ignore_class", True):
            mat = distance_matrix(dataset, self.distance_constructor)

        self.assertIsInstance(mat, Orange.misc.SymMatrix)
        self.assertEqual(mat.dim, len(dataset))

        m = numpy.array(list(mat))
        self.assertTrue((m >= 0.0).all())

        if dataset.domain.class_var:
            # Recompute with the class included in the distance.
            with member_set(self.distance_constructor, "ignore_class", False):
                try:
                    mat = distance_matrix(dataset, self.distance_constructor)
                except orange.KernelException as ex:
                    # Constructors that cannot include the class are skipped.
                    if "not supported" not in str(ex):
                        raise
                    return
            m1 = numpy.array(list(mat))
            # NOTE(review): ``or dataset`` makes this pass for every
            # non-empty sample, so the comparison is effectively disabled
            # -- confirm whether that is intended.
            self.assertTrue((m1 != m).all() or dataset,
                            "%r does not seem to respect the 'ignore_class' flag"
                            % (self.distance_constructor,))
531
532def test_case_script(path):
533    """ Return a TestCase instance from a script in `path`.
534    The script will be run in the directory it is in.
535   
536    :param path: The path to the script to test
537    :type path: str
538    """
539    dirname = os.path.dirname(os.path.realpath(path))
540    _dir = {}
541    def setUp():
542        _dir["cwd"] = os.path.realpath(os.curdir)
543        os.chdir(dirname)
544    def tearDown():
545        os.chdir(_dir["cwd"])
546
547    def runScript():
548        execfile(path, {})
549
550    runScript.__name__ = "runScript %s" % os.path.basename(path)
551    return unittest.FunctionTestCase(runScript, setUp=setUp, tearDown=tearDown)
552
553
554def test_suite_scripts(path):
555    """ Return a TestSuite for testing all scripts in a directory `path`
556   
557    :param path: Directory path
558    :type path: str
559    """
560    import glob
561    return unittest.TestSuite([test_case_script(os.path.join(path, name)) for name in glob.glob1(path, "*.py")])
562
563
# The stock implementation, kept so that ``disable_pdb`` can restore it.
_default_run = unittest.TestCase.run


def enable_pdb():
    """ Enable the python pdb postmortem debugger to handle any
    raised exception during the test for interactive debugging.

    For example you can examine exceptions in tests from ipython -pdb ::

        In [1]: import Orange.misc.testing as testing
        In [2]: testing.enable_pdb()
        In [3]: run tests/test_preprocessors.py
        ---...
        KernelException...
        ipdb>

    .. warning:: This modifies the unittest.TestCase.run method

    """

    def run(self, result=None):
        result = self.defaultTestResult() if result is None else result
        result.startTest(self)
        test_method = getattr(self, self._testMethodName)
        try:
            # Nothing is caught here, test failures included: exceptions
            # propagate to the caller so a post-mortem debugger can see them.
            self.setUp()
            test_method()
            result.addSuccess(self)
        finally:
            # tearDown always runs, and stopTest is reported even when
            # tearDown itself raises.
            try:
                self.tearDown()
            finally:
                result.stopTest(self)

    unittest.TestCase.run = run
602
def disable_pdb():
    """ Undo ``enable_pdb``: put the ``run`` method that
    ``unittest.TestCase`` had when this module was imported back in place.

    """
    setattr(unittest.TestCase, "run", _default_run)
609
try:
    __IPYTHON__  # We are running tests from ipython
    # Old IPython exposes the shell as ``__IPYTHON__.shell``. Newer releases
    # define ``__IPYTHON__`` as a plain ``True``, which has no ``shell``
    # attribute; look it up defensively so that importing this module does
    # not fail there (pdb is then simply not enabled automatically).
    if getattr(getattr(__IPYTHON__, "shell", None), "call_pdb", None):  # Is pdb enabled
        enable_pdb()
except NameError:
    # Not running under IPython.
    pass
616
617
618def test_module(module):
619    """ A helper function to run all tests from a module. 
620    """
621    loader = unittest.TestLoader()
622    suite = loader.loadTestsFromModule(module)
623    runner = unittest.TextTestRunner()
624    return runner.run(suite)
Note: See TracBrowser for help on using the repository browser.