source: orange/Orange/testing/testing.py @ 11631:44acd5d775c3

Revision 11631:44acd5d775c3, 20.4 KB checked in by Ales Erjavec <ales.erjavec@…>, 9 months ago (diff)

Added test for learner pickling support.

Line 
1"""\
2Orange unit testing
3===================
4
5This module contains some classes in common use by Orange unit testing
6framework. In particular its most useful feature is the BaseTestOnData
7(along with ``test_on_data`` function and ``datasets_driven`` class decorators)
8class for automating data driven tests.
9 
10         
11Example of use ::
12
13    from Orange.testing import testing
14    import unittest
15   
16    data = [("one", (1,)),
17            ("two", (2,))]
18   
19    # Data driven with data_iter argument
20    # data must be reiterable multiple times if more than one test member defined
21    @data_driven(data_iter=data)
22    class TestDemo(unittest.TestCase):
23        @test_on_data
24        def test_instance_on(self, arg):
25            self.assertIsInstance(arg, int)
26           
27        @test_on_data
28        def test_add(self, arg):
29            res = arg + arg
30           
31    # data_driven without argument
32    @data_driven
33    class TestDemo1(unittest.TestCase):
34        @test_on_data(data_iter=data)
35        def test_instance_on(self, arg):
36            self.assertIsInstance(arg, int)
37           
38        @test_on_data(data_iter=data)
39        def test_add(self, arg):
40            res = arg + arg
41   
42    # data_driven without arg, using a static data_iter method
43    @data_driven
44    class TestDemo1(unittest.TestCase):
45        @test_on_data
46        def test_instance_on(self, arg):
47            self.assertIsInstance(arg, int)
48           
49        @test_on_data
50        def test_add(self, arg):
51            res = arg + arg
52           
53        @staticmethod
54        def data_iter():
55            yield "iris", Orange.data.Table("doc:iris")
56       
57    #@data_driven(data_iter=testing.datasets_iter(testing.CLASSIFICATION_DATASETS + testing.CLASSLES_DATASETS))
58    @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS +\
59                     testing.CLASSLES_DATASETS)
60    class TestDefaultLearner(unittest.TestCase):
61        @test_on_data
62        def test_learner_on(self, dataset):
63            import Orange
64            Orange.classification.majority.MajorityLearner(dataset)
65           
66        # this overloads the class decorator's flags
67        @test_on_datasets(datasets=testing.CLASSLES_DATASETS)
68        def test_raise_missing_class_on(self, dataset):
69            import Orange
70            Orange.classification.majority.MajorityLearner(dataset)
71       
72"""
73from __future__ import absolute_import
74try:
75    # on python 2.6
76    import unittest2 as unittest
77    import pickle
78except:
79    import unittest
80    import cPickle as pickle
81
82import os, sys
83from functools import wraps
84import itertools
85from functools import partial
86
87import orange
88from Orange.data import preprocess
89
# Test selection flags. Each basic flag occupies its own bit.
TEST_CLASSIFICATION = 1 << 0
TEST_REGRESSION = 1 << 1
TEST_PICKLE = 1 << 2

# Combination of the three basic flags above (== 7).
TEST_ALL = TEST_CLASSIFICATION | TEST_REGRESSION | TEST_PICKLE
# Everything except regression (== 5).
TEST_ALL_CLASSIFICATION = TEST_ALL & ~TEST_REGRESSION
# NOTE(review): this evaluates to 2, i.e. TEST_REGRESSION alone, without
# TEST_PICKLE -- confirm that the asymmetry with TEST_ALL_CLASSIFICATION
# is intended.
TEST_ALL_REGRESSION = TEST_ALL & ~TEST_ALL_CLASSIFICATION

TEST_CLASSLESS = 1 << 3

# Preprocessing flags understood by ``open_data``.
DISCRETIZE_DOMAIN = 1 << 4
CONTINUIZE_DOMAIN = 1 << 5
102
def open_data(name, flags=0):
    """ Load the data set called `name` and return it, optionally
    preprocessed.

    :param name: Name handed to ``orange.ExampleTable``.
    :param flags: Preprocessing selector. If the ``CONTINUIZE_DOMAIN`` bit
        is set the table is passed through ``preprocess.Continuize``;
        otherwise, if ``DISCRETIZE_DOMAIN`` is set, through
        ``preprocess.Discretize`` (class variable left untouched).

    The returned table has its ``name`` attribute set to `name`.
    """
    table = orange.ExampleTable(name)

    transform = None
    if flags & CONTINUIZE_DOMAIN:
        # Continuization wins when both bits are set.
        transform = preprocess.Continuize()
    elif flags & DISCRETIZE_DOMAIN:
        transform = preprocess.Discretize(
            method=orange.EquiNDiscretization(),
            discretize_class=False)

    if transform is not None:
        table = transform(table)

    table.name = name
    return table
116
# Names of the data sets used by the data driven tests, grouped by the
# kind of class variable they carry.
CLASSIFICATION_DATASETS = [
    "iris",
    "brown-selected",
    "lenses",
    "monks-1",
]
REGRESSION_DATASETS = [
    "housing",
    "auto-mpg",
    "servo",
]
CLASSLES_DATASETS = [
    "water-treatment",
]

# Every data set above, classification first, class-less last.
ALL_DATASETS = list(CLASSIFICATION_DATASETS)
ALL_DATASETS.extend(REGRESSION_DATASETS)
ALL_DATASETS.extend(CLASSLES_DATASETS)
121
122
123from collections import namedtuple
124ExtraArgs = namedtuple("ExtraArgs", "args kwargs")
125
126
127def _expanded(func, name, extra_args):
128    """ Return an expanded function name and the function itself.
129    """
130    from functools import wraps
131    if isinstance(extra_args, ExtraArgs):
132        extra_args, extra_kwargs = extra_args
133    else:
134        extra_kwargs = {}
135    @wraps(func)
136    def expanded(*fixed_args, **fixed_kwargs):
137        call = partial(partial(func, *fixed_args, **fixed_kwargs), *extra_args, **extra_kwargs)
138        return call()
139#    expanded = partial(func, args, kwargs)
140#    expanded = wraps(func)(expanded)
141    newname = func.__name__ + "_" + name.replace("-", "_")
142    expanded.__name__ = newname
143    expanded.__doc__ = None
144    return newname, expanded
145
146def _expanded_lazy(func, name, args_getter):
147    """ Return an expanded function name and the function itself.
148    arge_getter must return the expanded arguments when called.
149   
150    """
151    from functools import wraps
152    @wraps(func)
153    def expanded(*fixed_args, **kwargs):
154        kwargs = kwargs.copy()
155        extra_args = args_getter()
156        if isinstance(extra_args, ExtraArgs):
157            extra_args, extra_kwargs = extra_args
158        else:
159            extra_kwargs = {}
160        call = partial(partial(func, fixed_args, kwargs), extra_args, extra_kwargs)
161        return call()
162
163    newname = func.__name__ + "_" + name.replace("-", "_")
164    expanded.__name__ = newname
165    expanded.__doc__ = None
166    return newname, expanded
167
168
169def _data_driven_cls_decorator(cls, data_iter=None, lazy=False):
170    """ A class decorator that expands TestCase subclass
171    methods decorated with `test_on_data` or `data_driven`
172    decorator.
173   
174    """
175    if data_iter is None:
176        data_iter = getattr(cls, "data_iter", None) # data_iter should be a staticmethod or classmethod
177        if data_iter is not None:
178            data_iter = data_iter()
179
180    if data_iter is not None:
181        data_iter = list(data_iter) # Because it needs to be iterated multiple times (each member not overriding it)
182
183    for test_name in dir(cls):
184        val = getattr(cls, test_name)
185        if hasattr(val, "_data_iter"):
186            member_data_iter = val._data_iter
187            if member_data_iter is None or member_data_iter == (None, False):
188                member_data_iter, lazy_iter = data_iter, lazy
189            else:
190                if isinstance(member_data_iter, tuple):
191                    member_data_iter, lazy_iter = member_data_iter
192                else:
193                    lazy_iter = lazy
194
195            assert(member_data_iter is not None)
196            for name, expand_args in iter(member_data_iter):
197                if lazy:
198                    newname, expanded = _expanded_lazy(val, name, expand_args)
199                else:
200                    newname, expanded = _expanded(val, name, expand_args)
201
202                setattr(cls, newname, expanded)
203            setattr(cls, test_name, None)
204            #setattr(cls, "__" + test_name, val)
205    return cls
206
def data_driven(cls=None, data_iter=None):
    """ Class decorator for building data driven test cases.

    :param cls: The class to decorate (supplied implicitly when used as a
        bare ``@data_driven``).
    :param data_iter: An iterator supplying the names and arguments for
        the expanded test.

    :returns: The decorated class when `cls` is given, otherwise a class
        decorator.

    Example ::

        data_for_tests = [("one", (1, )), ("two", (2, ))]

        @data_driven(data_iter=data_for_tests)
        class MyTestCase(unittest.TestCase):
            @test_on_data
            def test_add_on(self, number):
                number + number

    The tests are then accessible from the command line ::

        python -m unittest MyTestCase.MyTestCase.test_add_on_one

    """
    decorator = partial(_data_driven_cls_decorator, data_iter=data_iter)
    if cls is None:
        # Used as
        # @data_driven(data_iter=...)   or   @data_driven()
        # class ...
        return decorator
    # Used as
    # @data_driven
    # class ...
    return decorator(cls)
238
239
240
def data_driven_lazy(cls=None, data_iter=None):
    """ Same as :func:`data_driven` except that `data_iter` is treated as
    a lazy data iterator, i.e. each data case supplies a callable that
    returns the arguments instead of the arguments themselves.

    :returns: The decorated class when `cls` is given, otherwise a class
        decorator.
    """
    decorator = partial(_data_driven_cls_decorator, data_iter=data_iter,
                        lazy=True)
    if cls is None:
        # Used as
        # @data_driven_lazy(data_iter= ...)   or   @data_driven_lazy()
        # class ...
        return decorator
    # Used as
    # @data_driven_lazy
    # class ...
    return decorator(cls)
252
def test_on_data(test_func=None, data_iter=None):
    """ Decorator for test member of unittest.TestCase, signaling that it
    wants to be expanded (replicated) on each test's data case. This decorator
    accepts an optional parameter (a data case iterator, see
    `Data Iterators`_) which overrides the iterator passed to
    :obj:`data_driven` decorator.

    :param test_func: The test function (supplied implicitly when used as
        a bare ``@test_on_data``).
    :param data_iter: Data case iterator; must be passed by keyword.

    :returns: `test_func`, marked with a ``_data_iter`` attribute, or a
        decorator doing so when `test_func` is not given.

    Example ::

        @data_driven
        class MyTestCase(TestCase):
            @test_on_data(data_iter=datasets_iter())
            def test_on(self, data):
                ''' This will be a separate test case for each data-set
                instance.
                '''
                print data.name

    .. note:: The actual expanding is done by `data_driven` class decorator.

    .. note:: Within the unittest framework `test_on` test will be expanded
        to `test_on_iris`, `test_on_lenses` ... for each dataset returned
        by :obj:`datasets_iter`. You can then run individual tests from
        the command line (requires Python 2.7) ::

           python -m unittest mymodule.MyTestCase.test_on_iris

    """
    def set_iter(func):
        # The second item marks the iterator as non-lazy.
        func._data_iter = data_iter, False
        return func

    if test_func is None:
        # Used as @test_on_data(data_iter=...) or @test_on_data()
        return set_iter
    return set_iter(test_func)
289
290
def test_on_data_lazy(test_func=None, data_iter=None):
    """ Same as :func:`test_on_data` except the ``data_iter`` is
    interpreted as a lazy data iterator (see `Data Iterators`_).

    :param test_func: The test function (supplied implicitly when used as
        a bare ``@test_on_data_lazy``).
    :param data_iter: Lazy data case iterator; must be passed by keyword.

    :returns: `test_func`, marked with a ``_data_iter`` attribute, or a
        decorator doing so when `test_func` is not given.

    """
    def set_iter(func):
        # The second item marks the iterator as lazy.
        func._data_iter = data_iter, True
        return func

    if test_func is None:
        # Used as @test_on_data_lazy(data_iter=...) or @test_on_data_lazy()
        return set_iter
    return set_iter(test_func)
304
305
def datasets_iter(datasets=ALL_DATASETS, preprocess=0):
    """ Yield a ``(name, (data,))`` data case for each name in `datasets`.

    Each data set is opened with :func:`open_data`, passing `preprocess`
    as its ``flags``. The yielded name has ``-`` replaced by ``_``.
    """
    for dataset_name in datasets:
        table = open_data(dataset_name, flags=preprocess)
        yield dataset_name.replace("-", "_"), (table,)
311
312
def datasets_iter_lazy(datasets=ALL_DATASETS, preprocess=0):
    """ Lazy variant of :func:`datasets_iter`.

    Yield a ``(name, getter)`` data case for each name in `datasets`,
    where calling ``getter()`` opens the data set with :func:`open_data`
    (passing `preprocess` as its ``flags``) and returns it as a
    one-element tuple. The yielded name has ``-`` replaced by ``_``.
    """
    for name in datasets:
        # Bind the current name as a default argument. A plain closure
        # would look `name` up only when called, by which time the loop
        # has moved on, and every getter would open the same data set.
        def load(dataset_name=name):
            return (open_data(dataset_name, flags=preprocess),)

        yield name.replace("-", "_"), load
318
319
def test_on_datasets(test_func=None, datasets=ALL_DATASETS):
    """ Same as ``test_on_data(data_iter=datasets_iter(datasets))``.

    Returns the marked `test_func` when it is given, otherwise the
    decorator itself.
    """
    decorator = test_on_data(data_iter=datasets_iter(datasets))
    if test_func is not None:
        return decorator(test_func)
    return decorator
327
328
def datasets_driven(cls=None, datasets=ALL_DATASETS, preprocess=0):
    """ Same as ``data_driven(data_iter=datasets_iter(datasets, preprocess))``.

    Returns the decorated `cls` when it is given, otherwise the class
    decorator itself.
    """
    decorator = data_driven(data_iter=datasets_iter(datasets, preprocess))
    if cls is not None:
        return decorator(cls)
    return decorator
336
337
class DataTestCase(unittest.TestCase):
    """ Base class for data driven tests.

    Adds nothing to ``unittest.TestCase``; it is the common ancestor of
    the test case classes defined in this module.
    """
341
342import Orange
343from Orange.evaluation import testing as _testing
344from Orange.evaluation import scoring as _scoring
345from Orange.core import MakeRandomIndices2 as _MakeRandomIndices2
346
347
class LearnerTestCase(DataTestCase):
    """ Basic tests for an Orange learner.

    A subclass must either define the ``LEARNER`` class variable or
    provide its own ``setUp`` which sets ``self.learner``.

    """

    LEARNER = None

    def setUp(self):
        """ Take the learner under test from the ``LEARNER`` class member.
        """
        self.learner = self.LEARNER

    @test_on_data
    def test_learner_on(self, dataset):
        """ Train the learner on part of `dataset` and check the outputs
        of the resulting classifier on the rest.
        """
        # Only a discrete class variable gets a stratified split.
        if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
            make_indices = _MakeRandomIndices2(p0=0.3, stratified=True)
        else:
            make_indices = _MakeRandomIndices2(p0=0.3)
        indices = make_indices(dataset)
        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # The classifier must expose the class variable of the data.
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertIs(classifier.class_var, dataset.domain.class_var)

        # The result is not inspected; the call only has to succeed.
        _testing.test_on_data([classifier], test)

        value_type = Orange.core.Value
        distribution_type = Orange.core.Distribution

        for example in test:
            self.assertIsInstance(
                classifier(example, Orange.core.GetValue), value_type)
            self.assertIsInstance(
                classifier(example, Orange.core.GetProbabilities),
                distribution_type)

            value, dist = classifier(example, Orange.core.GetBoth)

            self.assertIsInstance(value, value_type)
            self.assertIsInstance(dist, distribution_type)

            self.assertIs(dist.variable, classifier.class_var)

            is_continuous = isinstance(dist, Orange.core.ContDistribution)
            dist_sum = sum(dist.values()) if is_continuous else sum(dist)

            # The distribution has to sum to (approximately) one.
            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # A comparison of dist_sum against dist.abs used to be here;
            # it is disabled because it fails.

        # Store classifier for possible use in subclasses
        self.classifier = classifier

    @test_on_data
    def test_pickling_on(self, dataset):
        """ Check that pickled copies of the learner and of the classifier
        predict the same values as the originals.
        """
        classifier_msg = ("Pickled and original classifier return a "
                          "different value!")
        learner_msg = ("Pickled and original learner return a different "
                       "classifier!")

        cloned_learner = pickle.loads(pickle.dumps(self.learner))
        classifier = self.learner(dataset)
        classifier_clone = pickle.loads(pickle.dumps(classifier))
        classifier_from_cloned = cloned_learner(dataset)

        indices = orange.MakeRandomIndices2(p0=20)(dataset)
        test = dataset.select(indices, 0)
        class_var = dataset.domain.class_var
        continuous = isinstance(class_var, Orange.feature.Continuous)

        for example in test:
            prediction1 = classifier(example, orange.GetValue)
            prediction2 = classifier_clone(example, orange.GetValue)
            prediction3 = classifier_from_cloned(example, orange.GetValue)

            if not continuous:
                self.assertEqual(prediction1, prediction2, classifier_msg)
                self.assertEqual(prediction1, prediction3, learner_msg)
                continue

            # Test to third digit after the decimal point
            self.assertAlmostEqual(
                prediction1.native(), prediction2.native(),
                min(3, class_var.number_of_decimals),
                classifier_msg)

            self.assertAlmostEqual(
                prediction1.native(), prediction3.native(),
                min(3, class_var.number_of_decimals),
                learner_msg)
453
454
class MeasureAttributeTestCase(DataTestCase):
    """ Basic tests for an orange MeasureAttribute subclass.

    .. todo:: Test if measures respect `handlesDiscrete`, `handlesContinuous`
        `computesThresholds`, `needs` (raise the appropriate exception). Test
        `thresholdFunction`.
    """
    # MEASURE must be defined in the subclass
    MEASURE = None

    def setUp(self):
        """ Take the measure under test from the ``MEASURE`` class member.
        """
        self.measure = self.MEASURE

    @test_on_data
    def test_measure_attribute_on(self, data):
        """ Score every attribute of `data` and require at least one
        positive score.
        """
        # Individual scores are not required to be non-negative.
        scores = [self.measure(attr, data)
                  for attr in data.domain.attributes]
        # any scores actually non zero
        self.assertTrue(any(score > 0.0 for score in scores))

    def test_pickle(self):
        """ Check that the measure survives a pickle round trip.
        """
        pickle.loads(pickle.dumps(self.measure))
        # TODO: make sure the unpickled measure computes the same scores
        # as the original
488
489
class PreprocessorTestCase(DataTestCase):
    """ Basic tests for an orange.Preprocessor subclass.

    A subclass sets ``PREPROCESSOR`` (or overrides ``setUp`` to assign
    ``self.preprocessor``).

    """
    PREPROCESSOR = None

    def setUp(self):
        """ Take the preprocessor under test from ``PREPROCESSOR``.
        """
        self.preprocessor = self.PREPROCESSOR

    @test_on_data
    def test_preprocessor_on(self, dataset):
        """ Apply the preprocessor to `dataset`; it only has to succeed.
        """
        self.preprocessor(dataset)

    def test_pickle(self):
        """ Check that the preprocessor survives a pickle round trip.
        """
        if isinstance(self.preprocessor, type):
            # A class was given: also test the default constructed instance.
            default_instance = self.preprocessor()
            pickle.loads(pickle.dumps(default_instance))

        pickle.loads(pickle.dumps(self.preprocessor))
515
516
517from Orange.distance import distance_matrix
518from Orange.utils import member_set
519
520
class DistanceTestCase(DataTestCase):
    """ Test orange.ExamplesDistance/Constructor

    A subclass sets ``DISTANCE_CONSTRUCTOR`` (or overrides ``setUp`` to
    assign ``self.distance_constructor``).
    """
    DISTANCE_CONSTRUCTOR = None

    def setUp(self):
        """ Take the constructor under test from ``DISTANCE_CONSTRUCTOR``.
        """
        self.distance_constructor = self.DISTANCE_CONSTRUCTOR

    @test_on_data
    def test_distance_on(self, dataset):
        """ Compute the distance matrix on a sample of `dataset` and check
        its type, dimension and that no distance is negative.
        """
        import numpy
        indices = orange.MakeRandomIndices2(dataset, min(20, len(dataset)))
        dataset = dataset.select(indices, 0)
        # member_set presumably sets the attribute only for the duration
        # of the ``with`` block -- see Orange.utils.
        with member_set(self.distance_constructor, "ignore_class", True):
            mat = distance_matrix(dataset, self.distance_constructor)

        self.assertIsInstance(mat, Orange.misc.SymMatrix)
        self.assertEqual(mat.dim, len(dataset))

        m = numpy.array(list(mat))
        self.assertTrue((m >= 0.0).all())

        if dataset.domain.class_var:
            with member_set(self.distance_constructor, "ignore_class", False):
                try:
                    mat = distance_matrix(dataset, self.distance_constructor)
                except orange.KernelException as ex:
                    # Constructors that cannot take the class into account
                    # are excused from the rest of the test.
                    if "not supported" in str(ex):
                        return
                    else:
                        raise
            m1 = numpy.array(list(mat))
            # NOTE(review): ``or dataset`` makes this assertion pass for
            # any non-empty sample, so the check is effectively disabled.
            # It is kept as found -- confirm whether that is intended.
            self.assertTrue(
                (m1 != m).all() or dataset,
                "%r does not seem to respect the 'ignore_class' flag"
                % (self.distance_constructor,))
554
def test_case_script(path):
    """ Build a TestCase instance which executes the script at `path`.
    The working directory is switched to the script's own directory for
    the duration of the test and restored afterwards.

    :param path: The path to the script to test
    :type path: str
    """
    script_dir = os.path.dirname(os.path.realpath(path))
    # Holds the working directory that was current when setUp ran.
    previous_cwd = []

    def setUp():
        previous_cwd.append(os.path.realpath(os.curdir))
        os.chdir(script_dir)

    def tearDown():
        os.chdir(previous_cwd.pop())

    def runScript():
        # The script runs in a fresh, empty namespace.
        execfile(path, {})

    runScript.__name__ = "runScript %s" % os.path.basename(path)
    return unittest.FunctionTestCase(runScript, setUp=setUp, tearDown=tearDown)
575
576
def test_suite_scripts(path):
    """ Build a TestSuite with one script test case (see
    :func:`test_case_script`) for every ``*.py`` file in directory `path`.

    :param path: Directory path
    :type path: str
    """
    import glob
    suite = unittest.TestSuite()
    for script_name in glob.glob1(path, "*.py"):
        suite.addTest(test_case_script(os.path.join(path, script_name)))
    return suite
585
586
# The original ``TestCase.run``, kept so that ``disable_pdb`` can restore it.
_default_run = unittest.TestCase.run
def enable_pdb():
    """ Enable the python pdb postmortem debugger to handle any
    raised exception during the test for interactive debugging.

    This is done by replacing ``unittest.TestCase.run`` with a version
    that does not record exceptions raised by ``setUp`` or the test
    method in the result, but lets them propagate to the caller.

    For example you can examine exceptions in tests from ipython -pdb ::

        In [1]: import Orange.testing.testing as testing
        In [2]: testing.enable_pdb()
        In [3]: run tests/test_preprocessors.py
        ---...
        KernelException...
        ipdb>

    .. warning:: This modifies the unittest.TestCase.run method

    """

    def run(self, result=None):
        # Replacement for TestCase.run: reports only successes.
        if result is None:
            result = self.defaultTestResult()
        result.startTest(self)
        testMethod = getattr(self, self._testMethodName)
        try:
            try:
                self.setUp()
                testMethod()
                result.addSuccess(self)
#            except self.failureException:
#                result.addFailure(self, self._exc_info())
            except KeyboardInterrupt:
                raise
            finally:
                # tearDown runs even when setUp or the test method raised.
                self.tearDown()
        finally:
            # stopTest is always reported, then any exception propagates.
            result.stopTest(self)

    unittest.TestCase.run = run
625
def disable_pdb():
    """ Undo :func:`enable_pdb`: put back the ``unittest.TestCase.run``
    method that was in place when this module was imported.

    """
    setattr(unittest.TestCase, "run", _default_run)
632
# Turn the postmortem debugger on automatically when the tests are run
# from an IPython session which has pdb enabled.
try:
    __IPYTHON__  # NameError unless we are running tests from ipython
    if getattr(__IPYTHON__.shell, "call_pdb", None): # Is pdb enabled
        enable_pdb()
except Exception:
    # Best effort only: not running under IPython, or its object does not
    # have the expected layout. SystemExit and KeyboardInterrupt are no
    # longer swallowed.
    pass
639
640
641def test_module(module):
642    """ A helper function to run all tests from a module. 
643    """
644    loader = unittest.TestLoader()
645    suite = loader.loadTestsFromModule(module)
646    runner = unittest.TextTestRunner()
647    return runner.run(suite)
Note: See TracBrowser for help on using the repository browser.