source: orange/Orange/testing/testing.py @ 11088:1f5732de6ea2

Revision 11088:1f5732de6ea2, 19.8 KB checked in by markotoplak, 15 months ago (diff)

Removed datasetsdir from Orange/testing/testing.py.

RevLine 
[7903]1"""\
2Orange unit testing
3===================
4
5This module contains some classes in common use by Orange unit testing
6framework. In particular its most useful feature is the BaseTestOnData
[8378]7(along with ``test_on_data`` function and ``datasets_driven`` class decorators)
8class for automating data driven tests.
9 
10         
11Example of use ::
[7903]12
[10658]13    from Orange.testing import testing
[8378]14    import unittest
15   
16    data = [("one", (1,)),
17            ("two", (2,))]
18   
19    # Data driven with data_iter argument
20    # data must be reiterable multiple times if more than one test member defined
21    @data_driven(data_iter=data)
22    class TestDemo(unittest.TestCase):
23        @test_on_data
24        def test_instance_on(self, arg):
25            self.assertIsInstance(arg, int)
26           
27        @test_on_data
28        def test_add(self, arg):
29            res = arg + arg
30           
31    # data_driven without argument
32    @data_driven
33    class TestDemo1(unittest.TestCase):
34        @test_on_data(data_iter=data)
35        def test_instance_on(self, arg):
36            self.assertIsInstance(arg, int)
37           
38        @test_on_data(data_iter=data)
39        def test_add(self, arg):
40            res = arg + arg
41   
42    # data_driven without arg, using a static data_iter method
43    @data_driven
44    class TestDemo1(unittest.TestCase):
45        @test_on_data
46        def test_instance_on(self, arg):
47            self.assertIsInstance(arg, int)
48           
49        @test_on_data
50        def test_add(self, arg):
51            res = arg + arg
52           
53        @staticmethod
54        def data_iter():
55            yield "iris", Orange.data.Table("doc:iris")
56       
57    #@data_driven(data_iter=testing.datasets_iter(testing.CLASSIFICATION_DATASETS + testing.CLASSLES_DATASETS))
58    @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS +
59                     testing.CLASSLES_DATASETS)
60    class TestDefaultLearner(unittest.TestCase):
61        @test_on_data
62        def test_learner_on(self, dataset):
63            import Orange
64            Orange.classification.majority.MajorityLearner(dataset)
65           
66        # this overrides the data sets given to the class decorator
67        @test_on_datasets(datasets=testing.CLASSLES_DATASETS)
68        def test_raise_missing_class_on(self, dataset):
69            import Orange
70            Orange.classification.majority.MajorityLearner(dataset)
71       
[7903]72"""
[8378]73from __future__ import absolute_import
try:
    # On Python 2.6 prefer the ``unittest2`` package when it is installed.
    import unittest2 as unittest
    import pickle
except ImportError:
    # ``unittest2`` is not available: use the standard library ``unittest``.
    # Only import failures are handled here; any other error raised while
    # importing is a real problem and must not be silently swallowed.
    import unittest
    import cPickle as pickle
81
[7903]82import os, sys
[10180]83from functools import wraps
[7903]84import itertools
[8378]85from functools import partial
[7903]86
[8378]87import orange
[10542]88from Orange.data import preprocess
[7903]89
# Bit flags naming the kinds of tests.
TEST_CLASSIFICATION = 1 << 0
TEST_REGRESSION = 1 << 1
TEST_PICKLE = 1 << 2

# All three flags above combined.
TEST_ALL = TEST_CLASSIFICATION | TEST_REGRESSION | TEST_PICKLE
# Everything except the regression tests.
TEST_ALL_CLASSIFICATION = TEST_ALL & ~TEST_REGRESSION
# NOTE(review): this evaluates to TEST_REGRESSION alone (2), i.e. without
# TEST_PICKLE, unlike TEST_ALL_CLASSIFICATION which keeps the pickle bit.
# Confirm whether ``TEST_ALL & ~TEST_CLASSIFICATION`` was intended.
TEST_ALL_REGRESSION = TEST_ALL & ~TEST_ALL_CLASSIFICATION

TEST_CLASSLESS = 1 << 3

# Preprocessing flags accepted by ``open_data``.
DISCRETIZE_DOMAIN = 1 << 4
CONTINUIZE_DOMAIN = 1 << 5
102
def open_data(name, flags=0):
    """Open the data set called ``name`` and return it.

    :param name: Name handed to ``orange.ExampleTable``.
    :param flags: Bitwise combination of ``CONTINUIZE_DOMAIN`` and
        ``DISCRETIZE_DOMAIN``. ``CONTINUIZE_DOMAIN`` takes precedence when
        both bits are set; with neither bit set the data is returned
        unprocessed.
    :returns: The (possibly preprocessed) table with its ``name`` attribute
        set to ``name``.
    """
    table = orange.ExampleTable(name)

    transform = None
    if flags & CONTINUIZE_DOMAIN:
        transform = preprocess.Continuize()
    elif flags & DISCRETIZE_DOMAIN:
        # The class variable is left untouched by the discretization.
        transform = preprocess.Discretize(
            method=orange.EquiNDiscretization(),
            discretize_class=False)

    if transform is not None:
        table = transform(table)

    table.name = name
    return table
116
# Names of the data sets the data driven tests run on, grouped by task.
CLASSIFICATION_DATASETS = ["iris", "brown-selected", "lenses", "monks-1"]
REGRESSION_DATASETS = ["housing", "auto-mpg", "servo"]
CLASSLES_DATASETS = ["water-treatment"]
# Correctly spelled alias. The misspelled ``CLASSLES_DATASETS`` is kept
# because existing code may already refer to it.
CLASSLESS_DATASETS = CLASSLES_DATASETS
ALL_DATASETS = CLASSIFICATION_DATASETS + REGRESSION_DATASETS + CLASSLES_DATASETS
[7903]121
[8378]122
from collections import namedtuple

# Bundles the extra positional (``args``) and keyword (``kwargs``) arguments
# that an expanded test is called with.
ExtraArgs = namedtuple("ExtraArgs", ["args", "kwargs"])
125
126
def _expanded(func, name, extra_args):
    """Bind ``func`` to one data case.

    :param func: The function to expand.
    :param name: Name of the data case; dashes are replaced by underscores
        and the result is appended to ``func.__name__``.
    :param extra_args: Either an iterable of extra positional arguments or
        an :obj:`ExtraArgs` instance carrying positional and keyword
        arguments.
    :returns: A ``(new_name, new_function)`` pair. ``new_function`` calls
        ``func`` with the caller's arguments followed by the extra ones; on
        a keyword clash the extra keyword arguments win. Its docstring is
        cleared.
    """
    if isinstance(extra_args, ExtraArgs):
        case_args, case_kwargs = extra_args.args, extra_args.kwargs
    else:
        case_args, case_kwargs = extra_args, {}

    def expanded(*fixed_args, **fixed_kwargs):
        keywords = dict(fixed_kwargs)
        keywords.update(case_kwargs)
        return func(*(fixed_args + tuple(case_args)), **keywords)

    expanded = wraps(func)(expanded)
    newname = "%s_%s" % (func.__name__, name.replace("-", "_"))
    expanded.__name__ = newname
    expanded.__doc__ = None
    return newname, expanded
145
def _expanded_lazy(func, name, args_getter):
    """Bind ``func`` to one lazily evaluated data case.

    :param func: The function to expand.
    :param name: Name of the data case; dashes are replaced by underscores
        and the result is appended to ``func.__name__``.
    :param args_getter: Callable taking no arguments. It is called every
        time the expanded function runs and must return either an iterable
        of extra positional arguments or an :obj:`ExtraArgs` instance.
    :returns: A ``(new_name, new_function)`` pair. ``new_function`` calls
        ``func`` with the caller's arguments followed by the extra ones; on
        a keyword clash the extra keyword arguments win. Its docstring is
        cleared.
    """
    @wraps(func)
    def expanded(*fixed_args, **fixed_kwargs):
        extra_args = args_getter()
        if isinstance(extra_args, ExtraArgs):
            extra_args, extra_kwargs = extra_args
        else:
            extra_kwargs = {}
        keywords = dict(fixed_kwargs)
        keywords.update(extra_kwargs)
        # Unpack the arguments. Passing the tuple and dict objects
        # themselves would call ``func`` with four positional arguments.
        return func(*(fixed_args + tuple(extra_args)), **keywords)

    newname = func.__name__ + "_" + name.replace("-", "_")
    expanded.__name__ = newname
    expanded.__doc__ = None
    return newname, expanded
[8378]167
[10180]168
def _data_driven_cls_decorator(cls, data_iter=None, lazy=False):
    """Expand the data driven test members of a ``TestCase`` subclass.

    Every attribute of ``cls`` that carries a ``_data_iter`` marker (set by
    the ``test_on_data`` / ``test_on_data_lazy`` decorators) is replaced by
    one generated method per data case, named ``<member>_<case name>``.
    The original attribute is then set to ``None``.

    :param cls: The class to process; it is modified in place.
    :param data_iter: Iterable of ``(name, arguments)`` pairs for members
        that do not supply their own. When ``None``, the ``data_iter``
        attribute of ``cls`` (a staticmethod or classmethod) is called to
        obtain it, if present.
    :param lazy: If true, the arguments in ``data_iter`` are callables that
        return the actual arguments when the test runs.
    :returns: ``cls``
    :raises AssertionError: If a marked member has no data iterator of its
        own and none is available for the class.
    """
    if data_iter is None:
        # data_iter should be a staticmethod or classmethod
        data_iter = getattr(cls, "data_iter", None)
        if data_iter is not None:
            data_iter = data_iter()

    if data_iter is not None:
        # Materialized because it is iterated once for every member that
        # does not override it.
        data_iter = list(data_iter)

    for test_name in dir(cls):
        val = getattr(cls, test_name)
        if not hasattr(val, "_data_iter"):
            continue

        marker = val._data_iter
        if isinstance(marker, tuple):
            member_data_iter, lazy_iter = marker
        else:
            member_data_iter, lazy_iter = marker, lazy
        if member_data_iter is None:
            # No iterator on the member (``None``, ``(None, False)`` or
            # ``(None, True)``): use the class level iterator together with
            # the class level laziness it was declared with.
            member_data_iter, lazy_iter = data_iter, lazy

        assert member_data_iter is not None, \
            "no data iterator available for %r" % (test_name,)

        # Honour the laziness of the iterator actually in use; a member may
        # override the class level setting.
        expand = _expanded_lazy if lazy_iter else _expanded
        for name, expand_args in member_data_iter:
            newname, expanded = expand(val, name, expand_args)
            # ``functools.wraps`` copies the marker onto the generated
            # method; drop it so that decorating a subclass does not expand
            # the already expanded methods again.
            expanded.__dict__.pop("_data_iter", None)
            setattr(cls, newname, expanded)

        # Presumably so that the test loader skips the unexpanded template.
        setattr(cls, test_name, None)
    return cls
[7903]206
def data_driven(cls=None, data_iter=None):
    """ Class decorator for building data driven test cases.

    :param cls: The class to decorate when used without arguments.
    :param data_iter: An iterator supplying the names and arguments for
        the expanded test.

    May be used bare (``@data_driven``), with an empty call
    (``@data_driven()``) or with arguments
    (``@data_driven(data_iter=...)``).

    Example ::

        data_for_tests = [("one", (1, )), ("two", (2, ))]

        @data_driven(data_iter=data_for_tests)
        class MyTestCase(unittest.TestCase):
            @test_on_data
            def test_add_on(self, number):
                number + number

    The tests are then accessible from the command line ::

        python -m unittest MyTestCase.MyTestCase.test_add_on_one

    """
    decorator = partial(_data_driven_cls_decorator, data_iter=data_iter)
    if cls is None:
        # Used as
        # @data_driven(data_iter=...)   or   @data_driven()
        # class ...
        return decorator
    # Used as
    # @data_driven
    # class ...
    return decorator(cls)
[10180]238
[8378]239
240
def data_driven_lazy(cls=None, data_iter=None):
    """ Same as :func:`data_driven` except that ``data_iter`` is
    interpreted as a lazy data iterator: the arguments it supplies are
    callables which return the actual arguments when the test runs.

    :param cls: The class to decorate when used without arguments.
    :param data_iter: An iterator supplying the names and argument getters
        for the expanded test.
    """
    decorator = partial(_data_driven_cls_decorator, data_iter=data_iter,
                        lazy=True)
    if cls is None:
        # Used as
        # @data_driven_lazy(data_iter=...)   or   @data_driven_lazy()
        # class ...
        return decorator
    # Used as
    # @data_driven_lazy
    # class ...
    return decorator(cls)
[10180]252
def test_on_data(test_func=None, data_iter=None):
    """ Decorator for test member of unittest.TestCase, signaling that it
    wants to be expanded (replicated) on each test's data case. This decorator
    accepts an optional parameter (a data case iterator, see
    `Data Iterators`_) which overrides the iterator passed to
    :obj:`data_driven` decorator.

    :param test_func: The test function when used as a bare decorator. A
        non-callable value given here is taken to be ``data_iter``.
    :param data_iter: Iterable of ``(name, arguments)`` pairs.

    Example ::

        @data_driven
        class MyTestCase(TestCase):
            @test_on_data(data_iter=datasets_iter())
            def test_on(self, data):
                ''' This will be a separate test case for each data-set
                instance.
                '''
                print data.name

    .. note:: The actual expanding is done by `data_driven` class decorator.

    .. note:: Within the unittest framework `test_on` test will be expanded
        to `test_on_iris`, `test_on_lenses` ... for each dataset returned
        by :obj:`datasets_iter`. You can then run individual tests from
        the command line (requires Python 2.7) ::

           python -m unittest mymodule.MyTestCase.test_on_iris

    """
    if test_func is not None and not callable(test_func):
        # Used as ``@test_on_data(some_iterable)``: the first positional
        # argument is the data iterator, not the function to decorate.
        data_iter, test_func = test_func, None

    def set_iter(func):
        # The flag marks the iterator as not lazy.
        func._data_iter = data_iter, False
        return func

    if test_func is None:
        # Called with arguments (or with none at all): hand back the
        # decorator.
        return set_iter
    return set_iter(test_func)
[10180]289
290
def test_on_data_lazy(test_func=None, data_iter=None):
    """ Same as :func:`test_on_data` except the ``data_iter`` is
    interpreted as a lazy data iterator (see `Data Iterators`_).

    :param test_func: The test function when used as a bare decorator. A
        non-callable value given here is taken to be ``data_iter``.
    :param data_iter: Iterable of ``(name, arguments_getter)`` pairs.

    """
    if test_func is not None and not callable(test_func):
        # Used as ``@test_on_data_lazy(some_iterable)``: the first
        # positional argument is the data iterator, not the function.
        data_iter, test_func = test_func, None

    def set_iter(func):
        # The flag marks the iterator as lazy.
        func._data_iter = data_iter, True
        return func

    if test_func is None:
        # Called with arguments (or with none at all): hand back the
        # decorator.
        return set_iter
    return set_iter(test_func)
[10180]304
305
def datasets_iter(datasets=ALL_DATASETS, preprocess=0):
    """Yield a ``(name, (data,))`` pair for every data set in ``datasets``.

    :param datasets: Names of the data sets to open.
    :param preprocess: Flags passed to :func:`open_data`.

    Dashes in the yielded name are replaced by underscores. Each data set
    is opened when its pair is generated.
    """
    for dataset_name in datasets:
        table = open_data(dataset_name, flags=preprocess)
        yield dataset_name.replace("-", "_"), (table,)
[10180]311
312
def datasets_iter_lazy(datasets=ALL_DATASETS, preprocess=0):
    """Yield a ``(name, getter)`` pair for every data set in ``datasets``.

    :param datasets: Names of the data sets.
    :param preprocess: Flags passed to :func:`open_data`.

    ``getter`` takes no arguments and returns ``(data,)``; the data set is
    only opened when the getter is called. Dashes in the yielded name are
    replaced by underscores.
    """
    for name in datasets:
        # Bind the current name and flags as default values. A plain
        # closure would look ``name`` up when the getter is finally called,
        # by which time it has been rebound by this loop.
        def load(name=name, flags=preprocess):
            return (open_data(name, flags=flags),)

        yield name.replace("-", "_"), load
[10180]318
[8378]319
def test_on_datasets(test_func=None, datasets=ALL_DATASETS):
    """ Same as ``test_on_data(data_iter=datasets_iter(datasets))``.

    :param test_func: The test function when used as a bare decorator. A
        non-callable value given here is taken to be ``datasets``.
    :param datasets: Names of the data sets to expand the test on.
    """
    if test_func is not None and not callable(test_func):
        # Used as ``@test_on_datasets(names)``: the first positional
        # argument is the list of data sets, not the function to decorate.
        datasets, test_func = test_func, None

    decorator = test_on_data(data_iter=datasets_iter(datasets))
    if test_func is None:
        return decorator
    return decorator(test_func)
[7903]327
[8378]328
def datasets_driven(cls=None, datasets=ALL_DATASETS, preprocess=0):
    """ Same as ``data_driven(data_iter=datasets_iter(datasets, preprocess))``.

    Returns the class decorator when ``cls`` is ``None``, otherwise the
    result of applying that decorator to ``cls``.
    """
    decorate = data_driven(data_iter=datasets_iter(datasets, preprocess))
    return decorate if cls is None else decorate(cls)
[10180]336
[8378]337
class DataTestCase(unittest.TestCase):
    """Common base class of the data driven test cases in this module."""
[10180]341
[8378]342import Orange
343from Orange.evaluation import testing as _testing
344from Orange.evaluation import scoring as _scoring
345from Orange.core import MakeRandomIndices2 as _MakeRandomIndices2
346
347
class LearnerTestCase(DataTestCase):
    """ Basic tests for an Orange learner. A subclass must either set the
    class variable ``LEARNER`` or provide its own ``setUp`` method which
    assigns ``self.learner``.

    """

    LEARNER = None

    def setUp(self):
        """ Take the learner under test from the ``LEARNER`` class member.
        """
        self.learner = self.LEARNER

    @test_on_data
    def test_learner_on(self, dataset):
        """ Default test case for Orange learners.
        """
        class_var = dataset.domain.class_var

        # Stratify the random split only for discrete class variables.
        sampler_args = {"p0": 0.3}
        if isinstance(class_var, Orange.feature.Discrete):
            sampler_args["stratified"] = True
        indices = _MakeRandomIndices2(**sampler_args)(dataset)

        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # The classifier must expose the class variable of the data.
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertIs(classifier.class_var, dataset.domain.class_var)

        # The result is not inspected; the call itself is what is exercised.
        _testing.test_on_data([classifier], test)

        for ex in test:
            predicted = classifier(ex, Orange.core.GetValue)
            self.assertIsInstance(predicted, Orange.core.Value)
            probabilities = classifier(ex, Orange.core.GetProbabilities)
            self.assertIsInstance(probabilities, Orange.core.Distribution)

            value, dist = classifier(ex, Orange.core.GetBoth)

            self.assertIsInstance(value, Orange.core.Value)
            self.assertIsInstance(dist, Orange.core.Distribution)

            self.assertIs(dist.variable, classifier.class_var)

            if isinstance(dist, Orange.core.ContDistribution):
                dist_sum = sum(dist.values())
            else:
                dist_sum = sum(dist)

            # The distribution must sum to one, within a tolerance of 1e-3.
            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # Comparing dist_sum against dist.abs is left out on purpose:
            # that check fails.

        # Store classifier for possible use in subclasses
        self.classifier = classifier

    @test_on_data
    def test_pickling_on(self, dataset):
        """ Test learner and classifier pickling.
        """
        classifier = self.learner(dataset)
        classifier_clone = pickle.loads(pickle.dumps(classifier))

        indices = orange.MakeRandomIndices2(p0=20)(dataset)
        test = dataset.select(indices, 0)

        message = "Pickled and original classifier return a different value!"
        class_var = dataset.domain.class_var
        is_continuous = isinstance(class_var, Orange.feature.Continuous)

        for ex in test:
            original = classifier(ex, orange.GetValue)
            restored = classifier_clone(ex, orange.GetValue)
            if is_continuous:
                # Test to third digit after the decimal point
                places = min(3, dataset.domain.class_var.number_of_decimals)
                self.assertAlmostEqual(original.native(), restored.native(),
                                       places, message)
            else:
                self.assertEqual(original, restored, message)
[10180]432
[7903]433
class MeasureAttributeTestCase(DataTestCase):
    """ Tests for an orange MeasureAttribute subclass.

    .. todo:: Test if measures respect `handlesDiscrete`, `handlesContinuous`
        `computesThresholds`, `needs` (raise the appropriate exception). Test
        `thresholdFunction`.
    """
    # MEASURE must be defined in the subclass
    MEASURE = None

    def setUp(self):
        self.measure = self.MEASURE

    @test_on_data
    def test_measure_attribute_on(self, data):
        """ Default test for attribute measures.
        """
        scores = [self.measure(attr, data)
                  for attr in data.domain.attributes]
        # At least one attribute must receive a non zero score.
        self.assertTrue(any(score > 0.0 for score in scores))

    def test_pickle(self):
        """ Test attribute measure pickling support.
        """
        pickle.loads(pickle.dumps(self.measure))
        # TODO: make sure the unpickled measure computes the same scores
        # as the original measure
[10180]467
[7903]468
class PreprocessorTestCase(DataTestCase):
    """ Tests for an orange.Preprocessor subclass. The preprocessor under
    test is taken from the ``PREPROCESSOR`` class member.

    """
    PREPROCESSOR = None

    def setUp(self):
        self.preprocessor = self.PREPROCESSOR

    @test_on_data
    def test_preprocessor_on(self, dataset):
        """ Apply the preprocessor to the dataset.
        """
        self.preprocessor(dataset)

    def test_pickle(self):
        """ Test preprocessor pickling
        """
        if isinstance(self.preprocessor, type):
            # A class was given: also round-trip a default constructed
            # instance.
            pickle.loads(pickle.dumps(self.preprocessor()))

        pickle.loads(pickle.dumps(self.preprocessor))
[10180]494
495
[9663]496from Orange.distance import distance_matrix
[10583]497from Orange.utils import member_set
[8136]498
[8149]499
class DistanceTestCase(DataTestCase):
    """ Test orange.ExamplesDistance/Constructor. The constructor under
    test is taken from the ``DISTANCE_CONSTRUCTOR`` class member.
    """
    DISTANCE_CONSTRUCTOR = None

    def setUp(self):
        self.distance_constructor = self.DISTANCE_CONSTRUCTOR

    @test_on_data
    def test_distance_on(self, dataset):
        """ Compute the distance matrix on a random selection of the
        dataset, with and without the ``ignore_class`` flag set.
        """
        import numpy
        indices = orange.MakeRandomIndices2(dataset, min(20, len(dataset)))
        dataset = dataset.select(indices, 0)
        with member_set(self.distance_constructor, "ignore_class", True):
            mat = distance_matrix(dataset, self.distance_constructor)

        self.assertIsInstance(mat, Orange.misc.SymMatrix)
        self.assertEqual(mat.dim, len(dataset))

        m = numpy.array(list(mat))
        self.assertTrue((m >= 0.0).all())

        if dataset.domain.class_var:
            with member_set(self.distance_constructor, "ignore_class", False):
                try:
                    mat = distance_matrix(dataset, self.distance_constructor)
                except orange.KernelException as ex:
                    # Nothing to compare when the constructor reports that
                    # this mode is not supported.
                    if "not supported" in str(ex):
                        return
                    raise
            m1 = numpy.array(list(mat))
            # NOTE(review): ``or dataset`` makes this assertion pass for
            # every non-empty dataset, so it currently checks nothing.
            # Kept as is to avoid changing which tests pass; confirm the
            # intended condition.
            self.assertTrue(
                (m1 != m).all() or dataset,
                "%r does not seem to respect the 'ignore_class' flag"
                % (self.distance_constructor,))
[10180]533
[7903]534def test_case_script(path):
535    """ Return a TestCase instance from a script in `path`.
536    The script will be run in the directory it is in.
537   
538    :param path: The path to the script to test
539    :type path: str
540    """
541    dirname = os.path.dirname(os.path.realpath(path))
542    _dir = {}
543    def setUp():
544        _dir["cwd"] = os.path.realpath(os.curdir)
545        os.chdir(dirname)
546    def tearDown():
547        os.chdir(_dir["cwd"])
[10180]548
[7903]549    def runScript():
550        execfile(path, {})
[10180]551
[7903]552    runScript.__name__ = "runScript %s" % os.path.basename(path)
553    return unittest.FunctionTestCase(runScript, setUp=setUp, tearDown=tearDown)
554
555
def test_suite_scripts(path):
    """ Return a TestSuite for testing all scripts in a directory `path`

    Every ``*.py`` file directly inside `path` whose name does not start
    with a dot becomes one test case (see :func:`test_case_script`). The
    scripts are added in sorted order so that the suite is the same on
    every run. A directory that cannot be listed yields an empty suite.

    :param path: Directory path
    :type path: str
    """
    import fnmatch
    try:
        names = os.listdir(path or os.curdir)
    except OSError:
        # Same outcome as the ``glob`` based listing used before: no
        # scripts, hence an empty suite.
        names = []
    scripts = sorted(name for name in fnmatch.filter(names, "*.py")
                     if not name.startswith("."))
    return unittest.TestSuite(
        [test_case_script(os.path.join(path, name)) for name in scripts])
[10180]564
[7903]565
# The unmodified ``run`` method, kept so that ``disable_pdb`` can restore it.
_default_run = unittest.TestCase.run


def enable_pdb():
    """ Let exceptions raised during a test propagate out of the test run
    so that the python pdb postmortem debugger can handle them for
    interactive debugging.

    For example you can examine exceptions in tests from ipython -pdb ::

        In [1]: import Orange.testing.testing as testing
        In [2]: testing.enable_pdb()
        In [3]: run tests/test_preprocessors.py
        ---...
        KernelException...
        ipdb>

    .. warning:: This modifies the unittest.TestCase.run method

    """

    def run(self, result=None):
        result = self.defaultTestResult() if result is None else result
        result.startTest(self)
        test_method = getattr(self, self._testMethodName)
        try:
            try:
                self.setUp()
                test_method()
                result.addSuccess(self)
            finally:
                # No ``except`` clause: failures and errors are not
                # recorded in ``result`` but propagate to the caller.
                self.tearDown()
        finally:
            result.stopTest(self)

    unittest.TestCase.run = run
[10180]604
def disable_pdb():
    """ Undo :func:`enable_pdb`: put back the ``unittest.TestCase.run``
    method that was in place when this module was imported.

    """
    setattr(unittest.TestCase, "run", _default_run)
[10180]611
try:
    __IPYTHON__  # We are running tests from ipython
except NameError:
    pass
else:
    # ``__IPYTHON__`` is not guaranteed to have a ``shell`` attribute (it
    # may be a plain flag such as ``True``), so look it up defensively
    # instead of failing at import time.
    if getattr(getattr(__IPYTHON__, "shell", None), "call_pdb", None):
        # pdb is enabled in the interactive shell
        enable_pdb()
[10180]618
619
[7903]620def test_module(module):
621    """ A helper function to run all tests from a module. 
622    """
623    loader = unittest.TestLoader()
624    suite = loader.loadTestsFromModule(module)
625    runner = unittest.TextTestRunner()
626    return runner.run(suite)
Note: See TracBrowser for help on using the repository browser.