source: orange/Orange/testing/testing.py @ 11088:1f5732de6ea2

Revision 11088:1f5732de6ea2, 19.8 KB checked in by markotoplak, 15 months ago (diff)

Removed datasetsdir from Orange/testing/testing.py.

Line 
1"""\
2Orange unit testing
3===================
4
5This module contains some classes in common use by Orange unit testing
6framework. In particular its most useful feature is the BaseTestOnData
7(along with ``test_on_data`` function and ``datasets_driven`` class decorators)
8class for automating data driven tests.
9 
10         
11Example of use ::
12
13    from Orange.testing import testing
14    import unittest
15   
16    data = [("one", (1,)),
17            ("two", (2,))]
18   
19    # Data driven with data_iter argument
20    # data must be reiterable multiple times if more than one test member defined
21    @data_driven(data_iter=data)
22    class TestDemo(unittest.TestCase):
23        @test_on_data
24        def test_instance_on(self, arg):
25            self.assertIsInstance(arg, int)
26           
27        @test_on_data
28        def test_add(self, arg):
29            res = arg + arg
30           
31    # data_driven without argument
32    @data_driven
33    class TestDemo1(unittest.TestCase):
34        @test_on_data(data_iter=data)
35        def test_instance_on(self, arg):
36            self.assertIsInstance(arg, int)
37           
38        @test_on_data(data_iter=data)
39        def test_add(self, arg):
40            res = arg + arg
41   
42    # data_driven without arg, using a static data_iter method
43    @data_driven
44    class TestDemo1(unittest.TestCase):
45        @test_on_data
46        def test_instance_on(self, arg):
47            self.assertIsInstance(arg, int)
48           
49        @test_on_data
50        def test_add(self, arg):
51            res = arg + arg
52           
53        @staticmethod
54        def data_iter():
55            yield "iris", (Orange.data.Table("doc:iris"),)
56       
57    #@data_driven(data_iter=testing.datasets_iter(testing.CLASSIFICATION_DATASETS | testing.CLASSLES_DATASETS))
58    @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS +
59                     testing.CLASSLES_DATASETS)
60    class TestDefaultLearner(unittest.TestCase):
61        @test_on_data
62        def test_learner_on(self, dataset):
63            import Orange
64            Orange.classification.majority.MajorityLearner(dataset)
65           
66        # this overloads the class decorator's flags
67        @test_on_datasets(datasets=testing.CLASSLES_DATASETS)
68        def test_raise_missing_class_on(self, dataset):
69            import Orange
70            Orange.classification.majority.MajorityLearner(dataset)
71       
72"""
73from __future__ import absolute_import
74try:
75    # on python 2.6
76    import unittest2 as unittest
77    import pickle
78except:
79    import unittest
80    import cPickle as pickle
81
82import os, sys
83from functools import wraps
84import itertools
85from functools import partial
86
87import orange
88from Orange.data import preprocess
89
# Individual test-selection bit flags.
TEST_CLASSIFICATION = 1 << 0
TEST_REGRESSION = 1 << 1
TEST_PICKLE = 1 << 2

# Union of the three flags above (== 7).
TEST_ALL = TEST_CLASSIFICATION | TEST_REGRESSION | TEST_PICKLE
# Everything except the regression bit (== 5).
TEST_ALL_CLASSIFICATION = TEST_ALL & ~TEST_REGRESSION
# NOTE(review): this evaluates to 2, i.e. the regression bit only, without
# TEST_PICKLE. By symmetry with TEST_ALL_CLASSIFICATION it was presumably
# meant to be TEST_ALL minus TEST_CLASSIFICATION (== 6) -- confirm before
# changing, the value is kept as in the original.
TEST_ALL_REGRESSION = TEST_ALL & ~TEST_ALL_CLASSIFICATION

TEST_CLASSLESS = 1 << 3

# Preprocessing flags understood by ``open_data``.
DISCRETIZE_DOMAIN = 1 << 4
CONTINUIZE_DOMAIN = 1 << 5
102
def open_data(name, flags=0):
    """Load the data set called `name` and return it.

    :param name: Name handed to ``orange.ExampleTable``; it is also stored
        on the returned table as its ``name`` attribute.
    :param flags: Bit mask. When ``CONTINUIZE_DOMAIN`` is set the table is
        passed through ``preprocess.Continuize``; otherwise, when
        ``DISCRETIZE_DOMAIN`` is set, through ``preprocess.Discretize``
        (the class is left undiscretized). ``CONTINUIZE_DOMAIN`` wins when
        both bits are set.
    :return: The (possibly preprocessed) table.
    """
    table = orange.ExampleTable(name)

    transform = None
    if flags & CONTINUIZE_DOMAIN:
        transform = preprocess.Continuize()
    elif flags & DISCRETIZE_DOMAIN:
        transform = preprocess.Discretize(
            method=orange.EquiNDiscretization(),
            discretize_class=False)

    if transform is not None:
        table = transform(table)

    table.name = name
    return table
116
# Names of the data sets used by the data driven tests, grouped by the kind
# of class variable.
CLASSIFICATION_DATASETS = ["iris", "brown-selected", "lenses", "monks-1"]
REGRESSION_DATASETS = ["housing", "auto-mpg", "servo"]
CLASSLES_DATASETS = ["water-treatment"]
# Correctly spelled alias. The module documentation refers to
# ``CLASSLESS_DATASETS``; the misspelled name is kept for existing callers.
CLASSLESS_DATASETS = CLASSLES_DATASETS
ALL_DATASETS = CLASSIFICATION_DATASETS + REGRESSION_DATASETS + CLASSLES_DATASETS
121
122
from collections import namedtuple

# Pair of (positional args, keyword args) a data iterator may yield when a
# test needs keyword arguments in addition to positional ones.
ExtraArgs = namedtuple("ExtraArgs", ["args", "kwargs"])
125
126
def _expanded(func, name, extra_args):
    """ Return an expanded function name and the function itself.

    :param func: The test template to specialize.
    :param name: Data case name; it is appended to ``func.__name__``
        (with ``-`` replaced by ``_``) to form the new test name.
    :param extra_args: Either a sequence of extra positional arguments or
        an :obj:`ExtraArgs` instance carrying positional and keyword
        arguments.
    :return: ``(newname, expanded)`` where calling ``expanded(*a, **kw)``
        calls ``func(*a, *extra_args, **kw, **extra_kwargs)``.
    """
    if isinstance(extra_args, ExtraArgs):
        extra_args, extra_kwargs = extra_args
    else:
        extra_kwargs = {}

    @wraps(func)
    def expanded(*fixed_args, **fixed_kwargs):
        # Caller supplied arguments (``self``) come first, the data case
        # arguments after them; data case keywords win on a name clash.
        call = partial(partial(func, *fixed_args, **fixed_kwargs),
                       *extra_args, **extra_kwargs)
        return call()

    # ``wraps`` copies ``func.__dict__`` and with it the ``_data_iter``
    # marker. Drop it so the generated test is not mistaken for a template
    # (and expanded a second time) when a subclass is decorated later.
    expanded.__dict__.pop("_data_iter", None)

    newname = func.__name__ + "_" + name.replace("-", "_")
    expanded.__name__ = newname
    expanded.__doc__ = None
    return newname, expanded
145
def _expanded_lazy(func, name, args_getter):
    """ Return an expanded function name and the function itself.

    Lazy counterpart of ``_expanded``: the extra arguments are not known up
    front but are obtained by calling `args_getter` each time the expanded
    function runs.

    :param func: The test template to specialize.
    :param name: Data case name; it is appended to ``func.__name__``
        (with ``-`` replaced by ``_``) to form the new test name.
    :param args_getter: Callable without arguments returning either a
        sequence of positional arguments or an :obj:`ExtraArgs` instance.
    :return: ``(newname, expanded)``.
    """
    @wraps(func)
    def expanded(*fixed_args, **fixed_kwargs):
        extra_args = args_getter()
        if isinstance(extra_args, ExtraArgs):
            extra_args, extra_kwargs = extra_args
        else:
            extra_kwargs = {}
        # The argument collections must be unpacked (as in ``_expanded``);
        # passing them as-is would hand `func` a tuple and a dict as its
        # first two positional arguments.
        call = partial(partial(func, *fixed_args, **fixed_kwargs),
                       *extra_args, **extra_kwargs)
        return call()

    # ``wraps`` copies ``func.__dict__`` and with it the ``_data_iter``
    # marker. Drop it so the generated test is not mistaken for a template
    # (and expanded a second time) when a subclass is decorated later.
    expanded.__dict__.pop("_data_iter", None)

    newname = func.__name__ + "_" + name.replace("-", "_")
    expanded.__name__ = newname
    expanded.__doc__ = None
    return newname, expanded
167
168
def _data_driven_cls_decorator(cls, data_iter=None, lazy=False):
    """ A class decorator that expands TestCase subclass
    methods decorated with `test_on_data` or `data_driven`
    decorator.

    :param cls: The class whose marked members are expanded in place.
    :param data_iter: Iterable of ``(name, args)`` data cases used by
        members that do not carry their own iterator. When ``None`` the
        class attribute ``data_iter`` (a staticmethod or classmethod) is
        called to obtain it, if present.
    :param lazy: Whether `data_iter` is lazy, i.e. yields callables
        returning the arguments instead of the arguments themselves.
    :return: `cls`, with one generated member per (template, data case)
        and every template member replaced by ``None``.
    """
    if data_iter is None:
        data_iter = getattr(cls, "data_iter", None) # data_iter should be a staticmethod or classmethod
        if data_iter is not None:
            data_iter = data_iter()

    if data_iter is not None:
        data_iter = list(data_iter) # Because it needs to be iterated multiple times (each member not overriding it)

    for test_name in dir(cls):
        val = getattr(cls, test_name)
        if not hasattr(val, "_data_iter"):
            continue

        # The marker is either a bare iterator or an (iterator, lazy) pair.
        marker = val._data_iter
        if isinstance(marker, tuple):
            member_data_iter, lazy_iter = marker
        else:
            member_data_iter, lazy_iter = marker, lazy

        if member_data_iter is None:
            # The member has no data of its own: use the class level
            # iterator together with *its* laziness.
            member_data_iter, lazy_iter = data_iter, lazy

        assert member_data_iter is not None, \
            "No data iterator available for %r" % test_name

        # Honour the laziness that belongs to the iterator actually used,
        # not unconditionally the class level flag.
        expand = _expanded_lazy if lazy_iter else _expanded
        for name, expand_args in iter(member_data_iter):
            newname, expanded = expand(val, name, expand_args)
            setattr(cls, newname, expanded)

        # Hide the template itself from the test loader.
        setattr(cls, test_name, None)
    return cls
206
def data_driven(cls=None, data_iter=None):
    """ Class decorator for building data driven test cases.

    :param cls: The class to decorate when used without parentheses.
    :param data_iter: An iterator supplying the names and arguments for
        the expanded test.
    :return: The decorated class when `cls` is given, otherwise a class
        decorator.

    Example ::

        data_for_tests = [("one", (1, )), ("two", (2, ))]

        @data_driven(data_iter=data_for_tests)
        class MyTestCase(unittest.TestCase):
            @test_on_data
            def test_add_on(self, number):
                number + number

    The tests are then accessible from the command line ::

        python -m unittest MyTestCase.MyTestCase.test_add_on_one

    """
    def decorate(target):
        return _data_driven_cls_decorator(target, data_iter=data_iter)

    if cls is None:
        # Used as
        # @data_driven(data_iter=...)   or   @data_driven()
        # class ...
        return decorate
    # Used as
    # @data_driven
    # class ...
    return decorate(cls)
238
239
240
def data_driven_lazy(cls=None, data_iter=None):
    """ Same as :func:`data_driven` except that the data iterator is
    treated as lazy: the class decorator is invoked with ``lazy=True``.

    :param cls: The class to decorate when used without parentheses.
    :param data_iter: A lazy data iterator for the expanded tests.
    :return: The decorated class when `cls` is given, otherwise a class
        decorator.
    """
    def decorate(target):
        return _data_driven_cls_decorator(target, data_iter=data_iter,
                                          lazy=True)

    if cls is None:
        # Used as
        # @data_driven_lazy(data_iter= ...)
        # class ...
        return decorate
    # Used as
    # @data_driven_lazy
    # class ...
    return decorate(cls)
252
def test_on_data(test_func=None, data_iter=None):
    """ Decorator for test member of unittest.TestCase, signaling that it
    wants to be expanded (replicated) on each test's data case. This decorator
    accepts an optional parameter (an data case iterator, see
    `Data Iterators`_) which overrides the iterator passed to
    :obj:`data_driven` decorator.

    :param test_func: The test member, when used without parentheses.
    :param data_iter: Optional data case iterator; must be passed by
        keyword.
    :return: `test_func` marked with a ``_data_iter`` attribute, or a
        decorator doing so when `test_func` is not given.

    Example ::

        @data_driven
        class MyTestCase(TestCase):
            @test_on_data(data_iter=datasets_iter())
            def test_on(self, data):
                ''' This will be a separate test case for each data-set
                instance.
                '''
                print data.name

    .. note:: The actual expanding is done by `data_driven` class decorator.

    .. note:: Within the unittest framework `test_on` test will be expanded
        to `test_on_iris`, `test_on_lenses` ... for each dataset returned
        by :obj:`datasets_iter`. You can then run individual tests from
        the command line (requires Python 2.7) ::

           python -m unittest mymodule.MyTestCase.test_on_iris

    """
    def set_iter(func):
        # (iterator, lazy) marker picked up by the class decorator.
        func._data_iter = data_iter, False
        return func

    if test_func is None:
        # @test_on_data(data_iter=...) or @test_on_data()
        return set_iter
    return set_iter(test_func)
289
290
def test_on_data_lazy(test_func=None, data_iter=None):
    """ Same as :func:`test_on_data` except the ``data_iter`` is
    interpreted as a lazy data iterator (see `Data Iterators`_).

    :param test_func: The test member, when used without parentheses.
    :param data_iter: Optional lazy data case iterator; must be passed by
        keyword.
    :return: `test_func` marked with a ``_data_iter`` attribute, or a
        decorator doing so when `test_func` is not given.
    """
    def set_iter(func):
        # (iterator, lazy) marker picked up by the class decorator.
        func._data_iter = data_iter, True
        return func

    if test_func is None:
        # @test_on_data_lazy(data_iter=...) or @test_on_data_lazy()
        return set_iter
    return set_iter(test_func)
304
305
def datasets_iter(datasets=ALL_DATASETS, preprocess=0):
    """ Iterate over data cases built from the named data sets.

    :param datasets: Names of the data sets to open.
    :param preprocess: Flags forwarded to :func:`open_data`.
    :return: A generator of ``(name, (data,))`` pairs where `name` has
        ``-`` replaced by ``_``.
    """
    for dataset_name in datasets:
        table = open_data(dataset_name, flags=preprocess)
        yield dataset_name.replace("-", "_"), (table,)
311
312
def datasets_iter_lazy(datasets=ALL_DATASETS, preprocess=0):
    """ Lazy variant of :func:`datasets_iter`.

    :param datasets: Names of the data sets to open.
    :param preprocess: Flags forwarded to :func:`open_data`.
    :return: A generator of ``(name, getter)`` pairs where `name` has
        ``-`` replaced by ``_`` and calling ``getter()`` opens the data
        set and returns ``(data,)``.
    """
    for name in datasets:
        # Bind the current values as defaults: a plain closure would look
        # `name` up when called, i.e. after it has been rebound below or
        # replaced by a later loop iteration.
        def load(name=name, flags=preprocess):
            return (open_data(name, flags=flags),)
        yield name.replace("-", "_"), load
318
319
def test_on_datasets(test_func=None, datasets=ALL_DATASETS):
    """ same as ``test_on_data(data_iter=datasets_iter(datasets))``

    :param test_func: The test member, when used without parentheses. For
        convenience a non-callable first argument is taken to be
        `datasets`, so ``@test_on_datasets(SOME_DATASETS)`` works too.
    :param datasets: Names of the data sets to run the test on.
    :return: The marked `test_func`, or a decorator when no test function
        is given.
    """
    if test_func is not None and not callable(test_func):
        # Used as @test_on_datasets(SOME_DATASETS): the positional argument
        # is the data set list, not a function to decorate.
        datasets, test_func = test_func, None

    decorator = test_on_data(data_iter=datasets_iter(datasets))
    if test_func is None:
        return decorator
    return decorator(test_func)
327
328
def datasets_driven(cls=None, datasets=ALL_DATASETS, preprocess=0):
    """ same as ``data_driven(data_iter=datasets_iter(datasets, preprocess))``

    :param cls: The class to decorate when used without parentheses.
    :param datasets: Names of the data sets to expand the tests on.
    :param preprocess: Flags forwarded to :func:`datasets_iter`.
    :return: The decorated class when `cls` is given, otherwise a class
        decorator.
    """
    decorator = data_driven(data_iter=datasets_iter(datasets, preprocess))
    return decorator if cls is None else decorator(cls)
336
337
class DataTestCase(unittest.TestCase):
    """ Common base class of the data driven test cases in this module.

    It adds nothing to :class:`unittest.TestCase`.
    """
341
342import Orange
343from Orange.evaluation import testing as _testing
344from Orange.evaluation import scoring as _scoring
345from Orange.core import MakeRandomIndices2 as _MakeRandomIndices2
346
347
class LearnerTestCase(DataTestCase):
    """ Generic checks for an Orange learner.

    A subclass either sets the class variable `LEARNER` or overrides
    ``setUp`` so that it assigns ``self.learner``. The members marked with
    ``test_on_data`` are templates; they are expanded per data case once
    the subclass is decorated with one of the data driven class decorators.

    """

    LEARNER = None

    def setUp(self):
        """ Make the ``LEARNER`` class member available as ``self.learner``.
        """
        self.learner = self.LEARNER

    @test_on_data
    def test_learner_on(self, dataset):
        """ Train on one part of `dataset` and check the predictions made
        on the other part.
        """
        # Stratify the split only for a discrete class variable.
        split_options = {"p0": 0.3}
        if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
            split_options["stratified"] = True
        indices = _MakeRandomIndices2(**split_options)(dataset)

        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # The classifier must expose the class variable of the data.
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertIs(classifier.class_var, dataset.domain.class_var)

        # The returned results are not inspected here; the call itself is
        # part of the test.
        _testing.test_on_data([classifier], test)

        for ex in test:
            predicted = classifier(ex, Orange.core.GetValue)
            self.assertIsInstance(predicted, Orange.core.Value)

            probabilities = classifier(ex, Orange.core.GetProbabilities)
            self.assertIsInstance(probabilities, Orange.core.Distribution)

            value, dist = classifier(ex, Orange.core.GetBoth)

            self.assertIsInstance(value, Orange.core.Value)
            self.assertIsInstance(dist, Orange.core.Distribution)

            self.assertIs(dist.variable, classifier.class_var)

            # The returned distribution must sum to (roughly) one.
            if isinstance(dist, Orange.core.ContDistribution):
                dist_sum = sum(dist.values())
            else:
                dist_sum = sum(dist)

            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # Comparing ``dist_sum`` with ``dist.abs`` was tried as well
            # but is left out because it fails.

        # Store classifier for possible use in subclasses
        self.classifier = classifier

    @test_on_data
    def test_pickling_on(self, dataset):
        """ Check that a pickled and restored classifier predicts the same
        values as the original.
        """
        mismatch = "Pickled and original classifier return a different value!"

        classifier = self.learner(dataset)
        classifier_clone = pickle.loads(pickle.dumps(classifier))

        indices = orange.MakeRandomIndices2(p0=20)(dataset)
        test = dataset.select(indices, 0)

        continuous_class = isinstance(dataset.domain.class_var,
                                      Orange.feature.Continuous)
        for ex in test:
            original = classifier(ex, orange.GetValue)
            restored = classifier_clone(ex, orange.GetValue)
            if continuous_class:
                # Test to third digit after the decimal point
                places = min(3, dataset.domain.class_var.number_of_decimals)
                self.assertAlmostEqual(original.native(), restored.native(),
                                       places, mismatch)
            else:
                self.assertEqual(original, restored, mismatch)
432
433
class MeasureAttributeTestCase(DataTestCase):
    """ Generic checks for an orange MeasureAttribute subclass.

    .. todo:: Test if measures respect `handlesDiscrete`, `handlesContinuous`
        `computesThresholds`, `needs` (raise the appropriate exception). Test
        `thresholdFunction`.
    """
    # MEASURE must be defined in the subclass
    MEASURE = None

    def setUp(self):
        """ Make the ``MEASURE`` class member available as ``self.measure``.
        """
        self.measure = self.MEASURE

    @test_on_data
    def test_measure_attribute_on(self, data):
        """ Score every attribute of `data` and require at least one
        positive score.
        """
        scores = [self.measure(attr, data) for attr in data.domain.attributes]
        # any scores actually non zero
        self.assertTrue(any(score > 0.0 for score in scores))

    def test_pickle(self):
        """ Check that the measure survives a pickle round trip.
        """
        pickle.loads(pickle.dumps(self.measure))
        # TODO: make sure the restored measure computes the same scores as
        # the original
466        # TODO: make sure measure computes the same scores as measure
467
468
class PreprocessorTestCase(DataTestCase):
    """ Generic checks for an orange.Preprocessor subclass.

    A subclass sets ``PREPROCESSOR`` (or assigns ``self.preprocessor`` in
    its own ``setUp``).
    """
    PREPROCESSOR = None

    def setUp(self):
        """ Make the ``PREPROCESSOR`` class member available as
        ``self.preprocessor``.
        """
        self.preprocessor = self.PREPROCESSOR

    @test_on_data
    def test_preprocessor_on(self, dataset):
        """ Apply the preprocessor to `dataset`; the result is not checked.
        """
        self.preprocessor(dataset)

    def test_pickle(self):
        """ Check that the preprocessor survives a pickle round trip.
        """
        candidates = []
        if isinstance(self.preprocessor, type):
            # A class was given: also test the default constructed instance.
            candidates.append(self.preprocessor())
        candidates.append(self.preprocessor)

        for candidate in candidates:
            pickle.loads(pickle.dumps(candidate))
494
495
496from Orange.distance import distance_matrix
497from Orange.utils import member_set
498
499
class DistanceTestCase(DataTestCase):
    """ Test orange.ExamplesDistance/Constructor

    A subclass sets ``DISTANCE_CONSTRUCTOR`` (or assigns
    ``self.distance_constructor`` in its own ``setUp``).
    """
    DISTANCE_CONSTRUCTOR = None

    def setUp(self):
        """ Make the ``DISTANCE_CONSTRUCTOR`` class member available as
        ``self.distance_constructor``.
        """
        self.distance_constructor = self.DISTANCE_CONSTRUCTOR

    @test_on_data
    def test_distance_on(self, dataset):
        """ Compute the distance matrix on a sample of `dataset` and check
        its type, dimension and that no distance is negative.
        """
        import numpy
        # Work on a sample of at most 20 instances.
        indices = orange.MakeRandomIndices2(dataset, min(20, len(dataset)))
        dataset = dataset.select(indices, 0)
        with member_set(self.distance_constructor, "ignore_class", True):
            mat = distance_matrix(dataset, self.distance_constructor)

        self.assertIsInstance(mat, Orange.misc.SymMatrix)
        self.assertEqual(mat.dim, len(dataset))

        m = numpy.array(list(mat))
        self.assertTrue((m >= 0.0).all())

        if dataset.domain.class_var:
            with member_set(self.distance_constructor, "ignore_class", False):
                try:
                    mat = distance_matrix(dataset, self.distance_constructor)
                except orange.KernelException as ex:
                    # Nothing more to check when the distance cannot take
                    # the class into account.
                    if "not supported" in str(ex):
                        return
                    else:
                        raise
            m1 = numpy.array(list(mat))
            # NOTE(review): ``or dataset`` makes this assertion pass for
            # every non-empty data set; it looks deliberately disabled, so
            # the condition is left as it was. Only the message, whose
            # ``%r`` was never filled in, is fixed (presumably it was meant
            # to name the distance constructor -- confirm).
            self.assertTrue((m1 != m).all() or dataset,
                            "%r does not seem to respect the 'ignore_class' flag"
                            % (self.distance_constructor,))
533
534def test_case_script(path):
535    """ Return a TestCase instance from a script in `path`.
536    The script will be run in the directory it is in.
537   
538    :param path: The path to the script to test
539    :type path: str
540    """
541    dirname = os.path.dirname(os.path.realpath(path))
542    _dir = {}
543    def setUp():
544        _dir["cwd"] = os.path.realpath(os.curdir)
545        os.chdir(dirname)
546    def tearDown():
547        os.chdir(_dir["cwd"])
548
549    def runScript():
550        execfile(path, {})
551
552    runScript.__name__ = "runScript %s" % os.path.basename(path)
553    return unittest.FunctionTestCase(runScript, setUp=setUp, tearDown=tearDown)
554
555
556def test_suite_scripts(path):
557    """ Return a TestSuite for testing all scripts in a directory `path`
558   
559    :param path: Directory path
560    :type path: str
561    """
562    import glob
563    return unittest.TestSuite([test_case_script(os.path.join(path, name)) for name in glob.glob1(path, "*.py")])
564
565
# The stock implementation, kept so that ``disable_pdb`` can restore it.
_default_run = unittest.TestCase.run


def enable_pdb():
    """ Let exceptions raised during a test propagate out of the test
    runner so that the python pdb postmortem debugger can handle them
    interactively.

    For example you can examine exceptions in tests from ipython -pdb ::

        In [1]: import Orange.testing.testing as testing
        In [2]: testing.enable_pdb()
        In [3]: run tests/test_preprocessors.py
        ---...
        KernelException...
        ipdb>

    .. warning:: This modifies the unittest.TestCase.run method

    """

    def run(self, result=None):
        # Replacement for ``TestCase.run`` that records the start, success
        # and stop of a test but never catches what the test raises.
        if result is None:
            result = self.defaultTestResult()
        result.startTest(self)
        test_method = getattr(self, self._testMethodName)
        try:
            try:
                self.setUp()
                test_method()
                result.addSuccess(self)
            finally:
                # tearDown runs even when setUp or the test itself raised.
                self.tearDown()
        finally:
            result.stopTest(self)

    unittest.TestCase.run = run
604
def disable_pdb():
    """ Undo :func:`enable_pdb`: put back the ``unittest.TestCase.run``
    implementation that was in place when this module was imported.

    """
    setattr(unittest.TestCase, "run", _default_run)
611
# When the tests are run from ipython with pdb enabled, let test exceptions
# reach the debugger.
try:
    _ipython = __IPYTHON__  # only defined when running under ipython
except NameError:
    _ipython = None

# Look ``shell`` and ``call_pdb`` up defensively so that importing this
# module cannot fail with an AttributeError.
# NOTE(review): on newer IPython versions ``__IPYTHON__`` appears to be a
# plain flag without a ``shell`` attribute -- confirm.
if getattr(getattr(_ipython, "shell", None), "call_pdb", None): # Is pdb enabled
    enable_pdb()
del _ipython
618
619
620def test_module(module):
621    """ A helper function to run all tests from a module. 
622    """
623    loader = unittest.TestLoader()
624    suite = loader.loadTestsFromModule(module)
625    runner = unittest.TextTestRunner()
626    return runner.run(suite)
Note: See TracBrowser for help on using the repository browser.