source: orange/Orange/misc/testing.py @ 9919:8a2a770ef3af

Revision 9919:8a2a770ef3af, 20.0 KB checked in by markotoplak, 2 years ago (diff)

data.variable -> feature

Line 
1"""\
2Orange unit testing
3===================
4
5This module contains some classes in common use by Orange unit testing
6framework. In particular its most useful feature is the BaseTestOnData
7(along with ``test_on_data`` function and ``datasets_driven`` class decorators)
8class for automating data driven tests.
9 
10         
11Example of use ::
12
13    from Orange.misc import testing
14    import unittest
15   
    data = [("one", (1,)),
            ("two", (2,))]
18   
19    # Data driven with data_iter argument
20    # data must be reiterable multiple times if more than one test member defined
21    @data_driven(data_iter=data)
22    class TestDemo(unittest.TestCase):
23        @test_on_data
24        def test_instance_on(self, arg):
25            self.assertIsInstance(arg, int)
26           
27        @test_on_data
28        def test_add(self, arg):
29            res = arg + arg
30           
31    # data_driven without argument
32    @data_driven
33    class TestDemo1(unittest.TestCase):
34        @test_on_data(data_iter=data)
35        def test_instance_on(self, arg):
36            self.assertIsInstance(arg, int)
37           
38        @test_on_data(data_iter=data)
39        def test_add(self, arg):
40            res = arg + arg
41   
42    # data_driven without arg, using a static data_iter method
43    @data_driven
44    class TestDemo1(unittest.TestCase):
45        @test_on_data
46        def test_instance_on(self, arg):
47            self.assertIsInstance(arg, int)
48           
49        @test_on_data
50        def test_add(self, arg):
51            res = arg + arg
52           
53        @staticmethod
54        def data_iter():
55            yield "iris", Orange.data.Table("doc:iris")
56       
    #@data_driven(data_iter=testing.datasets_iter(testing.CLASSIFICATION_DATASETS + testing.CLASSLES_DATASETS))
    @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS +
                     testing.CLASSLES_DATASETS)
    class TestDefaultLearner(unittest.TestCase):
        @test_on_data
        def test_learner_on(self, dataset):
            import Orange
            Orange.classification.majority.MajorityLearner(dataset)

        # this overrides the data sets given to the class decorator
        @test_on_datasets(datasets=testing.CLASSLES_DATASETS)
        def test_raise_missing_class_on(self, dataset):
            import Orange
            Orange.classification.majority.MajorityLearner(dataset)
71       
72"""
73from __future__ import absolute_import
74import unittest
75import os, sys
76
77import itertools
78from functools import partial
79import cPickle as pickle
80#import pickle
81
82import orange
83from Orange.preprocess import Preprocessor_discretize, Preprocessor_continuize
84
# Individual test-selection bits.
TEST_CLASSIFICATION = 1 << 0
TEST_REGRESSION = 1 << 1
TEST_PICKLE = 1 << 2

# Composite masks built from the bits above.
TEST_ALL = TEST_CLASSIFICATION | TEST_REGRESSION | TEST_PICKLE
TEST_ALL_CLASSIFICATION = TEST_ALL & ~TEST_REGRESSION
# NOTE(review): this evaluates to 2, i.e. the same value as TEST_REGRESSION
# (the TEST_PICKLE bit is not included) -- confirm this is intended.
TEST_ALL_REGRESSION = TEST_ALL & ~TEST_ALL_CLASSIFICATION

TEST_CLASSLESS = 1 << 3

# Preprocessing flags understood by ``open_data``.
DISCRETIZE_DOMAIN = 1 << 4
CONTINUIZE_DOMAIN = 1 << 5
97
# Directory with the data sets shipped next to the ``orange`` module.
datasetsdir = os.path.join(os.path.dirname(orange.__file__), "doc", "datasets")


def open_data(name, flags=0):
    """ Open the named data set from ``datasetsdir`` and return it.

    :param name: Name of the data set inside ``datasetsdir``.
    :param flags: Bit mask; if ``CONTINUIZE_DOMAIN`` is set the data is
        continuized, otherwise if ``DISCRETIZE_DOMAIN`` is set it is
        discretized (the class is left untouched). Continuization wins
        when both bits are set.

    The returned table has its ``name`` attribute set to `name`.
    """
    table = orange.ExampleTable(os.path.join(datasetsdir, name))
    if flags & CONTINUIZE_DOMAIN:
        table = Preprocessor_continuize()(table)
    elif flags & DISCRETIZE_DOMAIN:
        discretize = Preprocessor_discretize(method=orange.EquiNDiscretization(),
                                             discretize_class=False)
        table = discretize(table)
    table.name = name
    return table
113
# Names of the bundled data sets, grouped by the kind of problem they pose.
CLASSIFICATION_DATASETS = ["iris", "brown-selected", "lenses", "monks-1"]
REGRESSION_DATASETS = ["housing", "auto-mpg", "servo"]
CLASSLES_DATASETS = ["water-treatment"]
# Correctly spelled alias; the module documentation refers to both spellings.
# ``CLASSLES_DATASETS`` is kept because existing code imports that name.
CLASSLESS_DATASETS = CLASSLES_DATASETS
ALL_DATASETS = CLASSIFICATION_DATASETS + REGRESSION_DATASETS + CLASSLES_DATASETS
118
119
120from collections import namedtuple
121ExtraArgs = namedtuple("ExtraArgs", "args kwargs")
122
123
124def _expanded(func, name, extra_args):
125    """ Return an expanded function name and the function itself.
126    """
127    from functools import wraps
128    if isinstance(extra_args, ExtraArgs):
129        extra_args, extra_kwargs = extra_args
130    else:
131        extra_kwargs = {}
132    @wraps(func)
133    def expanded(*fixed_args, **fixed_kwargs):
134        call = partial(partial(func, *fixed_args, **fixed_kwargs), *extra_args, **extra_kwargs)
135        return call()
136#    expanded = partial(func, args, kwargs)
137#    expanded = wraps(func)(expanded)
138    newname = func.__name__ + "_" + name.replace("-", "_")
139    expanded.__name__ = newname
140    return newname, expanded
141
142def _expanded_lazy(func, name, args_getter):
143    """ Return an expanded function name and the function itself.
144    arge_getter must return the expanded arguments when called.
145   
146    """
147    from functools import wraps
148    @wraps(func)
149    def expanded(*fixed_args, **kwargs):
150        kwargs = kwargs.copy()
151        extra_args = args_getter()
152        if isinstance(extra_args, ExtraArgs):
153            extra_args, extra_kwargs = extra_args
154        else:
155            extra_kwargs = {}
156        call = partial(partial(func, fixed_args, kwargs), extra_args, extra_kwargs)
157        return call()
158   
159    newname = func.__name__ + "_" + name.replace("-", "_")
160    expanded.__name__ = newname
161    return newname, expanded
162
163               
164def _data_driven_cls_decorator(cls, data_iter=None, lazy=False):
165    """ A class decorator that expands TestCase subclass
166    methods decorated with `test_on_data` or `data_driven`
167    decorator.
168   
169    """ 
170    if data_iter is None:
171        data_iter = getattr(cls, "data_iter", None) # data_iter should be a staticmethod or classmethod
172        if data_iter is not None:
173            data_iter = data_iter()
174           
175    if data_iter is not None:
176        data_iter = list(data_iter) # Because it needs to be iterated multiple times (each member not overriding it)
177   
178    for test_name in dir(cls):
179        val = getattr(cls, test_name)
180        if hasattr(val, "_data_iter"):
181            member_data_iter = val._data_iter
182            if member_data_iter is None or member_data_iter == (None, False):
183                member_data_iter, lazy_iter = data_iter, lazy
184            else:
185                if isinstance(member_data_iter, tuple):
186                    member_data_iter, lazy_iter = member_data_iter
187                else:
188                    lazy_iter = lazy
189                   
190            assert(member_data_iter is not None)
191            for name, expand_args in iter(member_data_iter):
192                if lazy:
193                    newname, expanded = _expanded_lazy(val, name, expand_args)
194                else:
195                    newname, expanded = _expanded(val, name, expand_args)
196                setattr(cls, newname, expanded)
197            setattr(cls, test_name, None)
198            setattr(cls, "__" + test_name, val)
199    return cls
200
def data_driven(cls=None, data_iter=None):
    """ Class decorator for building data driven test cases.

    :param cls: The class to decorate (when used without arguments).
    :param data_iter: An iterator supplying the names and arguments for
        the expanded test.

    Can be used as ``@data_driven``, ``@data_driven()`` or
    ``@data_driven(data_iter=...)``. When called without a class a
    decorator is returned, otherwise the decorated class.

    Example ::

        data_for_tests = [("one", (1, )), ("two", (2, ))]

        @data_driven(data_iter=data_for_tests)
        class MyTestCase(unittest.TestCase):
            @test_on_data
            def test_add_on(self, number):
                number + number

    The tests are then accessible from the command line ::

        python -m unittest MyTestCase.MyTestCase.test_add_on_one

    """
    if cls is None:
        # Used as
        # @data_driven(data_iter=...)
        # class ...
        return partial(_data_driven_cls_decorator, data_iter=data_iter)
    # Used as
    # @data_driven
    # class ...
    return _data_driven_cls_decorator(cls, data_iter=data_iter)
232     
233
234
def data_driven_lazy(cls=None, data_iter=None):
    """ Same as :func:`data_driven` except that `data_iter` is interpreted
    as a lazy data iterator, i.e. it yields ``(name, args_getter)`` pairs
    where ``args_getter()`` returns the arguments when a test is run.

    :param cls: The class to decorate (when used without arguments).
    :param data_iter: A lazy data iterator.
    """
    if cls is None:
        # Used as
        # @data_driven_lazy(data_iter= ...)
        # class ...
        return partial(_data_driven_cls_decorator, data_iter=data_iter, lazy=True)
    # Used as
    # @data_driven_lazy
    # class ...
    return _data_driven_cls_decorator(cls, data_iter=data_iter, lazy=True)
246   
def test_on_data(test_func=None, data_iter=None):
    """ Decorator for test member of unittest.TestCase, signaling that it
    wants to be expanded (replicated) on each test's data case. This decorator
    accepts an optional keyword parameter (a data case iterator, see
    `Data Iterators`_) which overrides the iterator passed to
    :obj:`data_driven` decorator.

    :param test_func: The test function (when used without arguments).
    :param data_iter: Optional data case iterator for this member only;
        must be passed by keyword.

    The function is marked by setting its ``_data_iter`` attribute to
    ``(data_iter, False)``.

    Example ::

        @data_driven
        class MyTestCase(TestCase):
            @test_on_data(data_iter=datasets_iter())
            def test_on(self, data):
                ''' This will be a separate test case for each data-set
                instance.
                '''
                print(data.name)

    .. note:: The actual expanding is done by `data_driven` class decorator.

    .. note:: Within the unittest framework `test_on` test will be expanded
        to `test_on_iris`, `test_on_lenses` ... for each dataset returned
        by :obj:`datasets_iter`. You can then run individual tests from
        the command line (requires Python 2.7) ::

           python -m unittest mymodule.MyTestCase.test_on_iris

    """
    def set_iter(func):
        func._data_iter = data_iter, False
        return func

    if test_func is None:
        # Used as ``@test_on_data(data_iter=...)`` or ``@test_on_data()``.
        return set_iter
    return set_iter(test_func)
283   
284   
def test_on_data_lazy(test_func=None, data_iter=None):
    """ Same as :func:`test_on_data` except the ``data_iter`` is
    interpreted as a lazy data iterator (see `Data Iterators`_).

    :param test_func: The test function (when used without arguments).
    :param data_iter: Optional lazy data case iterator for this member
        only; must be passed by keyword.

    The function is marked by setting its ``_data_iter`` attribute to
    ``(data_iter, True)``.
    """
    def set_iter(func):
        func._data_iter = data_iter, True
        return func

    if test_func is None:
        # Used as ``@test_on_data_lazy(data_iter=...)`` or
        # ``@test_on_data_lazy()``.
        return set_iter
    return set_iter(test_func)
298   
299   
def datasets_iter(datasets=ALL_DATASETS, preprocess=0):
    """ Yield a ``(name, (data,))`` data case for each named data set.

    :param datasets: Names of the data sets to open.
    :param preprocess: Flags passed on to :func:`open_data`.

    Dashes in the yielded names are replaced by underscores.
    """
    for dataset_name in datasets:
        table = open_data(dataset_name, flags=preprocess)
        yield dataset_name.replace("-", "_"), (table,)
305       
306       
def datasets_iter_lazy(datasets=ALL_DATASETS, preprocess=0):
    """ Lazy variant of :func:`datasets_iter`: yield ``(name, getter)``
    pairs where ``getter()`` opens the data set and returns ``(data,)``.

    :param datasets: Names of the data sets to open.
    :param preprocess: Flags passed on to :func:`open_data`.

    Dashes in the yielded names are replaced by underscores.
    """
    for name in datasets:
        # Bind the original file name as a default argument: the getter is
        # called long after this loop has moved on, so a plain closure would
        # see a later (and underscore-mangled) value of ``name``.
        data = lambda name=name: (open_data(name, flags=preprocess), )
        yield name.replace("-", "_"), data
312   
313
def test_on_datasets(test_func=None, datasets=ALL_DATASETS):
    """ Same as ``test_on_data(data_iter=datasets_iter(datasets))``.

    :param test_func: The test function (when used without arguments).
    :param datasets: Names of the data sets to run the test on.
    """
    decorator = test_on_data(data_iter=datasets_iter(datasets))
    return decorator if test_func is None else decorator(test_func)
321
322
def datasets_driven(cls=None, datasets=ALL_DATASETS, preprocess=0):
    """ Same as ``data_driven(data_iter=datasets_iter(datasets, preprocess))``.

    :param cls: The class to decorate (when used without arguments).
    :param datasets: Names of the data sets to run the tests on.
    :param preprocess: Flags passed on to :func:`datasets_iter`.
    """
    decorator = data_driven(data_iter=datasets_iter(datasets, preprocess))
    return decorator if cls is None else decorator(cls)
330   
331
class DataTestCase(unittest.TestCase):
    """ Common base class of the data driven test cases in this module.
    It adds nothing to :class:`unittest.TestCase`.
    """
335   
336import Orange
337from Orange.evaluation import testing as _testing
338from Orange.evaluation import scoring as _scoring
339from Orange.core import MakeRandomIndices2 as _MakeRandomIndices2
340
341
class LearnerTestCase(DataTestCase):
    """ A basic test class for orange learner class. Must define
    class variable `LEARNER` in a subclass or define the proper
    setUp method which sets ``self.learner``.

    The ``test_*_on`` members are only marked with ``test_on_data``; they
    are expanded into real tests when a subclass is decorated with one of
    the data driven class decorators.

    """

    # The learner under test; copied to ``self.learner`` by ``setUp``.
    LEARNER = None

    def setUp(self):
        """ Set up the learner for the test from the ``LEARNER`` class member.
        """
        self.learner = self.LEARNER

    @test_on_data
    def test_learner_on(self, dataset):
        """ Default test case for Orange learners.

        Builds a classifier on one part of `dataset` and checks the type
        of its predictions and that the predicted distributions sum to 1
        on the other part.
        """
        # Stratify the split only when the class is discrete.
        if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
            indices = _MakeRandomIndices2(p0=0.3, stratified=True)(dataset)
        else:
            indices = _MakeRandomIndices2(p0=0.3)(dataset)
        # NOTE(review): examples with index 1 are used for learning and those
        # with index 0 for testing -- presumably p0 is the share of 0s; confirm.
        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # Test for classVar
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertTrue(classifier.class_var is not None)

        # The result is not inspected; only the call itself is exercised.
        res = _testing.test_on_data([classifier], test)

        for ex in test:
            self.assertIsInstance(classifier(ex, Orange.core.GetValue), Orange.core.Value)
            self.assertIsInstance(classifier(ex, Orange.core.GetProbabilities), Orange.core.Distribution)

            value, dist = classifier(ex, Orange.core.GetBoth)

            self.assertIsInstance(value, Orange.core.Value)
            self.assertIsInstance(dist, Orange.core.Distribution)

            # Continuous distributions are summed over their ``values()``.
            if isinstance(dist, Orange.core.ContDistribution):
                dist_sum = sum(dist.values())
            else:
                dist_sum = sum(dist)

            # The distribution must be normalized (sum to 1 within 1e-3).
            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # just for fun also test this
#            self.assertLess(abs(dist_sum - dist.abs), 1e-3)
            # not fun because it fails

        # Store classifier for possible use in subclasses
        self.classifier = classifier

    @test_on_data
    def test_pickling_on(self, dataset):
        """ Test learner and classifier pickling.

        The classifier built on `dataset` is pickled and unpickled, and the
        clone must predict the same values as the original.
        """
        classifier = self.learner(dataset)

        s = pickle.dumps(classifier)
        classifier_clone = pickle.loads(s)

        # NOTE(review): presumably selects 20 examples for the comparison --
        # confirm the meaning of ``p0=20``.
        indices = orange.MakeRandomIndices2(p0=20)(dataset)
        test = dataset.select(indices, 0)

        for ex in test:
            if isinstance(dataset.domain.class_var, Orange.feature.Continuous):
                # Test to third digit after the decimal point
                # (or fewer, if the class variable has fewer decimals)
                self.assertAlmostEqual(classifier(ex, orange.GetValue).native(),
                                       classifier_clone(ex, orange.GetValue).native(),
                                       min(3, dataset.domain.class_var.number_of_decimals),
                                       "Pickled and original classifier return a different value!")
            else:
                self.assertEqual(classifier(ex, orange.GetValue),
                                 classifier_clone(ex, orange.GetValue),
                                 "Pickled and original classifier return a different value!")
422               
423
class MeasureAttributeTestCase(DataTestCase):
    """ Test orange MeasureAttribute subclass.

    .. todo:: Test if measures respect `handlesDiscrete`, `handlesContinuous`
        `computesThresholds`, `needs` (raise the appropriate exception). Test
        `thresholdFunction`.
    """
    # MEASURE must be defined in the subclass
    MEASURE = None

    def setUp(self):
        """ Take the measure under test from the ``MEASURE`` class member.
        """
        self.measure = self.MEASURE

    @test_on_data
    def test_measure_attribute_on(self, data):
        """ Default test for attribute measures: score every attribute of
        `data` and require at least one score above zero.
        """
        scores = [self.measure(attr, data) for attr in data.domain.attributes]
        # any scores actually non zero
        self.assertTrue(any(score > 0.0 for score in scores))

    def test_pickle(self):
        """ Test attribute measure pickling support (a pickle round trip
        must not raise).
        """
        pickle.loads(pickle.dumps(self.measure))
        # TODO: make sure the unpickled measure computes the same scores
457         
458
class PreprocessorTestCase(DataTestCase):
    """ Test orange.Preprocessor subclass. ``PREPROCESSOR`` must be
    defined in the subclass.

    """
    PREPROCESSOR = None

    def setUp(self):
        """ Take the preprocessor under test from ``PREPROCESSOR``.
        """
        self.preprocessor = self.PREPROCESSOR

    @test_on_data
    def test_preprocessor_on(self, dataset):
        """ Test preprocessor on dataset (the call must not raise).
        """
        self.preprocessor(dataset)

    def test_pickle(self):
        """ Test preprocessor pickling (round trips must not raise).
        """
        if isinstance(self.preprocessor, type):
            # Given a class: also round-trip a default constructed instance.
            pickle.loads(pickle.dumps(self.preprocessor()))

        pickle.loads(pickle.dumps(self.preprocessor))
484       
485       
486from Orange.distance import distance_matrix
487from Orange.misc import member_set
488
489
class DistanceTestCase(DataTestCase):
    """ Test orange.ExamplesDistance/Constructor. ``DISTANCE_CONSTRUCTOR``
    must be defined in the subclass.
    """
    DISTANCE_CONSTRUCTOR = None

    def setUp(self):
        """ Take the constructor under test from ``DISTANCE_CONSTRUCTOR``.
        """
        self.distance_constructor = self.DISTANCE_CONSTRUCTOR

    @test_on_data
    def test_distance_on(self, dataset):
        """ Compute the distance matrix on a sample of `dataset` with the
        class ignored and check that all distances are non-negative; if the
        data has a class, compute it again with the class included.
        """
        import numpy
        indices = orange.MakeRandomIndices2(dataset, min(20, len(dataset)))
        dataset = dataset.select(indices, 0)
        with member_set(self.distance_constructor, "ignore_class", True):
            mat = distance_matrix(dataset, self.distance_constructor)

        m = numpy.array(list(mat))
        self.assertTrue((m >= 0.0).all())

        if dataset.domain.class_var:
            with member_set(self.distance_constructor, "ignore_class", False):
                try:
                    mat = distance_matrix(dataset, self.distance_constructor)
                except orange.KernelException as ex:
                    # The constructor cannot take the class into account;
                    # nothing more to check.
                    if "not supported" in str(ex):
                        return
                    raise
            m1 = numpy.array(list(mat))
            # NOTE(review): ``or dataset`` makes this pass for any non-empty
            # data set, so the comparison is effectively disabled -- confirm
            # whether that is intended.
            self.assertTrue((m1 != m).all() or dataset,
                            "%r does not seem to respect the 'ignore_class' flag"
                            % (self.distance_constructor,))
520       
521def test_case_script(path):
522    """ Return a TestCase instance from a script in `path`.
523    The script will be run in the directory it is in.
524   
525    :param path: The path to the script to test
526    :type path: str
527    """
528    dirname = os.path.dirname(os.path.realpath(path))
529    _dir = {}
530    def setUp():
531        _dir["cwd"] = os.path.realpath(os.curdir)
532        os.chdir(dirname)
533    def tearDown():
534        os.chdir(_dir["cwd"])
535       
536    def runScript():
537        execfile(path, {})
538       
539    runScript.__name__ = "runScript %s" % os.path.basename(path)
540    return unittest.FunctionTestCase(runScript, setUp=setUp, tearDown=tearDown)
541
542
543def test_suite_scripts(path):
544    """ Return a TestSuite for testing all scripts in a directory `path`
545   
546    :param path: Directory path
547    :type path: str
548    """
549    import glob
550    return unittest.TestSuite([test_case_script(os.path.join(path, name)) for name in glob.glob1(path, "*.py")])
551   
552
# The original ``TestCase.run``; restored by ``disable_pdb``.
_default_run = unittest.TestCase.run


def enable_pdb():
    """ Enable the python pdb postmortem debugger to handle any
    raised exception during the test for interactive debugging.

    The replacement ``run`` does not record failures or errors in the
    result; any exception raised by the test propagates to the caller.

    For example you can examine exceptions in tests from ipython -pdb ::

        In [1]: import Orange.misc.testing as testing
        In [2]: testing.enable_pdb()
        In [3]: run tests/test_preprocessors.py
        ---...
        KernelException...
        ipdb>

    .. warning:: This modifies the unittest.TestCase.run method

    """

    def run(self, result=None):
        result = self.defaultTestResult() if result is None else result
        result.startTest(self)
        test_method = getattr(self, self._testMethodName)
        try:
            try:
                self.setUp()
                test_method()
                result.addSuccess(self)
            finally:
                # tearDown runs even when setUp or the test raised
                self.tearDown()
        finally:
            result.stopTest(self)

    unittest.TestCase.run = run
591   
def disable_pdb():
    """ Undo :func:`enable_pdb`: put back the ``unittest.TestCase.run``
    method that was in place when this module was imported.

    """
    setattr(unittest.TestCase, "run", _default_run)
598   
# When the tests are run from ipython with pdb enabled, let exceptions
# raised in tests reach the debugger.
try:
    __IPYTHON__  # We are running tests from ipython
except NameError:
    pass
else:
    # ``__IPYTHON__`` is not guaranteed to have a ``shell`` attribute, so
    # look it up defensively instead of failing the import of this module.
    if getattr(getattr(__IPYTHON__, "shell", None), "call_pdb", None):  # Is pdb enabled
        enable_pdb()
605   
606   
607def test_module(module):
608    """ A helper function to run all tests from a module. 
609    """
610    loader = unittest.TestLoader()
611    suite = loader.loadTestsFromModule(module)
612    runner = unittest.TextTestRunner()
613    return runner.run(suite)
Note: See TracBrowser for help on using the repository browser.