source: orange/orange/Orange/evaluation/testing.py @ 9214:bdd7ce13825d

Revision 9214:bdd7ce13825d, 30.0 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved documentation from testing.py to an .rst file.
Added deprecation decorators for crossvalidation and leave_one_out.
Added unit tests for leave_one_out and cross_validation.

Line 
1import Orange
2from Orange.misc import demangle_examples, getobjectname, printVerbose, deprecated_keywords
3import exceptions, cPickle, os, os.path
4
5#### Some private stuff
6
def encode_PP(pps):
    """Encode a list of (type, preprocessor) pairs into a string of their
    names, used as part of a cache file name.

    Returns ``"*"`` if any preprocessor has no name (the result is then
    unusable as a cache key).
    """
    # Accumulate into a separate variable: the original code overwrote the
    # `pps` parameter with "" and then looped over that empty string, so it
    # always returned "" regardless of the preprocessors given.
    encoded = ""
    for pp in pps:
        objname = getobjectname(pp[1], "")
        if len(objname):
            encoded += "_" + objname
        else:
            return "*"
    return encoded
16
17#### Data structures
18
class TestedExample:
    """
    TestedExample stores predictions of different classifiers for a single testing example.

    .. attribute:: classes

        A list of predictions of type Value, one for each classifier.

    .. attribute:: probabilities

        A list of probabilities of classes, one for each classifier.

    .. attribute:: iterationNumber

        Iteration number (e.g. fold) in which the TestedExample was created/tested.

    .. attribute:: actualClass

        The correct class of the example

    .. attribute:: weight

        Example's weight. Even if the example set was not weighted,
        this attribute is present and equals 1.0.

    :param iterationNumber: iteration (e.g. fold) in which the example is tested
    :param actualClass: the correct class of the example
    :param n: number of classifiers; ``classes`` and ``probabilities`` are
        pre-filled with ``n`` ``None`` placeholders (filled via :obj:`set_result`)
    :paramtype n: int
    :param weight: example's weight
    :paramtype weight: float

    """

    def __init__(self, iterationNumber=None, actualClass=None, n=0, weight=1.0):
        self.classes = [None] * n
        self.probabilities = [None] * n
        self.iterationNumber = iterationNumber
        self.actualClass = actualClass
        self.weight = weight

    def add_result(self, aclass, aprob):
        """Append a new result (class and probability prediction by a single classifier) to the classes and probabilities field."""
        # A continuous prediction is stored as a plain float with its raw
        # probability object; a discrete one as an int with a list of
        # per-class probabilities.
        if isinstance(aclass.value, float):
            self.classes.append(float(aclass))
            self.probabilities.append(aprob)
        else:
            self.classes.append(int(aclass))
            self.probabilities.append(list(aprob))

    def set_result(self, i, aclass, aprob):
        """Set the result of the i-th classifier to the given values."""
        if isinstance(aclass.value, float):
            self.classes[i] = float(aclass)
            self.probabilities[i] = aprob
        else:
            self.classes[i] = int(aclass)
            self.probabilities[i] = list(aprob)
80
class ExperimentResults(object):
    """
    ``ExperimentResults`` stores results of one or more repetitions of
    some test (cross validation, repeated sampling...) under the same
    circumstances.

    .. attribute:: results

        A list of instances of TestedExample, one for each example in
        the dataset.

    .. attribute:: classifiers

        A list of classifiers, one element for each repetition (eg
        fold). Each element is a list of classifiers, one for each
        learner. This field is used only if storing is enabled by
        ``storeClassifiers=1``.

    .. attribute:: numberOfIterations

        Number of iterations. This can be the number of folds
        (in cross validation) or the number of repetitions of some
        test. ``TestedExample``'s attribute ``iterationNumber`` should
        be in range ``[0, numberOfIterations-1]``.

    .. attribute:: numberOfLearners

        Number of learners. Lengths of lists classes and probabilities
        in each :obj:`TestedExample` should equal ``numberOfLearners``.

    .. attribute:: loaded

        If the experimental method supports caching and there are no
        obstacles for caching (such as unknown random seeds), this is a
        list of boolean values. Each element corresponds to a classifier
        and tells whether the experimental results for that classifier
        were computed or loaded from the cache.

    .. attribute:: weights

        A flag telling whether the results are weighted. If ``False``,
        weights are still present in ``TestedExamples``, but they are
        all ``1.0``. Clear this flag, if your experimental procedure
        ran on weighted testing examples but you would like to ignore
        the weights in statistics.

    """
    def __init__(self, iterations, classifierNames, classValues=None, weights=None, baseClass=-1, domain=None, **argkw):
        self.classValues = classValues
        self.classifierNames = classifierNames
        self.numberOfIterations = iterations
        self.numberOfLearners = len(classifierNames)
        self.results = []
        self.classifiers = []
        self.loaded = None
        self.baseClass = baseClass
        self.weights = weights

        # When a domain is given, class values / base class / converter are
        # derived from its class variable (overriding the arguments above).
        # NOTE(review): without a domain, self.converter is never set, so
        # create_tested_example() requires construction with domain=...
        if domain is not None:
            if domain.classVar.varType == Orange.data.Type.Discrete:
                self.classValues = list(domain.classVar.values)
                self.baseClass = domain.classVar.base_value
                self.converter = int
            else:
                self.baseClass = self.classValues = None
                self.converter = float

        # Any extra keyword arguments become attributes (historical API).
        self.__dict__.update(argkw)

    def load_from_files(self, learners, filename):
        raise NotImplementedError("This feature is no longer supported.")

    def save_to_files(self, learners, filename):
        raise NotImplementedError("This feature is no longer supported. Pickle whole class instead.")

    def create_tested_example(self, fold, example):
        """Build a :obj:`TestedExample` for ``example`` tested in ``fold``."""
        return TestedExample(fold,
                             self.converter(example.getclass()),
                             self.numberOfLearners,
                             example.getweight(self.weights))

    def remove(self, index):
        """Remove one learner from evaluation results."""
        for r in self.results:
            del r.classes[index]
            del r.probabilities[index]
        del self.classifierNames[index]
        self.numberOfLearners -= 1

    def add(self, results, index, replace=-1):
        """Add evaluation results (for one learner).

        :param results: another :obj:`ExperimentResults` over the same data
        :param index: which learner of ``results`` to take
        :param replace: if a valid learner index, replace that learner's
            results in place; otherwise append as a new learner
        """
        if len(self.results) != len(results.results):
            raise SystemError("mismatch in number of test cases")
        if self.numberOfIterations != results.numberOfIterations:
            raise SystemError("mismatch in number of iterations (%d<>%d)" % \
                  (self.numberOfIterations, results.numberOfIterations))
        if len(self.classifiers) and len(results.classifiers) == 0:
            raise SystemError("no classifiers in results")

        if replace < 0 or replace >= self.numberOfLearners: # results for new learner
            self.classifierNames.append(results.classifierNames[index])
            self.numberOfLearners += 1
            for i, r in enumerate(self.results):
                r.classes.append(results.results[i].classes[index])
                r.probabilities.append(results.results[i].probabilities[index])
            if len(self.classifiers):
                for i in range(self.numberOfIterations):
                    self.classifiers[i].append(results.classifiers[i][index])
        else: # replace results of existing learner
            self.classifierNames[replace] = results.classifierNames[index]
            for i, r in enumerate(self.results):
                r.classes[replace] = results.results[i].classes[index]
                r.probabilities[replace] = results.results[i].probabilities[index]
            if len(self.classifiers):
                for i in range(self.numberOfIterations):
                    # Fix: replace the learner's classifier within each
                    # iteration; the original assigned to
                    # self.classifiers[replace], clobbering a whole fold list.
                    self.classifiers[i][replace] = results.classifiers[i][index]
198
199#### Experimental procedures
200
@deprecated_keywords({"pps": "preprocessors"})
def leave_one_out(learners, examples, preprocessors=(),
                  callback=None, store_classifiers=False, store_examples=False):
    """Evaluate learners with leave-one-out: every example is its own fold.

    :param learners: list of learners to be tested
    :param examples: data table on which the learners will be tested
    :param preprocessors: a list of preprocessors to be used on data.
    :param callback: a function that will be called after each fold is computed.
    :param store_classifiers: if True, classifiers will be accessible in test_results.
    :param store_examples: if True, examples will be accessible in test_results.
    """
    # Giving every example a distinct fold index reduces the general
    # indexed test to leave-one-out.
    fold_per_example = range(len(examples))
    return test_with_indices(learners, examples,
                             indices=fold_per_example,
                             preprocessors=preprocessors,
                             callback=callback,
                             store_classifiers=store_classifiers,
                             store_examples=store_examples)
215
216
def proportion_test(learners, examples, learnProp, times=10,
                   strat=Orange.core.MakeRandomIndices.StratifiedIfPossible,
                   pps=[], callback=None, **argkw):
    """train-and-test evaluation (train on a subset, test on remaining examples)

    Splits the data with ``learnProp`` of examples in the learning
    and the rest in the testing set. The test is repeated for a given
    number of times (default 10). Division is stratified by default. The
    function also accepts keyword arguments for randomization and
    storing classifiers.

    100 repetitions of the so-called 70:30 test in which 70% of examples
    are used for training and 30% for testing is done by::

        res = Orange.evaluation.testing.proportion_test(learners, data, 0.7, 100)

    Note that Python allows naming the arguments; instead of "100" you
    can use "times=100" to increase the clarity (not so with keyword
    arguments, such as ``storeClassifiers``, ``randseed`` or ``verbose``
    that must always be given with a name).

    :param learners: list of learners to be tested
    :param examples: data table (or a (table, weight) pair) to split and test on
    :param learnProp: proportion of examples used for learning (0..1)
    :param times: number of random train/test splits
    :param pps: preprocessors, as (type, preprocessor) pairs
    :param callback: called once after each repetition
    """
    # NOTE(review): pps=[] is a mutable default argument; it is only read
    # here, but sharing one list across calls is fragile.

    # randomGenerator is set either to what users provided or to orange.RandomGenerator(0)
    # If we left it None or if we set MakeRandomIndices2.randseed, it would give same indices each time it's called
    randomGenerator = argkw.get("indicesrandseed", 0) or argkw.get("randseed", 0) or argkw.get("randomGenerator", 0)
    pick = Orange.core.MakeRandomIndices2(stratified = strat, p0 = learnProp, randomGenerator = randomGenerator)

    examples, weight = demangle_examples(examples)
    classVar = examples.domain.classVar
    if classVar.varType == Orange.data.Type.Discrete:
        values = list(classVar.values)
        baseValue = classVar.baseValue
    else:
        baseValue = values = None
    # Positional args map to (iterations, classifierNames, classValues, weights, baseClass).
    testResults = ExperimentResults(times, [l.name for l in learners], values, weight!=0, baseValue)

    for time in range(times):
        indices = pick(examples)
        learnset = examples.selectref(indices, 0)   # index 0 -> training split
        testset = examples.selectref(indices, 1)    # index 1 -> testing split
        learn_and_test_on_test_data(learners, (learnset, weight), (testset, weight), testResults, time, pps, **argkw)
        if callback: callback()
    return testResults
261
262
@deprecated_keywords({"pps": "preprocessors",
                      "strat": "stratified",
                      "randseed": "random_generator",
                      "indicesrandseed": "random_generator",
                      "randomGenerator": "random_generator"})
def cross_validation(learners, examples, folds=10, stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible,
                    preprocessors=(), random_generator=0, callback=None, store_classifiers=False, store_examples=False):
    """Evaluate learners with cross validation over the given number of folds.

    :param learners: list of learners to be tested
    :param examples: data table on which the learners will be tested
    :param folds: number of folds to perform
    :param stratified: sets, whether indices should be stratified
    :param preprocessors: a list of preprocessors to be used on data.
    :param random_generator: random seed or random generator for selection of indices
    :param callback: a function that will be called after each fold is computed.
    :param store_classifiers: if True, classifiers will be accessible in test_results.
    :param store_examples: if True, examples will be accessible in test_results.
    """
    data, weight = demangle_examples(examples)

    # Assign each example to one of `folds` folds, then run the generic
    # indexed test over those assignments.
    fold_indices = Orange.core.MakeRandomIndicesCV(data, folds,
                                                   stratified=stratified,
                                                   random_generator=random_generator)
    return test_with_indices(learners=learners,
                             examples=(data, weight),
                             indices=fold_indices,
                             preprocessors=preprocessors,
                             callback=callback,
                             store_classifiers=store_classifiers,
                             store_examples=store_examples)
288
289
def learning_curve_n(learners, examples, folds=10,
                   strat=Orange.core.MakeRandomIndices.StratifiedIfPossible,
                   proportions=Orange.core.frange(0.1), pps=[], **argkw):
    """Construct a learning curve for learners.

    A simpler interface for the function :obj:`learning_curve`. Instead
    of methods for preparing indices, it simply takes the number of folds
    and a flag telling whether we want a stratified cross-validation or
    not. This function does not return a single :obj:`ExperimentResults` but
    a list of them, one for each proportion. ::

        prop = [0.2, 0.4, 0.6, 0.8, 1.0]
        res = Orange.evaluation.testing.learning_curve_n(learners, data, folds = 5, proportions = prop)
        for i, p in enumerate(prop):
            print "%5.3f:" % p,
            printResults(res[i])

    This function basically prepares a random generator and example selectors
    (``cv`` and ``pick``) and calls :obj:`learning_curve`.

    """
    # The default seed of -1 is truthy, so a seeded generator is built unless
    # the caller explicitly passes a seed of 0 (only then is "randomGenerator"
    # consulted).
    seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
    if seed:
        randomGenerator = Orange.core.RandomGenerator(seed)
    else:
        randomGenerator = argkw.get("randomGenerator", Orange.core.RandomGenerator())

    if strat:
        cv = Orange.core.MakeRandomIndicesCV(folds = folds, stratified = strat, randomGenerator = randomGenerator)
        pick = Orange.core.MakeRandomIndices2(stratified = strat, randomGenerator = randomGenerator)
    else:
        # NOTE(review): "RandomIndicesCV"/"RandomIndices2" (without "Make")
        # look inconsistent with the rest of the module -- confirm these names
        # exist in Orange.core; this branch is only taken when strat is falsy.
        cv = Orange.core.RandomIndicesCV(folds = folds, stratified = strat, randomGenerator = randomGenerator)
        pick = Orange.core.RandomIndices2(stratified = strat, randomGenerator = randomGenerator)
    # Direct call instead of apply(), which is deprecated and removed in Python 3.
    return learning_curve(learners, examples, cv, pick, proportions, pps, **argkw)
325
326
def learning_curve(learners, examples, cv=None, pick=None, proportions=Orange.core.frange(0.1), pps=[], **argkw):
    """
    Computes learning curves using a procedure recommended by Salzberg
    (1997). It first prepares data subsets (folds). For each proportion,
    it performs the cross-validation, but taking only a proportion of
    examples for learning.

    Arguments ``cv`` and ``pick`` give the methods for preparing
    indices for cross-validation and random selection of learning
    examples. If they are not given, :obj:`orange.MakeRandomIndicesCV` and
    :obj:`orange.MakeRandomIndices2` are used, both will be stratified and the
    cross-validation will be 10-fold. Proportions is a list of proportions
    of learning examples.

    The function can save time by loading experimental existing data for
    any test that were already conducted and saved. Also, the computed
    results are stored for later use. You can enable this by adding
    a keyword argument ``cache=1``. Another keyword deals with progress
    report. If you add ``verbose=1``, the function will print the proportion
    and the fold number.

    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    callback = argkw.get("callback", 0)

    # Only "L" (learning-set) preprocessors are allowed here: transforming
    # test examples would bias the evaluation.
    for pp in pps:
        if pp[0]!="L":
            raise SystemError("cannot preprocess testing examples")

    if not cv or not pick:
        # Default seed is -1 (truthy), so RandomGenerator(-1) is used unless
        # the caller passes indicesrandseed/randseed == 0 or a randomGenerator.
        seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
        if seed:
            randomGenerator = Orange.core.RandomGenerator(seed)
        else:
            randomGenerator = argkw.get("randomGenerator", Orange.core.RandomGenerator())
        if not cv:
            cv = Orange.core.MakeRandomIndicesCV(folds=10, stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible, randomGenerator = randomGenerator)
        if not pick:
            pick = Orange.core.MakeRandomIndices2(stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible, randomGenerator = randomGenerator)

    examples, weight = demangle_examples(examples)
    folds = cv(examples)                    # fold index for each example
    ccsum = hex(examples.checksum())[2:]    # data checksum, used in the cache file name
    ppsp = encode_PP(pps)                   # preprocessor names; "*" disables caching
    nLrn = len(learners)

    allResults=[]
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)

        # Caching is only possible when both index generators have known
        # seeds and every preprocessor could be encoded into the file name.
        if (cv.randseed<0) or (pick.randseed<0):
            cache = 0
        else:
            fnstr = "{learning_curve}_%s_%s_%s_%s%s-%s" % ("%s", p, cv.randseed, pick.randseed, ppsp, ccsum)
            if "*" in fnstr:
                cache = 0

        # Class-value converter: int for discrete classes, float otherwise.
        conv = examples.domain.classVar.varType == Orange.data.Type.Discrete and int or float
        testResults = ExperimentResults(cv.folds, [l.name for l in learners], examples.domain.classVar.values.native(), weight!=0, examples.domain.classVar.baseValue)
        testResults.results = [TestedExample(folds[i], conv(examples[i].getclass()), nLrn, examples[i].getweight(weight))
                               for i in range(len(examples))]

        # NOTE(review): load_from_files/save_to_files now raise
        # NotImplementedError, so passing cache=1 will fail at this point --
        # the caching path appears to be effectively dead.
        if cache and testResults.load_from_files(learners, fnstr):
            printVerbose("  loaded from cache", verb)
        else:
            for fold in range(cv.folds):
                printVerbose("  fold %d" % fold, verb)

                # learning: everything outside this fold, then a random
                # subset of proportion p of it
                learnset = examples.selectref(folds, fold, negate=1)
                learnset = learnset.selectref(pick(learnset, p0=p), 0)
                if not len(learnset):
                    continue

                for pp in pps:
                    learnset = pp[1](learnset)

                classifiers = [None]*nLrn
                for i in range(nLrn):
                    if not cache or not testResults.loaded[i]:
                        classifiers[i] = learners[i](learnset, weight)

                # testing: every example whose fold index matches this fold
                for i in range(len(examples)):
                    if (folds[i]==fold):
                        # This is to prevent cheating:
                        ex = Orange.data.Instance(examples[i])
                        ex.setclass("?")
                        for cl in range(nLrn):
                            if not cache or not testResults.loaded[cl]:
                                cls, pro = classifiers[cl](ex, Orange.core.GetBoth)
                                testResults.results[i].set_result(cl, cls, pro)
                if callback: callback()
            if cache:
                testResults.save_to_files(learners, fnstr)

        allResults.append(testResults)

    return allResults
427
428
def learning_curve_with_test_data(learners, learnset, testset, times=10,
                              proportions=Orange.core.frange(0.1),
                              strat=Orange.core.MakeRandomIndices.StratifiedIfPossible, pps=[], **argkw):
    """
    This function is suitable for computing a learning curve on datasets,
    where learning and testing examples are split in advance. For each
    proportion of learning examples, it randomly selects the requested
    number of learning examples, builds the models and tests them on the
    entire testset. The whole test is repeated for the given number of
    times for each proportion. The result is a list of :obj:`ExperimentResults`,
    one for each proportion.

    In the following scripts, examples are pre-divided onto training
    and testing set. Learning curves are computed in which 20, 40, 60,
    80 and 100 percents of the examples in the former set are used for
    learning and the latter set is used for testing. Random selection
    of the given proportion of learning set is repeated for five times.

    .. literalinclude:: code/testing-test.py
        :start-after: Learning curve with pre-separated data
        :end-before: # End


    """
    verb = argkw.get("verbose", 0)

    learnset, learnweight = demangle_examples(learnset)
    # NOTE(review): only the weight of testset is extracted; testset itself
    # is used as-is below (testset.domain, passing to learn_and_test...),
    # so this presumably expects a bare table, not a (table, weight) pair.
    testweight = demangle_examples(testset)[1]

    # Seed/generator for picking the learning subset (defaults to 0).
    randomGenerator = argkw.get("indicesrandseed", 0) or argkw.get("randseed", 0) or argkw.get("randomGenerator", 0)
    pick = Orange.core.MakeRandomIndices2(stratified = strat, randomGenerator = randomGenerator)
    allResults=[]
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)
        testResults = ExperimentResults(times, [l.name for l in learners],
                                        testset.domain.classVar.values.native(),
                                        testweight!=0, testset.domain.classVar.baseValue)
        testResults.results = []

        for t in range(times):
            printVerbose("  repetition %d" % t, verb)
            # p is passed positionally to pick -- presumably MakeRandomIndices2's
            # p0 (proportion); elsewhere in this module it is given as p0=p.
            learn_and_test_on_test_data(learners, (learnset.selectref(pick(learnset, p), 0), learnweight),
                                   testset, testResults, t)

        allResults.append(testResults)

    return allResults
476
def preprocess_data(learnset, testset, preprocessors):
    """Apply preprocessors to learn and test dataset.

    ``preprocessors`` is a sequence of (type, preprocessor) pairs.
    "B" preprocessors are applied to both sets first; afterwards "L"
    applies to the learning set, "T" to the test set, and "LT" to both
    at once (returning a pair). Returns the transformed (learnset, testset).
    """
    # First pass: "B" preprocessors transform both sets identically.
    for kind, transform in preprocessors:
        if kind == "B":
            learnset = transform(learnset)
            testset = transform(testset)

    # Second pass: set-specific preprocessors, in the order given.
    for kind, transform in preprocessors:
        if kind == "L":
            learnset = transform(learnset)
        elif kind == "T":
            testset = transform(testset)
        elif kind == "LT":
            learnset, testset = transform(learnset, testset)

    return learnset, testset
492
@deprecated_keywords({"storeExamples": "store_examples",
                      # Fix: the replacement must be the bare keyword name;
                      # "store_classifiers=True" is not a valid identifier.
                      "storeClassifiers": "store_classifiers",
                      "pps": "preprocessors"})
def test_with_indices(learners, examples, indices, preprocessors=(),
                      callback=None, store_classifiers=False, store_examples=False, **kwargs):
    """
    Perform a cross-validation-like test. Examples for each fold are selected
    based on given indices.

    :param learners: list of learners to be tested
    :param examples: data table on which the learners will be tested
    :param indices: a list of integers that defines, which examples will be
     used for testing in each fold. The number of indices should be equal to
     the number of examples.
    :param preprocessors: a list of preprocessors to be used on data.
    :param callback: a function that will be called after each fold is computed.
    :param store_classifiers: if True, classifiers will be accessible in test_results.
    :param store_examples: if True, examples will be accessible in test_results.
    """
    examples, weight = demangle_examples(examples)
    if not examples:
        raise ValueError("Test data set with no examples")
    if not examples.domain.classVar:
        raise ValueError("Test data set without class attribute")
    if "cache" in kwargs:
        raise ValueError("This feature is no longer supported.")

    # Folds are numbered 0..max(indices); one iteration per fold.
    niterations = max(indices)+1
    test_result = ExperimentResults(niterations,
                                    classifierNames = [getobjectname(l) for l in learners],
                                    domain=examples.domain,
                                    weights=weight)

    # Pre-create one TestedExample per example; set_result fills them in.
    test_result.results = [test_result.create_tested_example(indices[i], example)
                           for i, example in enumerate(examples)]

    if store_examples:
        test_result.examples = examples

    for fold in xrange(niterations):
        results, classifiers = one_fold_with_indices(learners, examples, fold, indices, preprocessors, weight)

        for example, learner, result in results:
            test_result.results[example].set_result(learner, *result)

        if store_classifiers:
            test_result.classifiers.append(classifiers)
        if callback:
            callback()

    return test_result
545
def one_fold_with_indices(learners, examples, fold, indices, preprocessors=(), weight=0):
    """Perform one fold of cross-validation like procedure using provided indices.

    :returns: a pair ``(results, classifiers)`` where ``results`` is a list of
        ``(example_index, learner_index, (class, probabilities))`` triples;
        both are empty tuples when the fold has no training or test examples.
    """
    results = []

    # Examples whose index equals `fold` form the test set; the rest train.
    learnset = examples.selectref(indices, fold, negate=1)
    testset = examples.selectref(indices, fold, negate=0)
    if len(learnset)==0 or len(testset)==0:
        return (), ()

    # learning
    learnset, testset = preprocess_data(learnset, testset, preprocessors)
    if not learnset:
        raise SystemError("no training examples after preprocessing")
    if not testset:
        raise SystemError("no test examples after preprocessing")

    classifiers = [learner(learnset, weight) for learner in learners]

    # testing
    test_idx = 0
    for i in range(len(examples)):
        if indices[i] != fold:
            continue

        # Remove class value from testing example to prevent cheating:
        ex = Orange.data.Instance(testset[test_idx])
        ex.setclass("?")
        test_idx += 1

        for c, classifier in enumerate(classifiers):
            result = classifier(ex, Orange.core.GetBoth)
            if result[0].is_special():
                # Fix: the original raised a plain string, which is invalid
                # (string exceptions were removed in Python 2.6); raise a real
                # exception, matching this function's other SystemError raises.
                raise SystemError("Classifier %s returned unknown value" % (classifier.name or ("#%i" % c)))
            results.append((i, c, result))

    return results, classifiers
583
def learn_and_test_on_test_data(learners, learnset, testset, testResults=None, iterationNumber=0, pps=(), callback=None, **argkw):
    """
    Perform a test, where learners are learned on one dataset and tested
    on another.

    :param learners: list of learners to be tested
    :param learnset: a dataset (or (dataset, weight) pair) used for training
    :param testset: a dataset (or (dataset, weight) pair) used for testing
    :param testResults: an existing :obj:`ExperimentResults` to append to,
        or None to create a fresh one
    :param iterationNumber: iteration number recorded with each tested example
    :param pps: a list of (type, preprocessor) pairs to be applied to the data
    :param callback: a function that is called after each classifier is trained

    Keyword arguments ``storeclassifiers``/``storeClassifiers`` and
    ``storeExamples`` control whether classifiers and examples are kept
    in the results.
    """
    # Accept both historical spellings of the flag. (The original computed
    # this twice; the duplicate line is removed.)
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get("storeClassifiers", 0)
    storeExamples = argkw.get("storeExamples", 0)

    learnset, learnweight = demangle_examples(learnset)
    testset, testweight = demangle_examples(testset)

    learnset, testset = preprocess_data(learnset, testset, pps)

    classifiers = []
    for learner in learners:
        classifiers.append(learner(learnset, learnweight))
        if callback:
            callback()
    for i in range(len(learners)):
        classifiers[i].name = getattr(learners[i], 'name', 'noname')
    testResults = test_on_data(classifiers, (testset, testweight), testResults, iterationNumber, storeExamples)
    if storeclassifiers:
        testResults.classifiers.append(classifiers)
    return testResults
617
618
def learn_and_test_on_learn_data(learners, learnset, testResults=None, iterationNumber=0, pps=[], callback=None, **argkw):
    """
    This function is similar to the above, except that it learns and
    tests on the same data. It first preprocesses the data with ``"B"``
    preprocessors on the whole data, and afterwards any ``"L"`` or ``"T"``
    preprocessors on separate datasets. Then it induces the model from
    the learning set and tests it on the testing set.

    As with :obj:`learn_and_test_on_test_data`, you can pass an already initialized
    :obj:`ExperimentResults` (argument ``results``) and an iteration number to the
    function. In this case, results of the test will be appended with
    the given iteration number.

    """

    # Both historical spellings of the flag are accepted.
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get("storeClassifiers", 0)
    storeExamples = argkw.get("storeExamples", 0)

    learnset, learnweight = demangle_examples(learnset)

    # Apply "B" (both) preprocessors directly; note whether any set-specific
    # ("L"/"T"/"LT") preprocessors exist -- only then do the learning and
    # testing tables diverge and a copy is needed.
    hasLorT = 0
    for pp in pps:
        if pp[0]=="B":
            learnset = pp[1](learnset)
        else:
            hasLorT = 1

    if hasLorT:
        # Copy the data so "L" and "T" preprocessors do not affect each other.
        testset = Orange.data.Table(learnset)
        for pp in pps:
            if pp[0]=="L":
                learnset = pp[1](learnset)
            elif pp[0]=="T":
                testset = pp[1](testset)
            elif pp[0]=="LT":
                learnset, testset = pp[1](learnset, testset)
    else:
        # No set-specific preprocessing: test on the very same table.
        testset = learnset

    classifiers = []
    for learner in learners:
        classifiers.append(learner(learnset, learnweight))
        if callback:
            callback()
    for i in range(len(learners)):
        classifiers[i].name = getattr(learners[i], "name", "noname")
    testResults = test_on_data(classifiers, (testset, learnweight), testResults, iterationNumber, storeExamples)
    if storeclassifiers:
        testResults.classifiers.append(classifiers)
    return testResults
669
670
def test_on_data(classifiers, testset, testResults=None, iterationNumber=0, storeExamples = False, **argkw):
    """
    This function gets a list of classifiers, not learners like the other
    functions in this module. It classifies each testing example with
    each classifier. You can pass an existing :obj:`ExperimentResults`
    and iteration number, like in :obj:`learnAndTestWithTestData`
    (which actually calls :obj:`testWithTestData`). If you don't, a new
    :obj:`ExperimentResults` will be created.

    """

    testset, testweight = demangle_examples(testset)

    # Without an existing results object, build one for a single iteration,
    # deriving class values / base value from the test data's class variable.
    if not testResults:
        classVar = testset.domain.classVar
        if testset.domain.classVar.varType == Orange.data.Type.Discrete:
            values = classVar.values.native()
            baseValue = classVar.baseValue
        else:
            values = None
            baseValue = -1
        testResults=ExperimentResults(1, [l.name for l in classifiers], values, testweight!=0, baseValue)

    examples = getattr(testResults, "examples", False)
    if examples and len(examples):
        # We must not modify an example table we do not own, so we clone it the
        # first time we have to add to it
        if not getattr(testResults, "examplesCloned", False):
            testResults.examples = Orange.data.Table(testResults.examples)
            testResults.examplesCloned = True
        testResults.examples.extend(testset)
    else:
        # We do not clone at the first iteration - cloning might never be needed at all...
        testResults.examples = testset

    # Class-value converter: int for discrete classes, float otherwise.
    conv = testset.domain.classVar.varType == Orange.data.Type.Discrete and int or float
    for ex in testset:
        # n=0: predictions are appended per classifier via add_result below.
        te = TestedExample(iterationNumber, conv(ex.getclass()), 0, ex.getweight(testweight))

        for classifier in classifiers:
            # This is to prevent cheating:
            ex2 = Orange.data.Instance(ex)
            ex2.setclass("?")
            cr = classifier(ex2, Orange.core.GetBoth)
            te.add_result(cr[0], cr[1])
        testResults.results.append(te)

    return testResults
Note: See TracBrowser for help on using the repository browser.