Changeset 10967:98c1806351a7 in orange


Timestamp:
    08/21/12 14:02:59 (20 months ago)
Author:
    mlevar
Branch:
    default
Message:
    Scoring additions, preparation for Clustering Trees and multi-target addon
Files:
    5 edited

Legend:

    ' ' unmodified
    '+' added
    '-' removed
  • Orange/ensemble/forest.py

    r10851 → r10967

@@ -6,4 +6,5 @@
 import copy
 from Orange.utils import deprecated_keywords
+from operator import add

 def _default_small_learner(attributes=None, rand=None, base=None):
@@ -22,5 +23,5 @@

 def _wrap_learner(base, rand, randorange):
-    if base == None or isinstance(base, Orange.classification.tree.SimpleTreeLearner):
+    if base == None or isinstance(base, Orange.classification.tree.SimpleTreeLearner) or isinstance(base, Orange.core.ClusteringTreeLearner):
         return _default_simple_learner(base, randorange)
     elif isinstance(base, Orange.classification.tree.TreeLearner):
@@ -51,5 +52,4 @@
 _RandomForestSimpleTreeLearner = Orange.utils.deprecated_members({"weightID":"weight_id", "examples":"instances"})(_RandomForestSimpleTreeLearner)

-
 class _RandomForestTreeLearner(Orange.core.Learner):
     """ A learner which wraps an ordinary TreeLearner with
@@ -139,5 +139,5 @@

     __new__ = Orange.utils._orange__new__(Orange.core.Learner)
-
+
     def __init__(self, trees=100, attributes=None,\
                     name='Random Forest', rand=None, callback=None, base_learner=None, learner=None):
@@ -199,5 +199,6 @@
                     domain=instances.domain, class_var=instances.domain.class_var, \
                     class_vars=instances.domain.class_vars)
-
+
+
 RandomForestLearner = Orange.utils.deprecated_members({"examples":"instances"})(RandomForestLearner)

@@ -249,7 +250,5 @@
               :class:`Orange.statistics.Distribution` or a tuple with both
         """
-        from operator import add
-
-        instance = Orange.data.Instance(self.domain, instance)
+
         # get results to avoid multiple calls
         res_both = [c(instance, orange.GetBoth) for c in self.classifiers]
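
A minimal usage sketch of what the _wrap_learner change affects: a SimpleTreeLearner (and, after this change, a ClusteringTreeLearner) passed as base_learner is routed to the fast _default_simple_learner wrapper rather than the generic _RandomForestTreeLearner path. Assumes an Orange 2.x install; the dataset is illustrative:

    import Orange

    data = Orange.data.Table("iris")

    # After this change, an Orange.core.ClusteringTreeLearner base would
    # take the same fast path inside _wrap_learner as SimpleTreeLearner.
    base = Orange.classification.tree.SimpleTreeLearner()
    forest = Orange.ensemble.forest.RandomForestLearner(trees=50, base_learner=base)
    classifier = forest(data)
    print(classifier(data[0]))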
  • Orange/evaluation/scoring.py

    r10935 → r10967

@@ -2649,107 +2649,59 @@
 #    pass

-
-def mt_average_score(res, score, weights=None):
-    """
-    Compute individual scores for each target and return the (weighted) average.
-
-    One method can be used to compute scores for all targets or a list of
-    scoring methods can be passed to use different methods for different
-    targets. In the latter case, care has to be taken if the ranges of scoring
-    methods differ.
-    For example, when the first target is scored from -1 to 1 (1 best) and the
-    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
-    to a span of 1, and invert the second so that higher scores are better.
-
-    :param score: Single-target scoring method or a list of such methods
-                  (one for each target).
-    :param weights: List of real weights, one for each target,
-                    for a weighted average.
-
-    """
-    if not len(res.results):
-        raise ValueError, "Cannot compute the score: no examples."
-    if res.number_of_learners < 1:
-        return []
-    n_classes = len(res.results[0].actual_class)
-    if weights is None:
-        weights = [1.] * n_classes
-    if not isinstance(score, Iterable):
-        score = [score] * n_classes
-    elif len(score) != n_classes:
-        raise ValueError, "Number of scoring methods and targets do not match."
-    # save original classes
-    clsss = [te.classes for te in res.results]
-    aclsss = [te.actual_class for te in res.results]
-    # compute single target scores
-    single_scores = []
-    for i in range(n_classes):
-        for te, clss, aclss in zip(res.results, clsss, aclsss):
-            te.classes = [cls[i] for cls in clss]
-            te.actual_class = aclss[i]
-        single_scores.append(score[i](res))
-    # restore original classes
-    for te, clss, aclss in zip(res.results, clsss, aclsss):
-        te.classes = clss
-        te.actual_class = aclss
-    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
-        for scores in zip(*single_scores)]
-
-def mt_flattened_score(res, score):
-    """
-    Flatten (concatenate into a single list) the predictions of multiple
-    targets and compute a single-target score.
-
-    :param score: Single-target scoring method.
-    """
-    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
-        res.classifier_names, class_values=res.class_values,
-        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
-        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
-    for te in res.results:
-        for i, ac in enumerate(te.actual_class):
-            te2 = Orange.evaluation.testing.TestedExample(
-                iteration_number=te.iteration_number, actual_class=ac)
-            for c, p in zip(te.classes, te.probabilities):
-                te2.add_result(c[i], p[i])
-            res2.results.append(te2)
-    return score(res2)
-
-def mt_global_accuracy(res):
-    """
-    :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline`
-
-    :math:`\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.`
+def logloss(res):
+    """
+    Calculates LogLoss; n is the number of all test results and :math:`p_{i}` is the probability
+    with which the classifier predicted the actual class.
+    :math:`LogLoss = \\frac{1}{n}\\sum_{i = 1}^{n} -max(log(p_{i}), log \\frac{1}{n}) \\newline`
     """
     results = []
-    for l in xrange(res.number_of_learners):
-        n_results = len(res.results)
-        n_correct = 0.
-
+    n_results = len(res.results)
+    min_log = math.log(1.0/n_results)
+    for l in xrange(res.number_of_learners):
+        temp = 0.0
         for r in res.results:
-            if list(r.classes[l]) == r.actual_class:
-                n_correct+=1
-
-        results.append(n_correct/n_results)
+            if not r.probabilities[l]:
+                raise ValueError, "Probabilities are needed to compute logloss"
+            temp-=max(math.log(max(r.probabilities[l][int(r.actual_class)],1e-20)),min_log)
+
+        results.append(temp/n_results)
     return results


-def mt_mean_accuracy(res):
-    """
-    :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline`
-
-    :math:`\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.`
-    """
+def mlc_F1_micro(res):
+    """
+    F1_{micro} = 2 * \frac{\overline{precision} * \overline{recall}}{\overline{precision} + \overline{recall}}
+    """
+
+    precision = mlc_precision(res)
+    recall = mlc_recall(res)
+    return [2 * p * r / (p + r) for p,r in zip(precision, recall)]
+
+
+def mlc_F1_macro(res):
+    """
+    F1_{macro} = \frac{1}{d}\sum_{j=0}^{d} 2 * \frac{precision_j * recall_j}{precision_j + recall_j}
+    """
+
     results = []
-    for l in xrange(res.number_of_learners):
-        n_classes = len(res.results[0].actual_class)
-        n_results = len(res.results)
-        n_correct = 0.
-
+    n_results = gettotsize(res)
+    n_classes = len(res.results[0].actual_class)
+
+    for l in xrange(res.number_of_learners):
+        true_positive = [0.0] * n_classes
+        sum_fptp = [0.0] * n_classes
+        sum_fntp = [0.0] * n_classes
         for r in res.results:
-            for i in xrange(n_classes):
-                if r.classes[l][i] == r.actual_class[i]:
-                    n_correct+=1
-        results.append(n_correct/n_classes/n_results)
+            aclass = r.actual_class
+            for i, cls_val in enumerate(r.classes[l]):
+                if aclass[i] and cls_val:
+                    true_positive[i] += 1
+                if cls_val:
+                    sum_fptp[i] += 1
+                if aclass[i]:
+                    sum_fntp[i] += 1
+
+        results.append(sum([ 2*(tp/fptp * tp/fntp)/(tp/fptp + tp/fntp) for tp, fptp, fntp in \
+            zip(true_positive, sum_fptp, sum_fntp)] ) / n_classes)
     return results

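
The added logloss implements the clipped form in its docstring: each test instance contributes -max(log(p_i), log(1/n)), so even a grossly wrong prediction costs at most -log(1/n). A standalone sketch of the same arithmetic (plain Python; the function name and the probabilities are illustrative, not part of the changeset):

    import math

    def logloss_sketch(p_actual):
        # p_actual: the probability the classifier assigned to the true
        # class, one value per test instance
        n = len(p_actual)
        min_log = math.log(1.0 / n)  # clipping floor, log(1/n)
        return sum(-max(math.log(max(p, 1e-20)), min_log)
                   for p in p_actual) / n

    print(logloss_sketch([0.9, 0.6, 0.2]))  # ~0.5716

The new mlc_F1_micro combines the existing mlc_precision and mlc_recall into 2pr/(p+r), while mlc_F1_macro accumulates per-label true positives, predicted positives (sum_fptp) and actual positives (sum_fntp), then averages the per-label F1 over the n_classes labels.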
  • Orange/evaluation/testing.py

    r10898 → r10967

@@ -187,4 +187,5 @@
                     self.converter = float
             elif test_type in (TEST_TYPE_MLC, TEST_TYPE_MULTITARGET):
+                self.class_values = [list(cv.values) if cv.var_type == cv.Discrete else None for cv in domain.class_vars]
                 self.labels = [var.name for var in domain.class_vars]
                 self.converter = mt_vals
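
The line added here records, for every target variable, its list of values (None for continuous targets). A small illustration of what the comprehension yields, assuming the Orange 2.x feature API (the variables are made up):

    import Orange

    class_vars = [Orange.feature.Discrete("t1", values=["a", "b"]),
                  Orange.feature.Continuous("t2")]
    class_values = [list(cv.values) if cv.var_type == cv.Discrete else None
                    for cv in class_vars]
    print(class_values)  # [['a', 'b'], None]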
  • Orange/testing/unit/tests/test_display_name_mapping.py

    r10385 → r10967

@@ -7,4 +7,8 @@

 class TestNameMapping(unittest.TestCase):
+
+    exempt = ["Orange.multitarget.tree",
+        ]
+
     def test_qualified_names(self):
         """ Test that qualified names of core C++ objects
@@ -14,4 +18,6 @@
         for cls in orange.__dict__.values():
             if type(cls) == type:
+                if cls.__module__ in exempt:
+                    pass
                 try:
                     cls2 = eval(cls.__module__ + "." + cls.__name__)
  • docs/reference/rst/Orange.evaluation.scoring.rst

    r10784 → r10967

@@ -125,56 +125,4 @@
 .. autofunction:: split_by_iterations

-
-.. _mt-scoring:
-
-============
-Multi-target
-============
-
-:doc:`Multi-target <Orange.multitarget>` classifiers predict values for
-multiple target classes. They can be used with standard
-:obj:`~Orange.evaluation.testing` procedures (e.g.
-:obj:`~Orange.evaluation.testing.Evaluation.cross_validation`), but require
-special scoring functions to compute a single score from the obtained
-:obj:`~Orange.evaluation.testing.ExperimentResults`.
-Since different targets can vary in importance depending on the experiment,
-some methods have options to indicate this, e.g. through weights or customized
-distance functions. These can also be used for normalization in case target
-values do not have the same scales.
-
-.. autofunction:: mt_flattened_score
-.. autofunction:: mt_average_score
-
-The whole procedure of evaluating multi-target methods and computing
-the scores (RMSE errors) is shown in the following example
-(:download:`mt-evaluate.py <code/mt-evaluate.py>`). Because we consider
-the first target to be more important and the last less so, we
-indicate this using appropriate weights.
-
-.. literalinclude:: code/mt-evaluate.py
-
-Which outputs::
-
-    Weighted RMSE scores:
-        Majority    0.8228
-          MTTree    0.3949
-             PLS    0.3021
-           Earth    0.2880
-
-Two more accuracy measures based on the article by Zaragoza et al. (2011); applicable to discrete classes:
-
-Global accuracy (accuracy per example) over a d-dimensional class variable:
-
-.. autofunction:: mt_global_accuracy
-
-Mean accuracy (accuracy per class or per label) over d class variables:
-
-.. autofunction:: mt_mean_accuracy
-
-References
-==========
-
-Zaragoza, J.H., Sucar, L.E., Morales, E.F., Bielza, C., Larranaga, P. (2011). Bayesian Chain Classifiers for Multidimensional Classification. Proc. of the International Joint Conference on Artificial Intelligence (IJCAI-2011), pp. 2192-2197.
-
 ==========================
 Multi-label classification
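
The removed section is headed for the separate multi-target add-on (per the commit message). The weighting behaviour it documents for mt_average_score reduces to a weighted mean of per-target scores; a sketch with made-up numbers:

    def weighted_average(scores, weights):
        # mirrors mt_average_score's final step
        return sum(w * s for w, s in zip(weights, scores)) / sum(weights)

    # two targets scored 0.8 and 0.4, the first twice as important:
    print(weighted_average([0.8, 0.4], [2.0, 1.0]))  # 0.666...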