Changeset 10426:ce57e8dbcc18 in orange


Ignore:
Timestamp:
03/02/12 15:55:49 (2 years ago)
Author:
anzeh <anze.staric@…>
Branch:
default
Message:

Refactored AUC.

Files:
3 edited

Legend:

Unmodified
Added
Removed
  • Orange/evaluation/scoring.py

    r10425 r10426  
    14721472    """ 
    14731473    Compute the area under ROC curve given a set of experimental results. 
    1474     For multivalued class problems, return the result of 
    1475     :obj:`by_weighted_pairs`. 
    14761474    If testing consisted of multiple folds, each fold is scored and the 
    14771475    average score is returned. If a fold contains only instances with the 
     
    14801478    :param test_results: test results to score 
    14811479    :param ignore_weights: ignore instance weights when calculating score 
    1482     :param method: DEPRECATED, call the appropriate method directly. 
    1483     """ 
     1480    :param multiclass: tells what kind of averaging to perform if the target 
     1481                       class has more than 2 values. 
     1482    """ 
     1483 
     1484    #!Compute AUC for each pair of classes (ignoring instances of all other 
     1485    #!classes) and average the results, weighting them by the number of 
     1486    #!pairs of instances from these two classes (e.g. by the product of 
     1487    #!probabilities of the two classes). AUC computed in this way still 
     1488    #!behaves as the concordance index, e.g., gives the probability that two 
     1489    #!randomly chosen instances from different classes will be correctly 
     1490    #!recognized (if the classifier knows from which two classes the 
     1491    #!instances came). 
     1492    ByWeightedPairs = 0 
     1493 
     1494    #!Similar to ByWeightedPairs, except that the average over class pairs 
     1495    #!is not weighted. This AUC is, like the binary version, independent of 
     1496    #!class distributions, but it is not related to the concordance index 
     1497    #!any more. 
     1498    ByPairs = 1 
     1499 
     1500    #!For each class, it computes AUC for this class against all others (that 
     1501    #!is, treating other classes as one class). The AUCs are then averaged by 
     1502    #!the class probabilities. This is related to the concordance index in 
     1503    #!which we test the classifier's (average) capability of distinguishing 
     1504    #!the instances from a specified class from those that come from other 
     1505    #!classes. 
     1506    #!Unlike the binary AUC, the measure is not independent of class 
     1507    #!distributions. 
     1508    WeightedOneAgainstAll = 2 
     1509 
     1510    #!Similar to WeightedOneAgainstAll, except that the average 
     1511    #!is not weighted. 
     1512    OneAgainstAll = 3 
    14841513 
    14851514    @replace_use_weights 
    1486     def __init__(self, test_results=None, method=0, ignore_weights=False): 
     1515    @deprecated_keywords({"method": "multiclass"}) 
     1516    def __init__(self, test_results=None, multiclass=ByWeightedPairs, ignore_weights=False): 
    14871517 
    14881518        super(AUC, self).__init__() 
    14891519 
    14901520        self.ignore_weights=ignore_weights 
    1491         self.method=method 
     1521        self.method=multiclass 
    14921522 
    14931523        if test_results is not None: 
    1494             self.__call__(test_results) 
     1524            self[:] = self.__call__(test_results) 
    14951525 
    14961526    def __call__(self, test_results): 
     
    14981528            raise ValueError("Cannot compute AUC on a single-class problem") 
    14991529        elif len(test_results.class_values) == 2: 
    1500             self._compute_for_binary_class(test_results) 
    1501         else: 
    1502             self._compute_for_multi_value_class(test_results, self.method) 
    1503  
    1504     @classmethod 
    1505     def by_weighted_pairs(cls, res, ignore_weights=False): 
    1506         """ 
    1507         Compute AUC for each pair of classes (ignoring instances of all other 
    1508         classes) and average the results, weighting them by the number of 
    1509         pairs of instances from these two classes (e.g. by the product of 
    1510         probabilities of the two classes). AUC computed in this way still 
    1511         behaves as the concordance index, e.g., gives the probability that two 
    1512         randomly chosen instances from different classes will be correctly 
    1513         recognized (if the classifier knows from which two classes the 
    1514         instances came). 
    1515         """ 
    1516         auc = AUC(ignore_weights=ignore_weights) 
    1517         auc._compute_for_multi_value_class(res, method=cls.ByWeightedPairs) 
    1518         return auc 
    1519  
    1520     @classmethod 
    1521     def by_pairs(cls, res, ignore_weights=False): 
    1522         """ 
    1523         Similar to by_weighted_pairs, except that the average over class pairs 
    1524         is not weighted. This AUC is, like the binary version, independent of 
    1525         class distributions, but it is not related to the concordance index 
    1526         any more. 
    1527         """ 
    1528         auc = AUC(ignore_weights=ignore_weights) 
    1529         auc._compute_for_multi_value_class(res, method=cls.ByPairs) 
    1530         return auc 
    1531  
    1532     @classmethod 
    1533     def weighted_one_against_all(cls, res, ignore_weights=False): 
    1534         """ 
    1535         For each class, it computes AUC for this class against all others (that 
    1536         is, treating other classes as one class). The AUCs are then averaged by 
    1537         the class probabilities. This is related to the concordance index in 
    1538         which we test the classifier's (average) capability of distinguishing 
    1539         the instances from a specified class from those that come from other 
    1540         classes. 
    1541         Unlike the binary AUC, the measure is not independent of class 
    1542         distributions. 
    1543         """ 
    1544         auc = AUC(ignore_weights=ignore_weights) 
    1545         auc._compute_for_multi_value_class(res, 
    1546             method=cls.WeightedOneAgainstAll) 
    1547         return auc 
    1548  
    1549     @classmethod 
    1550     def one_against_all(cls, res, ignore_weights=False): 
    1551         """ 
    1552         Similar to weighted_one_against_all, except that the average 
    1553         is not weighted. 
    1554         """ 
    1555         auc = AUC(ignore_weights=ignore_weights) 
    1556         auc._compute_for_multi_value_class(res, method=cls.OneAgainstAll) 
    1557         return auc 
    1558  
    1559     @classmethod 
    1560     def single_class(cls, res, class_index=-1, ignore_weights=False): 
    1561         """ 
    1562         Compute AUC where the class with the given class_index is singled 
    1563         out and all other classes are treated as a single class. 
    1564         """ 
    1565         if class_index < 0: 
    1566             if res.base_class >= 0: 
    1567                 class_index = res.base_class 
    1568             else: 
    1569                 class_index = 1 
    1570  
    1571         auc = AUC(ignore_weights=ignore_weights) 
    1572         auc._compute_for_single_class(res, class_index) 
    1573         return auc 
    1574  
    1575     @classmethod 
    1576     def pair(cls, res, class_index1, class_index2, ignore_weights=False): 
    1577         """ 
    1578         Computes AUC between a pair of classes, ignoring instances from all 
    1579         other classes. 
    1580         """ 
    1581         auc = AUC(ignore_weights=ignore_weights) 
    1582         auc._compute_for_pair_of_classes(res, class_index1, class_index2) 
    1583         return auc 
    1584  
    1585     @classmethod 
    1586     def matrix(cls, res, ignore_weights=False): 
    1587         """ 
    1588         Compute a (lower diagonal) matrix with AUCs for all pairs of classes. 
    1589         If there are empty classes, the corresponding elements in the matrix 
    1590         are -1. 
    1591         """ 
    1592         auc = AUC(ignore_weights=ignore_weights) 
    1593         auc._compute_matrix(res) 
    1594         return auc 
     1530            return self._compute_for_binary_class(test_results) 
     1531        else: 
     1532            return self._compute_for_multi_value_class(test_results, self.method) 
    15951533 
    15961534    def _compute_for_binary_class(self, res): 
     
    16021540                (-1, res, res.number_of_iterations)) 
    16031541        else: 
    1604             auc, _ = self._compute_one_class_against_all(res, -1) 
    1605             self[:] = auc 
    1606             return self 
     1542            return self._compute_one_class_against_all(res, -1)[0] 
    16071543 
    16081544    def _compute_for_multi_value_class(self, res, method=0): 
     
    16581594            sum_aucs = [x/usefulClassPairs for x in sum_aucs] 
    16591595 
    1660         self[:] = sum_aucs 
    1661         return self 
     1596        return sum_aucs 
    16621597 
    16631598    # computes the average AUC over folds using "AUCcomputer" (AUC_i or AUC_ij) 
     
    16801615                return aucs 
    16811616            subsum_aucs = map(add, subsum_aucs, aucs) 
    1682         self[:] = subsum_aucs 
    1683         return self 
     1617        return subsum_aucs 
    16841618 
    16851619    # Computes AUC 
     
    16891623    def _compute_for_single_class(self, res, class_index): 
    16901624        if res.number_of_iterations > 1: 
    1691             self._compute_for_multiple_folds( 
     1625            return self._compute_for_multiple_folds( 
    16921626                self._compute_one_class_against_all, split_by_iterations(res), 
    16931627                (class_index, res, res.number_of_iterations)) 
    16941628        else: 
    1695             self._compute_one_class_against_all(res, class_index) 
     1629            return self._compute_one_class_against_all(res, class_index) 
    16961630 
    16971631    # Computes AUC for a pair of classes (as if there were no other classes) 
     
    17001634    def _compute_for_pair_of_classes(self, res, class_index1, class_index2): 
    17011635        if res.number_of_iterations > 1: 
    1702             self._compute_for_multiple_folds( 
     1636            return self._compute_for_multiple_folds( 
    17031637                self._compute_one_class_against_another, 
    17041638                split_by_iterations(res), 
    17051639                (class_index1, class_index2, res, res.number_of_iterations)) 
    17061640        else: 
    1707             self._compute_one_class_against_another(res, class_index1, 
     1641            return self._compute_one_class_against_another(res, class_index1, 
    17081642                                                    class_index2) 
    17091643 
     
    17571691        return False, False 
    17581692 
    1759     def _compute_matrix(self, res): 
    1760         numberOfClasses = len(res.class_values) 
    1761         number_of_learners = res.number_of_learners 
    1762         if res.number_of_iterations > 1: 
    1763             iterations, all_ite = split_by_iterations(res), res 
    1764         else: 
    1765             iterations, all_ite = [res], None 
     1693class AUC_for_single_class(AUC): 
     1694    """ 
     1695    Compute AUC where the class with the given class_index is singled 
     1696    out and all other classes are treated as a single class. 
     1697    """ 
     1698    def __init__(self, test_results=None, class_index=-1, ignore_weights=False): 
     1699        if class_index < 0: 
     1700            if test_results and test_results.base_class >= 0: 
     1701                self.class_index = test_results.base_class 
     1702            else: 
     1703                self.class_index = 1 
     1704        else: 
     1705            self.class_index = class_index 
     1706 
     1707        super(AUC_for_single_class, self).__init__(test_results, ignore_weights=ignore_weights) 
     1708 
     1709    def __call__(self, test_results): 
     1710        return self._compute_for_single_class(test_results, self.class_index) 
     1711 
     1712 
     1713class AUC_for_pair_of_classes(AUC): 
     1714    """ 
     1715    Computes AUC between a pair of classes, ignoring instances from all 
     1716    other classes. 
     1717    """ 
     1718    def __init__(self, test_results, class_index1, class_index2, ignore_weights=False): 
     1719        self.class_index1 = class_index1 
     1720        self.class_index2 = class_index2 
     1721 
     1722        super(AUC_for_pair_of_classes, self).__init__(test_results, ignore_weights=ignore_weights) 
     1723 
     1724    def __call__(self, test_results): 
     1725        return self._compute_for_pair_of_classes(test_results, self.class_index1, self.class_index2) 
     1726 
     1727 
     1728class AUC_matrix(AUC): 
     1729    """ 
     1730    Compute a (lower diagonal) matrix with AUCs for all pairs of classes. 
     1731    If there are empty classes, the corresponding elements in the matrix 
     1732    are -1. 
     1733    """ 
     1734 
     1735    def __call__(self, test_results): 
     1736        numberOfClasses = len(test_results.class_values) 
     1737        number_of_learners = test_results.number_of_learners 
     1738        if test_results.number_of_iterations > 1: 
     1739            iterations, all_ite = split_by_iterations(test_results), test_results 
     1740        else: 
     1741            iterations, all_ite = [test_results], None 
    17661742        aucs = [[[] for _ in range(numberOfClasses)] 
    1767                 for _ in range(number_of_learners)] 
     1743        for _ in range(number_of_learners)] 
    17681744        for classIndex1 in range(numberOfClasses): 
    17691745            for classIndex2 in range(classIndex1): 
     
    17711747                    self._compute_one_class_against_another, iterations, 
    17721748                    (classIndex1, classIndex2, all_ite, 
    1773                      res.number_of_iterations)) 
     1749                     test_results.number_of_iterations)) 
    17741750                if pair_aucs: 
    17751751                    for lrn in range(number_of_learners): 
     
    17781754                    for lrn in range(number_of_learners): 
    17791755                        aucs[lrn][classIndex1].append(-1) 
    1780         self[:] = aucs 
    17811756        return aucs 
    17821757 
    17831758#Backward compatibility 
    1784 AUC.ByWeightedPairs = 0 
    1785 AUC.ByPairs = 1 
    1786 AUC.WeightedOneAgainstAll = 2 
    1787 AUC.OneAgainstAll = 3 
    1788  
    17891759@replace_use_weights 
    17901760def AUC_binary(res, ignore_weights=False): 
     
    18171787def AUC_i(ite, class_index, ignore_weights=False, all_ite=None, 
    18181788          divide_by_if_ite=1.): 
    1819     auc = deprecated_function_name(AUC)() 
     1789    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) 
    18201790    result = auc._compute_one_class_against_another(ite, class_index, 
    1821         all_ite=None, divide_by_if_ite=1.) 
     1791        all_ite=all_ite, divide_by_if_ite=divide_by_if_ite) 
    18221792    return result 
    18231793 
     
    18281798    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) 
    18291799    result = auc._compute_one_class_against_another( 
    1830         ite, class_index1, class_index2, all_ite=None, divide_by_if_ite=1.) 
     1800        ite, class_index1, class_index2, all_ite=all_ite, divide_by_if_ite=divide_by_if_ite) 
    18311801    return result 
    1832  
    1833  
    1834  
    1835  
    1836 #AUC_binary = replace_use_weights(deprecated_function_name(AUC()._compute_for_binary_class)) 
    1837 #AUC_multi = replace_use_weights(deprecated_function_name(AUC._compute_for_multi_value_class)) 
    1838 #AUC_iterations = replace_use_weights(deprecated_function_name(AUC._compute_for_multiple_folds)) 
    1839 #AUC_x = replace_use_weights(deprecated_function_name(AUC._compute_auc)) 
    1840 #AUC_i = replace_use_weights(deprecated_function_name(AUC._compute_one_class_against_all)) 
    1841 #AUC_ij = replace_use_weights(deprecated_function_name(AUC._compute_one_class_against_another)) 
    18421802 
    18431803AUC_single = replace_use_weights( 
    18441804             deprecated_keywords({"classIndex": "class_index"})( 
    1845              deprecated_function_name(AUC.single_class))) 
     1805             deprecated_function_name(AUC_for_single_class))) 
    18461806AUC_pair = replace_use_weights( 
    18471807           deprecated_keywords({"classIndex1": "class_index1", 
    18481808                                "classIndex2": "class_index2"})( 
    1849            deprecated_function_name(AUC.pair))) 
    1850 AUC_matrix = replace_use_weights(deprecated_function_name(AUC.matrix)) 
     1809           deprecated_function_name(AUC_for_pair_of_classes))) 
     1810AUC_matrix = replace_use_weights(deprecated_function_name(AUC_matrix)) 
    18511811 
    18521812 
  • Orange/testing/unit/tests/test_evaluation_scoring.py

    r10425 r10426  
    4545        ds = data.Table("iris") 
    4646        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    47         auc = scoring.AUC.by_pairs(test_results) 
     47        auc = scoring.AUC(test_results, multiclass=scoring.AUC.ByPairs) 
    4848 
    4949        self.assertEqual(len(auc), 1) 
     
    5252        ds = data.Table("iris") 
    5353        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    54         auc = scoring.AUC.by_weighted_pairs(test_results) 
     54        auc = scoring.AUC(test_results, multiclass=scoring.AUC.ByWeightedPairs) 
    5555 
    5656        self.assertEqual(len(auc), 1) 
     
    5959        ds = data.Table("iris") 
    6060        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    61         auc = scoring.AUC.one_against_all(test_results) 
     61        auc = scoring.AUC(test_results, multiclass=scoring.AUC.OneAgainstAll) 
    6262 
    6363        self.assertEqual(len(auc), 1) 
     
    6666        ds = data.Table("iris") 
    6767        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    68         auc = scoring.AUC.weighted_one_against_all(test_results) 
     68        auc = scoring.AUC(test_results, multiclass=scoring.AUC.WeightedOneAgainstAll) 
    6969 
    7070        self.assertEqual(len(auc), 1) 
     
    7373        ds = data.Table("iris") 
    7474        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    75         auc = scoring.AUC.single_class(test_results) 
    76         self.assertEqual(len(auc), 1) 
    77         auc = scoring.AUC.single_class(test_results, 0) 
    78         self.assertEqual(len(auc), 1) 
    79         auc = scoring.AUC.single_class(test_results, 1) 
    80         self.assertEqual(len(auc), 1) 
    81         auc = scoring.AUC.single_class(test_results, 2) 
     75        auc = scoring.AUC_for_single_class(test_results) 
     76        self.assertEqual(len(auc), 1) 
     77        auc = scoring.AUC_for_single_class(test_results, 0) 
     78        self.assertEqual(len(auc), 1) 
     79        auc = scoring.AUC_for_single_class(test_results, 1) 
     80        self.assertEqual(len(auc), 1) 
     81        auc = scoring.AUC_for_single_class(test_results, 2) 
    8282        self.assertEqual(len(auc), 1) 
    8383 
     
    8585        ds = data.Table("iris") 
    8686        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    87         auc = scoring.AUC.pair(test_results, 0, 1) 
    88         self.assertEqual(len(auc), 1) 
    89         auc = scoring.AUC.pair(test_results, 0, 2) 
    90         self.assertEqual(len(auc), 1) 
    91         auc = scoring.AUC.pair(test_results, 1, 2) 
     87        auc = scoring.AUC_for_pair_of_classes(test_results, 0, 1) 
     88        self.assertEqual(len(auc), 1) 
     89        auc = scoring.AUC_for_pair_of_classes(test_results, 0, 2) 
     90        self.assertEqual(len(auc), 1) 
     91        auc = scoring.AUC_for_pair_of_classes(test_results, 1, 2) 
    9292        self.assertEqual(len(auc), 1) 
    9393 
     
    9595        ds = data.Table("iris") 
    9696        test_results = testing.cross_validation([self.learner], ds, folds=5) 
    97         auc = scoring.AUC.matrix(test_results) 
     97        auc = scoring.AUC_matrix(test_results) 
    9898        self.assertEqual(len(auc), 1) 
    9999        self.assertEqual(len(auc[0]), 3) 
     
    246246    def score(self): 
    247247        return scoring.MCC 
     248 
    248249if __name__ == '__main__': 
    249250    unittest.main() 
  • docs/reference/rst/Orange.evaluation.scoring.rst

    r10425 r10426  
    4848.. autofunction:: Brier_score 
    4949 
    50 .. autoclass:: AUC 
    51     :members: by_weighted_pairs, by_pairs, 
    52               weighted_one_against_all, one_against_all, single_class, pair, 
    53  
     50.. autofunction:: AUC 
     51.. autofunction:: AUC_for_single_class 
     52.. autofunction:: AUC_matrix 
    5453.. autofunction:: AUCWilcoxon 
    5554 
Note: See TracChangeset for help on using the changeset viewer.