Changeset 9999:3b8b4cc606c0 in orange


Timestamp: 02/07/12 19:04:58 (2 years ago)
Author: anze <anze.staric@…>
Branch: default
Children: 10000:d65550cf0356, 10003:1631c2f30f11
rebase_source: 395f47303359a995a9ab21929a90119d9a55de6f
Message: Improved documentation.
Files: 3 edited

  • Orange/evaluation/scoring.py

    r9892 r9999  
    44 
    55import Orange 
    6 from Orange import statc 
     6from Orange import statc, corn 
    77from Orange.misc import deprecated_keywords 
    88 
     
    306306 
    307307@deprecated_keywords({"reportSE": "report_se"}) 
    308 def CA(res, report_se = False, **argkw): 
    309     """ Computes classification accuracy, i.e. percentage of matches between 
    310     predicted and actual class. The function returns a list of classification 
    311     accuracies of all classifiers tested. If reportSE is set to true, the list 
    312     will contain tuples with accuracies and standard errors. 
    313      
    314     If results are from multiple repetitions of experiments (like those 
    315     returned by Orange.evaluation.testing.crossValidation or 
    316     Orange.evaluation.testing.proportionTest) the 
    317     standard error (SE) is estimated from deviation of classification 
    318     accuracy accross folds (SD), as SE = SD/sqrt(N), where N is number 
    319     of repetitions (e.g. number of folds). 
    320      
    321     If results are from a single repetition, we assume independency of 
    322     instances and treat the classification accuracy as distributed according 
    323     to binomial distribution. This can be approximated by normal distribution, 
    324     so we report the SE of sqrt(CA*(1-CA)/N), where CA is classification 
    325     accuracy and N is number of test instances. 
    326      
    327     Instead of ExperimentResults, this function can be given a list of 
    328     confusion matrices (see below). Standard errors are in this case 
    329     estimated using the latter method. 
    330     """ 
    331     if res.number_of_iterations==1: 
    332         if type(res)==ConfusionMatrix: 
    333             div = nm.TP+nm.FN+nm.FP+nm.TN 
     308def CA(test_results, report_se = False, **argkw): 
     309    """Return percentage of matches between predicted and actual class. 
     310 
     311    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults` 
     312                         or :obj:`ConfusionMatrix`. 
     313    :param report_se: include standard error in result. 
     314    :rtype: list of scores, one for each learner. 
     315 
     316    Standard errors are estimated from deviation of CAs across folds (if 
     317    test_results were produced by cross_validation) or approximated under 
     318    the assumption of normal distribution otherwise. 
     319    """ 
     320    if isinstance(test_results, list) and len(test_results) > 0 \ 
     321                             and isinstance(test_results[0], ConfusionMatrix): 
     322        results = [] 
     323        for cm in test_results: 
     324            div = cm.TP+cm.FN+cm.FP+cm.TN 
    334325            check_non_zero(div) 
    335             ca = [(nm.TP+nm.TN)/div] 
    336         else: 
    337             CAs = [0.0]*res.number_of_learners 
    338             if argkw.get("unweighted", 0) or not res.weights: 
    339                 totweight = gettotsize(res) 
    340                 for tex in res.results: 
    341                     CAs = map(lambda res, cls: res+(cls==tex.actual_class), CAs, tex.classes) 
    342             else: 
    343                 totweight = 0. 
    344                 for tex in res.results: 
    345                     CAs = map(lambda res, cls: res+(cls==tex.actual_class and tex.weight), CAs, tex.classes) 
    346                     totweight += tex.weight 
    347             check_non_zero(totweight) 
    348             ca = [x/totweight for x in CAs] 
     326            results.append((cm.TP+cm.TN)/div) 
     327        return results 
     328    elif test_results.number_of_iterations==1: 
     329        CAs = [0.0]*test_results.number_of_learners 
     330        if argkw.get("unweighted", 0) or not test_results.weights: 
     331            totweight = gettotsize(test_results) 
     332            for tex in test_results.results: 
     333                CAs = map(lambda res, cls: res+(cls==tex.actual_class), CAs, tex.classes) 
     334        else: 
     335            totweight = 0. 
     336            for tex in test_results.results: 
     337                CAs = map(lambda res, cls: res+(cls==tex.actual_class and tex.weight), CAs, tex.classes) 
     338                totweight += tex.weight 
     339        check_non_zero(totweight) 
     340        ca = [x/totweight for x in CAs] 
    349341             
    350342        if report_se: 
     
    354346         
    355347    else: 
    356         CAsByFold = [[0.0]*res.number_of_iterations for i in range(res.number_of_learners)] 
    357         foldN = [0.0]*res.number_of_iterations 
    358  
    359         if argkw.get("unweighted", 0) or not res.weights: 
    360             for tex in res.results: 
    361                 for lrn in range(res.number_of_learners): 
     348        CAsByFold = [[0.0]*test_results.number_of_iterations for i in range(test_results.number_of_learners)] 
     349        foldN = [0.0]*test_results.number_of_iterations 
     350 
     351        if argkw.get("unweighted", 0) or not test_results.weights: 
     352            for tex in test_results.results: 
     353                for lrn in range(test_results.number_of_learners): 
    362354                    CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class) 
    363355                foldN[tex.iteration_number] += 1 
    364356        else: 
    365             for tex in res.results: 
    366                 for lrn in range(res.number_of_learners): 
     357            for tex in test_results.results: 
     358                for lrn in range(test_results.number_of_learners): 
    367359                    CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class) and tex.weight 
    368360                foldN[tex.iteration_number] += tex.weight 
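
A minimal usage sketch of the renamed signature (the setup mirrors statExample0.py at the bottom of this changeset; the second call shows the report_se behaviour described in the docstring)::

    import Orange

    learners = [Orange.classification.bayes.NaiveLearner(name="bayes")]
    voting = Orange.data.Table("voting")
    res = Orange.evaluation.testing.cross_validation(learners, voting)

    print Orange.evaluation.scoring.CA(res)                   # one accuracy per learner
    print Orange.evaluation.scoring.CA(res, report_se=True)   # (accuracy, standard error) tuples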
     
    564556     
    565557class ConfusionMatrix: 
    566     """ Class ConfusionMatrix stores data about false and true 
    567     predictions compared to real class. It stores the number of 
    568     True Negatives, False Positive, False Negatives and True Positives. 
     558    """ 
     559    Classification result summary 
     560 
     561    .. attribute:: TP 
     562 
     563        True Positive predictions 
     564 
     565    .. attribute:: TN 
     566 
     567        True Negative predictions 
     568 
     569    .. attribute:: FP 
     570 
     571        False Positive predictions 
     572 
     573    .. attribute:: FN 
     574 
     575        False Negative predictions 
    569576    """ 
    570577    def __init__(self): 
    571578        self.TP = self.FN = self.FP = self.TN = 0.0 
    572579 
    573     def addTFPosNeg(self, predictedPositive, isPositive, weight = 1.0): 
    574         if predictedPositive: 
    575             if isPositive: 
     580    @deprecated_keywords({"predictedPositive": "predicted_positive", 
     581                          "isPositive": "is_positive"}) 
     582    def addTFPosNeg(self, predicted_positive, is_positive, weight = 1.0): 
     583        """ 
     584        Update confusion matrix with result of a single classification 
     585 
     586        :param predicted_positive: positive class value was predicted 
     587        :param is_positive: correct class value is positive 
     588        :param weight: weight of the selected instance 
     589         """ 
     590        if predicted_positive: 
     591            if is_positive: 
    576592                self.TP += weight 
    577593            else: 
    578594                self.FP += weight 
    579595        else: 
    580             if isPositive: 
     596            if is_positive: 
    581597                self.FN += weight 
    582598            else: 
    583599                self.TN += weight 
    584600 
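
A minimal sketch of filling a ConfusionMatrix by hand with addTFPosNeg (four hypothetical unweighted predictions, one falling into each cell)::

    cm = ConfusionMatrix()
    for predicted, actual in [(True, True), (True, False), (False, True), (False, False)]:
        cm.addTFPosNeg(predicted, actual)
    print cm.TP, cm.FP, cm.FN, cm.TN    # 1.0 1.0 1.0 1.0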
    585  
    586 @deprecated_keywords({"classIndex": "class_index"}) 
    587 def confusion_matrices(res, class_index=-1, **argkw): 
    588     """ This function can compute two different forms of confusion matrix: 
    589     one in which a certain class is marked as positive and the other(s) 
    590     negative, and another in which no class is singled out. The way to 
    591     specify what we want is somewhat confusing due to backward 
    592     compatibility issues. 
    593     """ 
    594     tfpns = [ConfusionMatrix() for i in range(res.number_of_learners)] 
     601@deprecated_keywords({"res": "test_results", 
     602                      "classIndex": "class_index"}) 
     603def confusion_matrices(test_results, class_index=1, 
     604                       unweighted=False, cutoff=.5): 
     605    """ 
     606    Return confusion matrices for test_results. 
     607 
     608    :param test_results: test results 
     609    :param class_index: index of class value for which the confusion matrices 
     610                        are to be computed. 
     611    :param unweighted: ignore instance weights. 
      612    :param cutoff: cutoff for the predicted probability of the positive class. 
     613 
     614    :rtype: list of :obj:`ConfusionMatrix` 
     615    """ 
     616    tfpns = [ConfusionMatrix() for i in range(test_results.number_of_learners)] 
    595617     
    596618    if class_index<0: 
    597         numberOfClasses = len(res.class_values) 
     619        numberOfClasses = len(test_results.class_values) 
    598620        if class_index < -1 or numberOfClasses > 2: 
    599             cm = [[[0.0] * numberOfClasses for i in range(numberOfClasses)] for l in range(res.number_of_learners)] 
    600             if argkw.get("unweighted", 0) or not res.weights: 
    601                 for tex in res.results: 
     621            cm = [[[0.0] * numberOfClasses for i in range(numberOfClasses)] for l in range(test_results.number_of_learners)] 
     622            if unweighted or not test_results.weights: 
     623                for tex in test_results.results: 
    602624                    trueClass = int(tex.actual_class) 
    603625                    for li, pred in enumerate(tex.classes): 
     
    606628                            cm[li][trueClass][predClass] += 1 
    607629            else: 
    608                 for tex in enumerate(res.results): 
     630                for tex in enumerate(test_results.results): 
    609631                    trueClass = int(tex.actual_class) 
    610632                    for li, pred in tex.classes: 
     
    614636            return cm 
    615637             
    616         elif res.baseClass>=0: 
    617             class_index = res.baseClass 
     638        elif test_results.baseClass>=0: 
     639            class_index = test_results.baseClass 
    618640        else: 
    619641            class_index = 1 
    620              
    621     cutoff = argkw.get("cutoff") 
    622     if cutoff: 
    623         if argkw.get("unweighted", 0) or not res.weights: 
    624             for lr in res.results: 
     642 
     643    if cutoff != .5: 
     644        if unweighted or not test_results.weights: 
     645            for lr in test_results.results: 
    625646                isPositive=(lr.actual_class==class_index) 
    626                 for i in range(res.number_of_learners): 
     647                for i in range(test_results.number_of_learners): 
    627648                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index]>cutoff, isPositive) 
    628649        else: 
    629             for lr in res.results: 
     650            for lr in test_results.results: 
    630651                isPositive=(lr.actual_class==class_index) 
    631                 for i in range(res.number_of_learners): 
     652                for i in range(test_results.number_of_learners): 
    632653                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index]>cutoff, isPositive, lr.weight) 
    633654    else: 
    634         if argkw.get("unweighted", 0) or not res.weights: 
    635             for lr in res.results: 
     655        if unweighted or not test_results.weights: 
     656            for lr in test_results.results: 
    636657                isPositive=(lr.actual_class==class_index) 
    637                 for i in range(res.number_of_learners): 
     658                for i in range(test_results.number_of_learners): 
    638659                    tfpns[i].addTFPosNeg(lr.classes[i]==class_index, isPositive) 
    639660        else: 
    640             for lr in res.results: 
     661            for lr in test_results.results: 
    641662                isPositive=(lr.actual_class==class_index) 
    642                 for i in range(res.number_of_learners): 
     663                for i in range(test_results.number_of_learners): 
    643664                    tfpns[i].addTFPosNeg(lr.classes[i]==class_index, isPositive, lr.weight) 
    644665    return tfpns 
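
A minimal usage sketch, reusing the cross-validation results ``res`` from the sketch above (class_index picks which class value counts as positive)::

    cms = Orange.evaluation.scoring.confusion_matrices(res, class_index=1)
    cm = cms[0]    # confusion matrix of the first learner
    print cm.TP, cm.FP, cm.FN, cm.TN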
     
    651672@deprecated_keywords({"confusionMatrix": "confusion_matrix"}) 
    652673def confusion_chi_square(confusion_matrix): 
     674    """ 
     675    Return chi square statistic of the confusion matrix 
     676    (higher value indicates that prediction is not by chance). 
     677    """ 
     678    if isinstance(confusion_matrix, ConfusionMatrix) or \ 
     679       not isinstance(confusion_matrix[1], list): 
     680        return _confusion_chi_square(confusion_matrix) 
     681    else: 
     682        return map(_confusion_chi_square, confusion_matrix) 
     683 
     684def _confusion_chi_square(confusion_matrix): 
     685    if isinstance(confusion_matrix, ConfusionMatrix): 
     686        c = confusion_matrix 
     687        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]] 
    653688    dim = len(confusion_matrix) 
    654689    rowPriors = [sum(r) for r in confusion_matrix] 
    655     colPriors = [sum([r[i] for r in confusion_matrix]) for i in range(dim)] 
     690    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)] 
    656691    total = sum(rowPriors) 
    657692    rowPriors = [r/total for r in rowPriors] 
     
    666701    df = (dim-1)**2 
    667702    return ss, df, statc.chisqprob(ss, df) 
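
A minimal usage sketch, continuing with a single ConfusionMatrix ``cm`` from above (the function returns the statistic, the degrees of freedom and the p-value)::

    ss, df, p = Orange.evaluation.scoring.confusion_chi_square(cm)
    print "chi-square=%.3f, df=%d, p=%.3f" % (ss, df, p)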
    668          
    669      
    670 def sens(confm): 
    671     """Return sensitivity (recall rate) over the given confusion matrix.""" 
    672     if type(confm) == list: 
    673         return [sens(cm) for cm in confm] 
    674     else: 
    675         tot = confm.TP+confm.FN 
     703 
     704@deprecated_keywords({"confm": "confusion_matrix"}) 
     705def sens(confusion_matrix): 
     706    """ 
     707    Return `sensitivity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ 
     708    (proportion of actual positives which are correctly identified as such). 
     709    """ 
     710    if type(confusion_matrix) == list: 
     711        return [sens(cm) for cm in confusion_matrix] 
     712    else: 
     713        tot = confusion_matrix.TP+confusion_matrix.FN 
    676714        if tot < 1e-6: 
    677715            import warnings 
     
    679717            return -1 
    680718 
    681         return confm.TP/tot 
    682  
    683 def recall(confm): 
    684     """Return recall rate (sensitivity) over the given confusion matrix.""" 
    685     return sens(confm) 
    686  
    687  
    688 def spec(confm): 
    689     """Return specificity over the given confusion matrix.""" 
    690     if type(confm) == list: 
    691         return [spec(cm) for cm in confm] 
    692     else: 
    693         tot = confm.FP+confm.TN 
     719        return confusion_matrix.TP/tot 
     720 
     721 
     722@deprecated_keywords({"confm": "confusion_matrix"}) 
     723def recall(confusion_matrix): 
     724    """ 
     725    Return `recall <http://en.wikipedia.org/wiki/Precision_and_recall>`_ 
     726    (fraction of relevant instances that are retrieved). 
     727    """ 
     728    return sens(confusion_matrix) 
     729 
     730 
     731@deprecated_keywords({"confm": "confusion_matrix"}) 
     732def spec(confusion_matrix): 
     733    """ 
     734    Return `specificity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ 
     735    (proportion of negatives which are correctly identified). 
     736    """ 
     737    if type(confusion_matrix) == list: 
     738        return [spec(cm) for cm in confusion_matrix] 
     739    else: 
     740        tot = confusion_matrix.FP+confusion_matrix.TN 
    694741        if tot < 1e-6: 
    695742            import warnings 
    696743            warnings.warn("Can't compute specificity: one or both classes have no instances") 
    697744            return -1 
    698         return confm.TN/tot 
    699    
    700  
    701 def PPV(confm): 
    702     """Return positive predictive value (precision rate) over the given confusion matrix.""" 
    703     if type(confm) == list: 
    704         return [PPV(cm) for cm in confm] 
    705     else: 
    706         tot = confm.TP+confm.FP 
     745        return confusion_matrix.TN/tot 
     746 
     747 
     748@deprecated_keywords({"confm": "confusion_matrix"}) 
     749def PPV(confusion_matrix): 
     750    """ 
     751    Return `positive predictive value <http://en.wikipedia.org/wiki/Positive_predictive_value>`_ 
     752    (proportion of subjects with positive test results who are correctly diagnosed).""" 
     753    if type(confusion_matrix) == list: 
     754        return [PPV(cm) for cm in confusion_matrix] 
     755    else: 
     756        tot = confusion_matrix.TP+confusion_matrix.FP 
    707757        if tot < 1e-6: 
    708758            import warnings 
    709759            warnings.warn("Can't compute PPV: one or both classes have no instances") 
    710760            return -1 
    711         return confm.TP/tot 
    712  
    713  
    714 def precision(confm): 
    715     """Return precision rate (positive predictive value) over the given confusion matrix.""" 
    716     return PPV(confm) 
    717  
    718  
    719 def NPV(confm): 
    720     """Return negative predictive value over the given confusion matrix.""" 
    721     if type(confm) == list: 
    722         return [NPV(cm) for cm in confm] 
    723     else: 
    724         tot = confm.FN+confm.TN 
     761        return confusion_matrix.TP/tot 
     762 
     763 
     764@deprecated_keywords({"confm": "confusion_matrix"}) 
     765def precision(confusion_matrix): 
     766    """ 
     767    Return `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_ 
      768    (proportion of retrieved instances that are relevant). 
     769    """ 
     770    return PPV(confusion_matrix) 
     771 
     772@deprecated_keywords({"confm": "confusion_matrix"}) 
     773def NPV(confusion_matrix): 
     774    """Return `negative predictive value <http://en.wikipedia.org/wiki/Negative_predictive_value>`_ 
     775     (proportion of subjects with a negative test result who are correctly 
     776     diagnosed). 
     777     """ 
     778    if type(confusion_matrix) == list: 
     779        return [NPV(cm) for cm in confusion_matrix] 
     780    else: 
     781        tot = confusion_matrix.FN+confusion_matrix.TN 
    725782        if tot < 1e-6: 
    726783            import warnings 
    727784            warnings.warn("Can't compute NPV: one or both classes have no instances") 
    728785            return -1 
    729         return confm.TN/tot 
    730  
    731 def F1(confm): 
    732     """Return F1 score (harmonic mean of precision and recall) over the given confusion matrix.""" 
    733     if type(confm) == list: 
    734         return [F1(cm) for cm in confm] 
    735     else: 
    736         p = precision(confm) 
    737         r = recall(confm) 
     786        return confusion_matrix.TN/tot 
     787 
     788@deprecated_keywords({"confm": "confusion_matrix"}) 
     789def F1(confusion_matrix): 
     790    """Return `F1 score <http://en.wikipedia.org/wiki/F1_score>`_ 
     791    (harmonic mean of precision and recall).""" 
     792    if type(confusion_matrix) == list: 
     793        return [F1(cm) for cm in confusion_matrix] 
     794    else: 
     795        p = precision(confusion_matrix) 
     796        r = recall(confusion_matrix) 
    738797        if p + r > 0: 
    739798            return 2. * p * r / (p + r) 
     
    743802            return -1 
    744803 
    745 def Falpha(confm, alpha=1.0): 
     804 
     805@deprecated_keywords({"confm": "confusion_matrix"}) 
     806def Falpha(confusion_matrix, alpha=1.0): 
    746807    """Return the alpha-mean of precision and recall over the given confusion matrix.""" 
    747     if type(confm) == list: 
    748         return [Falpha(cm, alpha=alpha) for cm in confm] 
    749     else: 
    750         p = precision(confm) 
    751         r = recall(confm) 
     808    if type(confusion_matrix) == list: 
     809        return [Falpha(cm, alpha=alpha) for cm in confusion_matrix] 
     810    else: 
     811        p = precision(confusion_matrix) 
     812        r = recall(confusion_matrix) 
    752813        return (1. + alpha) * p * r / (alpha * p + r) 
    753      
    754 def MCC(confm): 
    755     ''' 
    756     Return Mattew correlation coefficient over the given confusion matrix. 
    757  
    758     MCC is calculated as follows: 
    759     MCC = (TP*TN - FP*FN) / sqrt( (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN) ) 
    760      
    761     [1] Matthews, B.W., Comparison of the predicted and observed secondary  
    762     structure of T4 phage lysozyme. Biochim. Biophys. Acta 1975, 405, 442-451 
    763  
    764     code by Boris Gorelik 
    765     ''' 
    766     if type(confm) == list: 
    767         return [MCC(cm) for cm in confm] 
    768     else: 
    769         truePositive = confm.TP 
    770         trueNegative = confm.TN 
    771         falsePositive = confm.FP 
    772         falseNegative = confm.FN  
     814 
     815 
     816@deprecated_keywords({"confm": "confusion_matrix"}) 
     817def MCC(confusion_matrix): 
     818    """ 
      819    Return `Matthews correlation coefficient <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_ 
     820    (correlation coefficient between the observed and predicted binary classifications) 
     821    """ 
     822    # code by Boris Gorelik 
     823    if type(confusion_matrix) == list: 
     824        return [MCC(cm) for cm in confusion_matrix] 
     825    else: 
     826        truePositive = confusion_matrix.TP 
     827        trueNegative = confusion_matrix.TN 
     828        falsePositive = confusion_matrix.FP 
     829        falseNegative = confusion_matrix.FN 
    773830           
    774831        try:    
     
    791848 
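
These per-matrix scores all accept either a single ConfusionMatrix or a list of them; MCC itself is the usual (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)). A minimal sketch, reusing ``cm`` from above::

    print Orange.evaluation.scoring.sens(cm), Orange.evaluation.scoring.spec(cm)
    print Orange.evaluation.scoring.F1(cm)
    print Orange.evaluation.scoring.MCC(cm)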
    792849@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"}) 
    793 def scotts_pi(confm, b_is_list_of_matrices=True): 
     850def scotts_pi(confusion_matrix, b_is_list_of_matrices=True): 
    794851   """Compute Scott's Pi for measuring inter-rater agreement for nominal data 
    795852 
     
    798855   raters. 
    799856 
    800    @param confm: confusion matrix, or list of confusion matrices. To obtain 
     857   @param confusion_matrix: confusion matrix, or list of confusion matrices. To obtain 
    801858                           non-binary confusion matrix, call 
    802859                           Orange.evaluation.scoring.compute_confusion_matrices and set the 
     
    811868   if b_is_list_of_matrices: 
    812869       try: 
    813            return [scotts_pi(cm, b_is_list_of_matrices=False) for cm in confm] 
     870           return [scotts_pi(cm, b_is_list_of_matrices=False) for cm in confusion_matrix] 
    814871       except TypeError: 
    815872           # Nevermind the parameter, maybe this is a "conventional" binary 
    816873           # confusion matrix and bIsListOfMatrices was specified by mistake 
    817            return scottsPiSingle(confm, bIsListOfMatrices=False) 
     874           return scottsPiSingle(confusion_matrix, bIsListOfMatrices=False) 
    818875   else: 
    819        if isinstance(confm, ConfusionMatrix): 
    820            confm = numpy.array( [[confm.TP, confm.FN], 
    821                    [confm.FP, confm.TN]], dtype=float) 
     876       if isinstance(confusion_matrix, ConfusionMatrix): 
     877           confusion_matrix = numpy.array( [[confusion_matrix.TP, confusion_matrix.FN], 
     878                   [confusion_matrix.FP, confusion_matrix.TN]], dtype=float) 
    822879       else: 
    823            confm = numpy.array(confm, dtype=float) 
    824  
    825        marginalSumOfRows = numpy.sum(confm, axis=0) 
    826        marginalSumOfColumns = numpy.sum(confm, axis=1) 
     880           confusion_matrix = numpy.array(confusion_matrix, dtype=float) 
     881 
     882       marginalSumOfRows = numpy.sum(confusion_matrix, axis=0) 
     883       marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1) 
    827884       jointProportion = (marginalSumOfColumns + marginalSumOfRows)/ \ 
    828                            (2.0 * numpy.sum(confm, axis=None)) 
     885                           (2.0 * numpy.sum(confusion_matrix, axis=None)) 
    829886       # In the eq. above, 2.0 is what the Wikipedia page calls 
    830887       # the number of annotators. Here we have two annotators: 
     
    833890 
    834891       prExpected = numpy.sum(jointProportion ** 2, axis=None) 
    835        prActual = numpy.sum(numpy.diag(confm), axis=None)/numpy.sum(confm, axis=None) 
     892       prActual = numpy.sum(numpy.diag(confusion_matrix), axis=None)/numpy.sum(confusion_matrix, axis=None) 
    836893 
    837894       ret = (prActual - prExpected) / (1.0 - prExpected) 
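
In equation form, the value returned above is Scott's pi = (Pr(a) - Pr(e)) / (1 - Pr(e)), where Pr(a) is the observed agreement (prActual, the diagonal of the matrix over its total) and Pr(e) is the agreement expected by chance (prExpected, the sum of squared joint proportions).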
     
    846903    tuples (aROC, standard error). 
    847904    """ 
    848     import corn 
    849905    useweights = res.weights and not argkw.get("unweighted", 0) 
    850906    problists, tots = corn.computeROCCumulative(res, class_index, useweights) 
     
    879935@deprecated_keywords({"classIndex": "class_index"}) 
    880936def compare_2_AUCs(res, lrn1, lrn2, class_index=-1, **argkw): 
    881     import corn 
    882937    return corn.compare2ROCs(res, lrn1, lrn2, class_index, res.weights and not argkw.get("unweighted")) 
    883938 
     
    890945    1-specificity and y is sensitivity. 
    891946    """ 
    892     import corn 
    893947    problists, tots = corn.computeROCCumulative(res, class_index) 
    894948 
     
    9461000                      "keepConcavities": "keep_concavities"}) 
    9471001def TC_compute_ROC(res, class_index=-1, keep_concavities=1): 
    948     import corn 
    9491002    problists, tots = corn.computeROCCumulative(res, class_index) 
    9501003 
     
    11711224@deprecated_keywords({"classIndex": "class_index"}) 
    11721225def compute_calibration_curve(res, class_index=-1): 
    1173     import corn 
    11741226    ## merge multiple iterations into one 
    11751227    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels) 
     
    12341286@deprecated_keywords({"classIndex": "class_index"}) 
    12351287def compute_lift_curve(res, class_index=-1): 
    1236     import corn 
    12371288    ## merge multiple iterations into one 
    12381289    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels) 
     
    12711322def compute_CDT(res, class_index=-1, **argkw): 
    12721323    """Obsolete, don't use""" 
    1273     import corn 
    12741324    if class_index<0: 
    12751325        if res.baseClass>=0: 
     
    13611411                      "divideByIfIte": "divide_by_if_ite"}) 
    13621412def AUC_ij(ite, class_index1, class_index2, use_weights = True, all_ite = None, divide_by_if_ite = 1.0): 
    1363     import corn 
    13641413    return AUC_x(corn.computeCDTPair, ite, all_ite, divide_by_if_ite, (class_index1, class_index2, use_weights)) 
    13651414 
     
    13691418                      "useWeights": "use_weights", 
    13701419                      "divideByIfIte": "divide_by_if_ite"}) 
    1371 def AUC_i(ite, class_index, use_weights = True, all_ite = None, divide_by_if_ite = 1.0): 
    1372     import corn 
     1420def AUC_i(ite, class_index, use_weights = True, all_ite = None, 
     1421          divide_by_if_ite = 1.0): 
    13731422    return AUC_x(corn.computeCDT, ite, all_ite, divide_by_if_ite, (class_index, use_weights)) 
    13741423 
  • docs/reference/rst/Orange.evaluation.scoring.rst

    r9904 r9999  
    77.. index: scoring 
    88 
    9 This module contains various measures of quality for classification and 
    10 regression. Most functions require an argument named :obj:`res`, an instance of 
    11 :class:`Orange.evaluation.testing.ExperimentResults` as computed by 
    12 functions from :mod:`Orange.evaluation.testing` and which contains 
    13 predictions obtained through cross-validation, 
    14 leave one-out, testing on training data or test set instances. 
      9Scoring plays an integral role in the evaluation of any prediction model. Orange 
      10implements various scores for the evaluation of classification, 
      11regression and multi-label models. Most of the methods need to be called 
      12with an instance of :obj:`ExperimentResults`. 
     13 
     14.. literalinclude:: code/statExample0.py 
    1515 
    1616============== 
    1717Classification 
    1818============== 
     19 
     20Many scores for evaluation of classification models can be computed solely 
     21from the confusion matrix constructed manually with the 
      22:obj:`confusion_matrices` function. If the class variable has more than two 
      23values, the index of the value for which the confusion matrix is computed 
      24should be passed as well. 
     25 
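A minimal sketch, assuming cross-validation results ``resVeh`` on the vehicle data, whose class variable has more than two values::

    cms = Orange.evaluation.scoring.confusion_matrices(resVeh, 1)
    print Orange.evaluation.scoring.sens(cms[0]), Orange.evaluation.scoring.spec(cms[0])
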
     26Calibration scores 
     27================== 
     28 
     29.. autofunction:: CA 
     30.. autofunction:: sens 
     31.. autofunction:: spec 
     32.. autofunction:: PPV 
     33.. autofunction:: NPV 
     34.. autofunction:: precision 
     35.. autofunction:: recall 
     36.. autofunction:: F1 
     37.. autofunction:: Falpha 
     38.. autofunction:: MCC 
     39.. autofunction:: AP 
     40.. autofunction:: IS 
     41.. autofunction:: 
     42 
     43Discriminatory scores 
     44===================== 
     45 
     46.. autofunction:: Brier_score 
     47 
     48.. autofunction:: AUC 
     49 
     50    .. attribute:: AUC.ByWeightedPairs (or 0) 
     51 
     52      Computes AUC for each pair of classes (ignoring instances of all other 
     53      classes) and averages the results, weighting them by the number of 
     54      pairs of instances from these two classes (e.g. by the product of 
     55      probabilities of the two classes). AUC computed in this way still 
     56      behaves as concordance index, e.g., gives the probability that two 
     57      randomly chosen instances from different classes will be correctly 
     58      recognized (this is of course true only if the classifier knows 
     59      from which two classes the instances came). 
     60 
     61   .. attribute:: AUC.ByPairs (or 1) 
     62 
      63      Similar to the above, except that the average over class pairs is not 
     64      weighted. This AUC is, like the binary, independent of class 
     65      distributions, but it is not related to concordance index any more. 
     66 
     67   .. attribute:: AUC.WeightedOneAgainstAll (or 2) 
     68 
     69      For each class, it computes AUC for this class against all others (that 
     70      is, treating other classes as one class). The AUCs are then averaged by 
     71      the class probabilities. This is related to concordance index in which 
     72      we test the classifier's (average) capability for distinguishing the 
     73      instances from a specified class from those that come from other classes. 
     74      Unlike the binary AUC, the measure is not independent of class 
     75      distributions. 
     76 
     77   .. attribute:: AUC.OneAgainstAll (or 3) 
     78 
     79      As above, except that the average is not weighted. 
     80 
     81   In case of multiple folds (for instance if the data comes from cross 
     82   validation), the computation goes like this. When computing the partial 
     83   AUCs for individual pairs of classes or singled-out classes, AUC is 
     84   computed for each fold separately and then averaged (ignoring the number 
     85   of instances in each fold, it's just a simple average). However, if a 
     86   certain fold doesn't contain any instances of a certain class (from the 
     87   pair), the partial AUC is computed treating the results as if they came 
      88   from a single fold. This is not really correct since the class 
      89   probabilities from different folds are not necessarily comparable, 
      90   but since this will most often occur in leave-one-out experiments, 
      91   comparability shouldn't be a problem. 
     92 
      93   Computing and printing out the AUCs looks just like printing out 
     94   classification accuracies (except that we call AUC instead of 
     95   CA, of course):: 
     96 
     97       AUCs = Orange.evaluation.scoring.AUC(res) 
     98       for l in range(len(learners)): 
     99           print "%10s: %5.3f" % (learners[l].name, AUCs[l]) 
     100 
     101   For vehicle, you can run exactly this same code; it will compute AUCs 
     102   for all pairs of classes and return the average weighted by probabilities 
     103   of pairs. Or, you can specify the averaging method yourself, like this:: 
     104 
     105       AUCs = Orange.evaluation.scoring.AUC(resVeh, Orange.evaluation.scoring.AUC.WeightedOneAgainstAll) 
     106 
     107   The following snippet tries out all four. (We don't claim that this is 
     108   how the function needs to be used; it's better to stay with the default.):: 
     109 
     110       methods = ["by pairs, weighted", "by pairs", "one vs. all, weighted", "one vs. all"] 
     111       print " " *25 + "  \tbayes\ttree\tmajority" 
     112       for i in range(4): 
     113           AUCs = Orange.evaluation.scoring.AUC(resVeh, i) 
     114           print "%25s: \t%5.3f\t%5.3f\t%5.3f" % ((methods[i], ) + tuple(AUCs)) 
     115 
     116   As you can see from the output:: 
     117 
     118                                   bayes   tree    majority 
     119              by pairs, weighted:  0.789   0.871   0.500 
     120                        by pairs:  0.791   0.872   0.500 
     121           one vs. all, weighted:  0.783   0.800   0.500 
     122                     one vs. all:  0.783   0.800   0.500 
     123 
     124.. autofunction:: AUC_single 
     125 
     126.. autofunction:: AUC_pair 
     127 
     128.. autofunction:: AUC_matrix 
     129 
     130The remaining functions, which plot the curves and statistically compare 
     131them, require that the results come from a test with a single iteration, 
     132and they always compare one chosen class against all others. If you have 
     133cross validation results, you can either use split_by_iterations to split the 
     134results by folds, call the function for each fold separately and then sum 
     135the results up however you see fit, or you can set the ExperimentResults' 
     136attribute number_of_iterations to 1, to cheat the function - at your own 
      137responsibility for the statistical correctness. For multi-class 
      138problems, if you don't choose a specific class, Orange.evaluation.scoring will use the class 
      139attribute's baseValue at the time when results were computed. If baseValue 
      140was not given at that time, 1 (that is, the second class) is used as default. 
     141 
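A minimal sketch of the split_by_iterations route mentioned above (assuming cross-validation results ``res`` prepared earlier on this page; the exposed name follows the description in the previous paragraph)::

    folds = Orange.evaluation.scoring.split_by_iterations(res)
    aucs_per_fold = [Orange.evaluation.scoring.AUCWilcoxon(fold) for fold in folds]
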
     142We shall use the following code to prepare suitable experimental results:: 
     143 
     144    ri2 = Orange.core.MakeRandomIndices2(voting, 0.6) 
     145    train = voting.selectref(ri2, 0) 
     146    test = voting.selectref(ri2, 1) 
     147    res1 = Orange.evaluation.testing.learnAndTestOnTestData(learners, train, test) 
     148 
     149 
     150.. autofunction:: AUCWilcoxon 
     151 
     152.. autofunction:: compute_ROC 
     153 
     154 
     155.. autofunction:: confusion_matrices 
     156 
     157.. autoclass:: ConfusionMatrix 
     158 
    19159 
    20160To prepare some data for examples on this page, we shall load the voting data 
     
    29169(:download:`statExamples.py <code/statExamples.py>`, uses :download:`voting.tab <code/voting.tab>` and :download:`vehicle.tab <code/vehicle.tab>`): 
    30170 
    31 .. literalinclude:: code/statExample0.py 
    32  
    33171If instances are weighted, weights are taken into account. This can be 
    34172disabled by giving :obj:`unweighted=1` as a keyword argument. Another way of 
     
    39177=========================== 
    40178 
    41 .. autofunction:: CA 
    42  
    43 .. autofunction:: AP 
    44  
    45 .. autofunction:: Brier_score 
    46  
    47 .. autofunction:: IS 
     179 
     180 
     181 
    48182 
    49183So, let's compute all this in part of 
     
    58192    bayes   0.903   0.902   0.175    0.759 
    59193    tree    0.846   0.845   0.286    0.641 
    60     majorty  0.614   0.526   0.474   -0.000 
     194    majority  0.614   0.526   0.474   -0.000 
    61195 
    62196Script :download:`statExamples.py <code/statExamples.py>` contains another example that also prints out 
     
    163297   instances. The classifier is obviously quite biased to vans. 
    164298 
    165    .. method:: sens(confm) 
    166    .. method:: spec(confm) 
    167    .. method:: PPV(confm) 
    168    .. method:: NPV(confm) 
    169    .. method:: precision(confm) 
    170    .. method:: recall(confm) 
    171    .. method:: F2(confm) 
    172    .. method:: Falpha(confm, alpha=2.0) 
    173    .. method:: MCC(conf) 
     299 
    174300 
    175301   With the confusion matrix defined in terms of positive and negative 
  • docs/reference/rst/code/statExample0.py

    r9372 r9999  
    1 import orange, orngTest, orngTree 
     1import Orange 
    22 
    3 learners = [orange.BayesLearner(name = "bayes"), 
    4             orngTree.TreeLearner(name="tree"), 
    5             orange.MajorityLearner(name="majrty")] 
     3learners = [Orange.classification.bayes.NaiveLearner(name="bayes"), 
     4            Orange.classification.tree.TreeLearner(name="tree"), 
     5            Orange.classification.majority.MajorityLearner(name="majority")] 
    66 
    7 voting = orange.ExampleTable("voting") 
    8 res = orngTest.crossValidation(learners, voting) 
     7voting = Orange.data.Table("voting") 
     8res = Orange.evaluation.testing.cross_validation(learners, voting) 
    99 
    10 vehicle = orange.ExampleTable("vehicle") 
    11 resVeh = orngTest.crossValidation(learners, vehicle) 
     10print "CA =", Orange.evaluation.scoring.CA(res) 
     11print "AUC = ", Orange.evaluation.scoring.AUC(res) 