Changeset 8248:5561ec566b23 in orange


Ignore:
Timestamp:
08/21/11 21:44:01 (3 years ago)
Author:
martin <martin@…>
Branch:
default
Convert:
dd3df28186353574c6db83766e4e05e43774328f
Message:

ABML-related corrections.

Location:
source/orange
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • source/orange/rulelearner.cpp

    r6649 r8248  
    7474 
    7575TRule::TRule(const TRule &other, bool copyData) 
    76 : filter(other.filter? other.filter->deepCopy() : PFilter()), 
     76: filter(other.filter? other.filter->deepCopy() : PFilter()), //  
    7777  valuesFilter(other.valuesFilter? other.valuesFilter->deepCopy() : PFilter()), 
    7878  classifier(other.classifier), 
     
    469469    return 0.0; 
    470470 
    471   float ep = obs_dist.abs*P/(exp_dist.abs); 
    472   float lrs = 2 * (p*log(p/ep) + n*log(n/obs_dist.abs) + 
    473                    (P-p)*log((P-p)/(exp_dist.abs-obs_dist.abs)) + (N-n)*log((N-n)/(exp_dist.abs-obs_dist.abs)) - 
    474                    (P-p)*log(P/exp_dist.abs)-N*log(N/exp_dist.abs)); 
     471  float ep = (p+n)*P/(P+N); 
     472  float lrs = 2 * (p*log(p/ep) + n*log(n/(p+n))+(P-p)*log((P-p)/(P+N-p-n))+(N-n)*log((N-n)/(P+N-p-n))-(P-p)*log(P/(P+N))-N*log(N/(P+N))); 
    475473  if (storeRules) { 
     474    TFilter_values *filter; 
     475    filter = rule->filter.AS(TFilter_values); 
     476    int ncond = filter->conditions->size(); 
    476477    TRuleList &rlist = rules.getReference(); 
    477     rlist.push_back(rule); 
     478    if (!rlist.at(ncond) || rlist.at(ncond)->quality < lrs) 
     479        rlist.at(ncond) = rule; 
    478480  } 
    479481  return lrs; 
     
    496498} 
    497499 
     500// compute cumulative probability with extreme value distribution  
    498501double TEVDist::getProb(const float & chi) 
    499 { 
     502 
     503  // percentiles are not computed 
    500504  if (!percentiles || percentiles->size()==0 || percentiles->at(percentiles->size()-1)<chi) 
    501505    return 1.0-exp(-exp((double)(mu-chi)/beta)); 
    502   if (chi < percentiles->at(0)) 
    503     return 1.0; 
     506  // low chi 
     507  if (chi < percentiles->at(0)-1e-6) 
     508    return 1.0 - 0.05 * chi / percentiles->at(0); 
    504509  TFloatList::const_iterator pi(percentiles->begin()), pe(percentiles->end()); 
    505510  for (int i=0; (pi+1)!=pe; pi++,i++) { 
    506511    float a = *pi; 
    507512    float b = *(pi+1); 
    508     if (chi>=a && chi <=b) 
    509       return (maxPercentile-i*step)-step*(chi-a)/(b-a); 
     513    if (chi>=(a-1e-6) && chi <= (b+1e-6)) 
     514      if (b - a <= 0.0) 
     515        return maxPercentile-i*step; 
     516      else 
     517        return (maxPercentile-i*step)-step*(chi-a)/(b-a); 
    510518  } 
    511519  return 1.0; 
     
    551559} 
    552560 
    553 LNLNChiSq::LNLNChiSq(PEVDist evd, const float & chi) 
     561LNLNChiSq::LNLNChiSq(PEVDist evd, const float & chi, const float & priorProb) 
    554562: evd(evd), 
    555563  chi(chi) 
     
    560568    extremeAlpha = -1.0; 
    561569  // exponent used in FT cumulative function (min because it is negative) 
    562   exponent = min(float(log(log(1/evd->maxPercentile))),(evd->mu-chi)/evd->beta); 
     570  exponent = min(float(log(log(1/evd->maxPercentile))),(evd->mu-chi)/evd->beta); //(evd->mu-chi)/evd->beta; //min(float(log(log(1/evd->maxPercentile))),(evd->mu-chi)/evd->beta); 
     571  pp = priorProb; 
    563572} 
    564573 
    565574double LNLNChiSq::operator()(float chix) { 
     575  if (chix > 1400) 
     576    return -1000.0; 
     577 
     578  double chip; 
    566579  if (chix<=0.0) 
    567     return 100.0; 
    568   double chip = chisqprob((double)chix,1.0)/2; // in statc 
     580    chip = 0.5; 
     581  else 
     582    chip = chisqprob((double)chix,1.0)*0.5;//pp; // in statc 
     583 
     584  //chip = chisqprob((double)chix,1.0); // in statc 
     585/*  if (chi > 5.2 && chi < 5.3) 
     586  printf("chip = %f, chip-e = %f\n", chip, chip-extremeAlpha); */ 
     587 
    569588  if (extremeAlpha > 0.0) 
    570589    return chip-extremeAlpha; 
    571590 
    572591  if (chip<=0.0) 
    573     return -100.0; 
     592    return -1000.0; 
    574593 
    575594  if (chip < 1e-6) 
     
    617636 
    618637// Implementation of Brent's root finding method. 
    619 float brent(const float & minv, const float & maxv, const int & maxsteps, DiffFunc * func) 
     638float brent(const float & minv, const float & maxv, const int & maxsteps, DiffFunc * func, float threshold) 
    620639{ 
    621640  float a = minv; 
     
    624643  float fb = func->call(b); 
    625644 
    626   float threshold = 0.01 * (maxv - minv); 
    627  
     645 
     646 // float threshold = 0.01 * (maxv - minv); 
    628647  if (fb>0 && fa>0 && fb>fa || fb<0 && fa<0 && fb<fa) 
    629648    return a; 
     
    659678    fb = fe; 
    660679 
    661     if (counter>maxsteps) 
    662       return 0.0; 
    663     if ((b>0.1 && fb*func->call(b-0.1)<=0) || fb*func->call(b+0.1)<=0) 
     680/*    if (counter<maxsteps) 
     681        printf("a = %f, b=%f, fa=%f, fb=%f, maxv=%f, minv=%f\n",a,b,fa,fb,maxv,minv); */ 
     682    //  return 0.0; 
     683    if ((b>threshold && fb*func->call(b-threshold)<=0) || fb*func->call(b+threshold)<=0) 
     684    { 
    664685      return b; 
     686    } 
    665687    if (abs(a-b)<threshold && fa*fb<0) 
    666688      return (a+b)/2.; 
     
    754776      // correct lrs with evc 
    755777      if (!useClassicLRS) { 
    756         LNLNChiSq *diffFunc = new LNLNChiSq(evd,chi); 
    757             chi = brent(0.0,chi,100, diffFunc); 
     778        LNLNChiSq *diffFunc = new LNLNChiSq(evd,chi,aprioriProb); 
     779            chi = brent(0.0,chi,100, diffFunc, 0.1f); 
    758780        delete diffFunc; 
    759781      } 
     
    812834      // compute ePos 
    813835      LRInvE *diffFunc = new LRInvE(rule, rule->parentRule, targetClass, median); 
    814       ePos = brent(rule->parentRule->classDistribution->atint(targetClass)/rule->parentRule->classDistribution->abs*rule->classDistribution->atint(targetClass), rule->classDistribution->atint(targetClass), 100, diffFunc); 
     836      ePos = brent(rule->parentRule->classDistribution->atint(targetClass)/rule->parentRule->classDistribution->abs*rule->classDistribution->atint(targetClass), rule->classDistribution->atint(targetClass), 100, diffFunc, 0.1f); 
    815837      delete diffFunc; 
    816838 
     
    855877    return -10e+6; 
    856878  //printf("mu=%f, beta=%f\n",evd->mu,evd->beta); 
    857   if (evd->mu == 0.0) 
     879  if (evd->mu < 1.0001) 
    858880  { 
    859881    // return as if rule distribution is not optimistic 
    860882    rule->chi = getChi(rule->classDistribution->atint(targetClass), rule->classDistribution->abs - rule->classDistribution->atint(targetClass), 
    861883                apriori->atint(targetClass), apriori->abs - apriori->atint(targetClass)); 
    862       rule->estRF = rule->classDistribution->atint(targetClass)/rule->classDistribution->abs; 
     884    rule->estRF = rule->classDistribution->atint(targetClass)/rule->classDistribution->abs; 
    863885    rule->distP = rule->classDistribution->atint(targetClass); 
    864886    return (rule->classDistribution->atint(targetClass)+m*aprioriProb)/(rule->classDistribution->abs+m); 
     
    872894  float rule_acc = rule->classDistribution->atint(targetClass)/rule->classDistribution->abs; 
    873895 
    874   if ((evd->mu-chi)/evd->beta < -100) 
     896  if ((evd->mu-chi)/evd->beta < -500) 
    875897    ePos = rule->classDistribution->atint(targetClass); 
    876   else if (rule_acc < baseProb) 
     898  if (rule_acc < baseProb) 
    877899    ePos = rule->classDistribution->atint(targetClass); 
    878   else if (chi<=median) 
    879     ePos = rule->classDistribution->abs * baseProb; 
     900  else if (chi <= median+1e-6) 
     901      ePos = rule->classDistribution->abs * baseProb; 
    880902  else { 
    881903      // correct chi 
    882       LNLNChiSq *diffFunc = new LNLNChiSq(evd,chi); 
    883       rule->chi = brent(0.0,chi,100, diffFunc); // this is only the correction of one step chi 
     904      LNLNChiSq *diffFunc = new LNLNChiSq(evd,chi,aprioriProb); 
     905      rule->chi = brent(0.0,chi,100, diffFunc, 0.1f); // this is only the correction of one step chi 
    884906      delete diffFunc; 
    885907 
    886       //printf("rule chi = %4.3f\n",rule->chi); 
    887908      // compute expected number of positive examples relatively to base rule 
    888909      if (rule->chi > 0.0) 
     
    892913        float baseTarget = base->atint(targetClass); 
    893914        LRInv *diffFunc = new LRInv(rule->classDistribution->abs, baseTarget, base->abs, rule->chi); //-0.45); 
    894         ePos = brent(base->atint(targetClass)/base->abs*rule->classDistribution->abs, rule->classDistribution->atint(targetClass), 100, diffFunc); 
     915        ePos = brent(base->atint(targetClass)/base->abs*rule->classDistribution->abs, rule->classDistribution->atint(targetClass), 100, diffFunc, 0.1f); 
    895916        //printf("epos = %4.3f\n",ePos); 
    896917        delete diffFunc; 
     918        if (rule->classDistribution->abs == rule->classDistribution->atint(targetClass)) 
     919        { 
     920            if (rule->parentRule) 
     921            { 
     922                float parentOpt = rule->parentRule->estRF / (rule->parentRule->classDistribution->atint(targetClass) / rule->parentRule->classDistribution->abs); 
     923                float ruleOpt = ePos / rule->classDistribution->atint(targetClass); 
     924                if (ruleOpt < parentOpt) 
     925                    ePos = (ruleOpt + (parentOpt - ruleOpt) * 0.366) * rule->classDistribution->atint(targetClass); 
     926            } 
     927            else 
     928                ePos = (0.634 * ePos/rule->classDistribution->atint(targetClass) + 0.366) * rule->classDistribution->atint(targetClass); 
     929        } 
    897930      } 
    898931      else 
    899932        ePos = rule->classDistribution->abs * baseProb; 
    900   } 
    901  
     933 
     934  } 
     935 
     936  // 
    902937  rule->chi = getChi(ePos, rule->classDistribution->abs - ePos, 
    903938                apriori->atint(targetClass), apriori->abs - apriori->atint(targetClass)); 
    904939 
    905940  rule->estRF = ePos/rule->classDistribution->abs; 
     941 
    906942  float quality = (ePos + m*aprioriProb)/(rule->classDistribution->abs+m); 
    907   //printf("quality = %4.3f, rf = %4.3f\n",quality, rule->estRF); 
    908  // if (rule->classDistribution->atint(targetClass) >= 38 || rule->parentRule->classDistribution->atint(targetClass) >= 38) 
    909  //   printf("child: all %4.4f %4.4f %4.4f\n", rule->distP, quality, aprioriProb); 
     943 
    910944  if (quality > aprioriProb) 
    911945    return quality; 
    912946  if (rule_acc < aprioriProb) 
    913     return rule_acc; 
    914   return aprioriProb; //-0.01+0.01*rule_acc; 
    915 } 
    916  
     947    return rule_acc-0.01; 
     948  return aprioriProb-0.01+0.01*chi/median;  
     949} 
    917950 
    918951float TRuleEvaluator_mEVC::operator()(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori) 
     
    934967    quality = evaluateRuleEVC(rule,examples,weightID,targetClass,apriori,rLength,aprioriProb); 
    935968 
    936 /*  if (optimismReduction == 2) 
    937     printf("rule: %f, %f, %f\n",rule->classDistribution->atint(targetClass), rule->classDistribution->abs, quality); */ 
    938969  if (quality < 0.0) 
    939970    return quality; 
     
    952983  if (rule->classDistribution->atint(targetClass) == rule->classDistribution->abs) 
    953984    futureQuality = -1.0; 
     985  else if (improved >= min_improved && 
     986           improved/rule->classDistribution->atint(targetClass) > min_improved_perc*0.01 && 
     987           quality > (aprioriProb + 1e-3)) 
     988    futureQuality = quality; 
     989//    futureQuality = 1.0 + quality; 
    954990  else { 
    955991    PDistribution oldRuleDist = rule->classDistribution; 
     
    9841020        if ((*ei).getClass().intV != targetClass) 
    9851021          continue; 
    986       /*  if (quality >= (*ei)[probVar].floatV) { 
    987           futureQuality += 1.0; 
    988           continue; 
    989         } */ 
    9901022        if (bestQuality <= (*ei)[probVar].floatV) { 
    9911023          continue; 
     
    9951027          x *= (1.0-quality)/(bestQuality-quality); 
    9961028        x /= sqrt(quality*(1.0-quality)); // rule->classDistribution->abs* 
    997  //       if (max(1e-12,1.0-zprob(x)) > futureQuality) 
    998  //           futureQuality = max(1e-12,1.0-zprob(x)); 
    999  //           futureQuality += max(1e-12,1.0-zprob(x)); 
    10001029        futureQuality += log(1.0-max(1e-12,1.0-2*zprob(x))); 
    10011030      } 
    10021031      futureQuality = 1.0 - exp(futureQuality); 
    1003       //futureQuality /= rule->classDistribution->atint(targetClass); //apriori->abs; //rule->classDistribution->atint(targetClass);//rule->classDistribution->abs; 
    10041032    } 
    10051033  } 
     
    11971225 
    11981226    else if (((*vi)->varType == TValue::FLOATVAR)) { 
    1199       if (discretization) { 
     1227 
     1228        if (discretization) { 
    12001229        PVariable discretized; 
    12011230        try { 
     
    12581287    } 
    12591288  } 
     1289 
    12601290  if (!discretization) 
    12611291    discretization = PDiscretization(); 
     
    12981328    ruleFilter = mlnew TRuleBeamFilter_Width; 
    12991329 
     1330  // create an entropy evaluator to detect rules with pure distributions 
     1331  PRuleEvaluator entropy_evaluator = mlnew TRuleEvaluator_Entropy; 
     1332 
    13001333  checkProperty(initializer); 
    13011334  checkProperty(candidateSelector); 
     
    13031336  checkProperty(evaluator); 
    13041337  checkProperty(ruleFilter); 
    1305  
    13061338  PDistribution apriori = getClassDistribution(data, weightID); 
    13071339 
     
    13121344  PRuleList ruleList = initializer->call(data, weightID, targetClass, baseRules, evaluator, apriori, bestRule); 
    13131345 
    1314   { 
    13151346  PITERATE(TRuleList, ri, ruleList) { 
    13161347    if (!(*ri)->examples) 
     
    13181349    if ((*ri)->quality == ILLEGAL_FLOAT) 
    13191350      (*ri)->quality = evaluator->call(*ri, data, weightID, targetClass, apriori); 
    1320   } 
    13211351  } 
    13221352 
     
    13321362      PRuleList newRules = refiner->call(*ri, data, weightID, targetClass); 
    13331363      PITERATE(TRuleList, ni, newRules) { 
     1364        // evaluate rule 
    13341365        (*ni)->quality = evaluator->call(*ni, data, weightID, targetClass, apriori); 
    13351366        if ((*ni)->quality > bestRule->quality && (!validator || validator->call(*ni, data, weightID, targetClass, apriori))) 
    13361367          _selectBestRule(*ni, bestRule, wins, rgen); 
     1368        // if rule covers only one class or has less than 2 examples, do not add it for further specialization 
     1369        if (entropy_evaluator->call(*ni, data, weightID, targetClass, apriori) > -0.01 || 
     1370            (*ni)->classDistribution->abs < 2) 
     1371            continue; 
     1372        // is the rule a good candidate for further specialization? 
    13371373        if (!ruleStoppingValidator || ruleStoppingValidator->call(*ni, (*ri)->examples, weightID, targetClass, (*ri)->classDistribution)) { 
    13381374          ruleList->push_back(*ni); 
     
    20542090  for (int i=0; i<rules->size(); i++) { 
    20552091    // compute new coverage 
    2056     new_positive = 0.0; new_covered = 0.0; 
     2092    new_positive = 0.0; new_covered = 0.0; 
    20572093    for (int j=0; j<examples->numberOfExamples(); j++) 
    20582094        if (rules->at(i)->call(examples->at(j)) && !tempState->isExampleFixed[j]) 
     
    20622098            new_covered ++; 
    20632099        } 
    2064     if (new_covered < 1) 
    2065         continue; 
     2100    if (new_covered < 1) 
     2101        continue; 
    20662102 
    20672103/*  // rule should cover at least 50% of its own examples (so that conditions still make some sense) 
     
    20692105      continue; */ 
    20702106 
    2071     apriori_prob = apriori->atint(getClassIndex(rules->at(i))) / apriori->abs; 
     2107    apriori_prob = apriori->atint(getClassIndex(rules->at(i))) / apriori->abs; 
    20722108    new_rf_rule = new_positive / new_covered; 
    2073     if (new_rf_rule <= apriori_prob) 
    2074         continue; 
    2075  
    2076     // corrected quality of the rule (due to rules in the prefix set already) 
     2109    if (new_rf_rule <= apriori_prob) 
     2110        continue; 
     2111 
     2112    // corrected quality of the rule (due to rules in the prefix set already) 
    20772113    oldQuality = rules->at(i)->quality; 
    2078     rf_rule = rules->at(i)->classDistribution->atint(getClassIndex(rules->at(i))); 
    2079     rf_rule /= rules->at(i) -> classDistribution->abs; 
    2080     newQuality = oldQuality * (new_positive + 2*rf_rule) / (new_covered + 2) / rf_rule; 
     2114    rf_rule = rules->at(i)->classDistribution->atint(getClassIndex(rules->at(i))); 
     2115    rf_rule /= rules->at(i) -> classDistribution->abs; 
     2116    newQuality = oldQuality * (new_positive + 2*rf_rule) / (new_covered + 2) / rf_rule; 
    20812117 
    20822118    // check if improvement is good 
     
    20852121    rules->at(i)->quality = oldQuality; 
    20862122 
    2087     brier_improvement = (current_brier - currentState->getBrierScore()) / new_covered; 
    2088  
    2089     if (currentState->getAUC() >= current_auc &&  
    2090         brier_improvement > best_brier_improvement) 
    2091     { 
    2092         best_brier_improvement = brier_improvement; 
    2093         bestRuleI = i; 
    2094         bestNewQuality = newQuality; 
    2095     } 
     2123    brier_improvement = (current_brier - currentState->getBrierScore()) / new_covered; 
     2124 
     2125    if (currentState->getAUC() >= current_auc &&  
     2126        brier_improvement > best_brier_improvement) 
     2127    { 
     2128        best_brier_improvement = brier_improvement; 
     2129        bestRuleI = i; 
     2130        bestNewQuality = newQuality; 
     2131    } 
    20962132    tempState->copyTo(currentState); 
    20972133  } 
     
    21952231  if (minSignificance >= 0.5 || minSignificance <= 0.0) 
    21962232    PITERATE(TRuleList, ri, rules) 
    2197       wSatQ->push_back((*ri)->quality); 
     2233      //wSatQ->push_back((*ri)->quality); 
     2234      wSatQ->push_back(1); 
    21982235  else 
    21992236  { 
     
    22712308 
    22722309void TRuleClassifier_logit::updateRuleBetas(float step_) 
     2310{ 
     2311    PLogitClassifierState finalState; 
     2312    currentState->copyTo(finalState); 
     2313 
     2314    float step = 0.1f; 
     2315    float gamma = 0.01f; 
     2316    float error = 1e+20f; 
     2317    float old_error = 1e+21f; 
     2318 
     2319    while (old_error > error || step > 0.00001) 
     2320    { 
     2321        // reduce step if improvement failed 
     2322        if (old_error < error) 
     2323        { 
     2324            step /= 10; 
     2325            finalState->copyTo(currentState); 
     2326        } 
     2327        else 
     2328            currentState->copyTo(finalState); 
     2329 
     2330        old_error = error; 
     2331        // update betas 
     2332        for (int i=0; i<rules->size(); i++) { 
     2333            if (skipRule[i]) 
     2334                continue; 
     2335 
     2336            float der = 0.0; 
     2337            if (currentState->avgProb->at(i) < rules->at(i)->quality) 
     2338                der -= rules->at(i)->quality - currentState->avgProb->at(i); 
     2339            der += 2*gamma*currentState->betas[i]; 
     2340            currentState->newBeta(i,max(0.0f, currentState->betas[i]-step*der)); 
     2341        } 
     2342        // estimate new error 
     2343        error = 0; 
     2344        for (int i=0; i<rules->size(); i++) { 
     2345            if (currentState->avgProb->at(i) < rules->at(i)->quality) 
     2346                error += rules->at(i)->quality - currentState->avgProb->at(i); 
     2347            error += gamma*pow(currentState->betas[i],2); 
     2348        } 
     2349        //printf("error = %4.4f\n", error); 
     2350        // print betas 
     2351        //for (int i=0; i<rules->size(); i++) 
     2352        //    printf("%4.2f,",currentState->betas[i]); 
     2353        //printf("\n");  
     2354    } 
     2355    finalState->copyTo(currentState); 
     2356} 
     2357 
     2358/* 
     2359void TRuleClassifier_logit::updateRuleBetas2(float step_) 
    22732360{ 
    22742361 
     
    23422429  finalState->copyTo(currentState); 
    23432430} 
    2344  
     2431*/ 
    23452432 
    23462433void TRuleClassifier_logit::stabilizeAndEvaluate(float & step, int last_changed_rule_index) 
  • source/orange/rulelearner.hpp

    r6650 r8248  
    189189public: 
    190190  PEVDist evd; 
    191   float chi, exponent; 
     191  float chi, exponent, pp; 
    192192  double extremeAlpha; 
    193193 
    194   LNLNChiSq(PEVDist evd, const float & chi); 
     194  LNLNChiSq(PEVDist evd, const float & chi, const float & aprioriProb); 
    195195  double operator()(float chix); 
    196196}; 
Note: See TracChangeset for help on using the changeset viewer.