Changeset 3576:a03e6b4fb65f in orange


Ignore:
Timestamp:
04/23/07 21:34:22 (7 years ago)
Author:
martinm <martinm@…>
Branch:
default
Convert:
6642c038f2a7c1df4828828b78e4ec11fa128ab2
Message:

EVC rules

Location:
source
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • source/orange/callback.hpp

    r2698 r3576  
    148148public: 
    149149  __REGISTER_CLASS 
    150   virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution) const; 
     150  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution); 
     151}; 
     152 
     153class ORANGE_API TEVCDistGetter_Python : public TEVCDistGetter { 
     154public: 
     155  __REGISTER_CLASS 
     156  virtual PEVCDist operator()(const PRule, const int &) const; 
     157}; 
     158 
     159class ORANGE_API TChiFunction_Python : public TChiFunction { 
     160public: 
     161  __REGISTER_CLASS 
     162  virtual float operator()(PRule rule, PExampleTable data, const int & weightID, const int & targetClass, PDistribution apriori, float & nonOptimistic_Chi) const; 
    151163}; 
    152164 
  • source/orange/lib_learner.cpp

    r3434 r3576  
    11451145C_NAMED(RuleEvaluator_Entropy, RuleEvaluator, "()") 
    11461146C_NAMED(RuleEvaluator_Laplace, RuleEvaluator, "()") 
     1147C_NAMED(RuleEvaluator_LRS, RuleEvaluator, "()") 
     1148C_NAMED(RuleEvaluator_mEVC, RuleEvaluator, "()") 
     1149 
     1150C_NAMED(EVCDist, Orange, "()") 
     1151C_NAMED(ChiFunction_2LOGLR, ChiFunction, "()") 
     1152C_NAMED(EVCDistGetter_Standard, EVCDistGetter, "()") 
    11471153 
    11481154C_NAMED(RuleBeamFinder, RuleFinder, "([validator=, evaluator=, initializer=, refiner=, candidateSelector=, ruleFilter=])") 
     
    12401246  PyCATCH 
    12411247} 
     1248 
     1249PyObject *EVCDistGetter_new(PyTypeObject *type, PyObject *args, PyObject *keywords)  BASED_ON(Orange, "<abstract>") 
     1250{ if (type == (PyTypeObject *)&PyOrEVCDistGetter_Type) 
     1251    return setCallbackFunction(WrapNewOrange(mlnew TEVCDistGetter_Python(), type), args); 
     1252  else 
     1253    return WrapNewOrange(mlnew TEVCDistGetter_Python(), type); 
     1254} 
     1255 
     1256PyObject *EVCDistGetter__reduce__(PyObject *self) 
     1257{ 
     1258  return callbackReduce(self, PyOrEVCDistGetter_Type); 
     1259} 
     1260 
     1261 
     1262PyObject *EVCDistGetter_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(rule, length) -/-> (EVCdist)") 
     1263{ 
     1264  PyTRY 
     1265    NO_KEYWORDS 
     1266 
     1267    PRule rule; 
     1268    int rLength; 
     1269 
     1270    if (!PyArg_ParseTuple(args, "O&i:EVCDistGetter.call", cc_Rule, &rule, &rLength)) 
     1271      return PYNULL; 
     1272    CAST_TO(TEVCDistGetter, getter) 
     1273    PEVCDist dist = (*getter)(rule, rLength); 
     1274 
     1275    return WrapOrange(dist); 
     1276  PyCATCH 
     1277} 
     1278 
     1279PyObject *ChiFunction_new(PyTypeObject *type, PyObject *args, PyObject *keywords)  BASED_ON(Orange, "<abstract>") 
     1280{ if (type == (PyTypeObject *)&PyOrChiFunction_Type) 
     1281    return setCallbackFunction(WrapNewOrange(mlnew TChiFunction_Python(), type), args); 
     1282  else 
     1283    return WrapNewOrange(mlnew TChiFunction_Python(), type); 
     1284} 
     1285 
     1286PyObject *ChiFunction__reduce__(PyObject *self) 
     1287{ 
     1288  return callbackReduce(self, PyOrChiFunction_Type); 
     1289} 
     1290 
     1291PyObject *ChiFunction_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(rule, data, weight, targetClass, apriori) -/-> (nonOptimistic_Chi, optimistic_Chi)") 
     1292{ 
     1293  PyTRY 
     1294    NO_KEYWORDS 
     1295 
     1296    PRule rule; 
     1297    PExampleGenerator gen; 
     1298    int weightID = 0; 
     1299    int targetClass = -1; 
     1300    PDistribution apriori; 
     1301 
     1302    if (!PyArg_ParseTuple(args, "O&O&O&iO&:ChiFunction.call", cc_Rule, &rule, pt_ExampleGenerator, &gen, pt_weightByGen(gen), &weightID, &targetClass, cc_Distribution, &apriori)) 
     1303      return PYNULL; 
     1304    CAST_TO(TChiFunction, chif); 
     1305    float nonOptimistic_Chi = 0.0; 
     1306    float chi = (*chif)(rule, gen, weightID, targetClass, apriori, nonOptimistic_Chi); 
     1307 
     1308    return Py_BuildValue("ii", nonOptimistic_Chi, chi); 
     1309  PyCATCH 
     1310} 
     1311 
    12421312 
    12431313PyObject *RuleValidator_new(PyTypeObject *type, PyObject *args, PyObject *keywords)  BASED_ON(Orange, "<abstract>") 
     
    15721642} 
    15731643 
    1574 PyObject *RuleClassifier_logit_new(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(rules, examples[, weight])") 
     1644PyObject *RuleClassifier_logit_new(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(rules, min_beta, examples[, weight])") 
    15751645{  
    15761646  PyTRY 
     
    15861656    float minBeta = 0.0; 
    15871657    PRuleList rules; 
    1588  
    1589     if (!PyArg_ParseTuple(args, "O&fO&|O&:RuleClassifier.call", cc_RuleList, &rules, &minBeta, pt_ExampleGenerator, &gen, pt_weightByGen(gen), &weightID)) 
    1590       return PYNULL; 
    1591  
    1592     TRuleClassifier *rc = new TRuleClassifier_logit(rules, minBeta, gen, weightID); 
     1658    PDistributionList probList; 
     1659    PClassifier classifier; 
     1660 
     1661    if (!PyArg_ParseTuple(args, "O&fO&|O&O&O&:RuleClassifier.call", cc_RuleList, &rules, &minBeta, pt_ExampleGenerator, &gen, pt_weightByGen(gen), &weightID, cc_Classifier, &classifier, cc_DistributionList, &probList)) 
     1662      return PYNULL; 
     1663 
     1664    TRuleClassifier *rc = new TRuleClassifier_logit(rules, minBeta, gen, weightID, classifier, probList); 
    15931665    PRuleClassifier ruleClassifier = rc; 
    15941666//    ruleClassifier = new SELF_AS(TRuleClassifier)(rules, gen, weightID); 
     
    16111683    float minBeta = 0.0; 
    16121684    PRuleList rules; 
    1613  
    1614     if (!PyArg_ParseTuple(args, "O&fO&|O&:RuleClassifier.call", cc_RuleList, &rules, &minBeta, pt_ExampleGenerator, &gen, pt_weightByGen(gen), &weightID)) 
    1615       return PYNULL; 
    1616  
    1617     TRuleClassifier *rc = new TRuleClassifier_logit_bestRule(rules, minBeta, gen, weightID); 
     1685    PDistributionList probList; 
     1686    PClassifier classifier; 
     1687 
     1688    if (!PyArg_ParseTuple(args, "O&fO&|O&O&O&:RuleClassifier.call", cc_RuleList, &rules, &minBeta, pt_ExampleGenerator, &gen, pt_weightByGen(gen), &weightID, cc_Classifier, &classifier, cc_DistributionList, &probList)) 
     1689      return PYNULL; 
     1690 
     1691    TRuleClassifier *rc = new TRuleClassifier_logit_bestRule(rules, minBeta, gen, weightID, classifier, probList); 
    16181692    PRuleClassifier ruleClassifier = rc; 
    16191693//    ruleClassifier = new SELF_AS(TRuleClassifier)(rules, gen, weightID); 
     
    16491723PyObject *RuleList__reduce__(TPyOrange *self, PyObject *) { return ListOfWrappedMethods<PRuleList, TRuleList, PRule, &PyOrRule_Type>::_reduce(self); } 
    16501724 
     1725PEVCDistList PEVCDistList_FromArguments(PyObject *arg) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::P_FromArguments(arg); } 
     1726PyObject *EVCDistList_FromArguments(PyTypeObject *type, PyObject *arg) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_FromArguments(type, arg); } 
     1727PyObject *EVCDistList_new(PyTypeObject *type, PyObject *arg, PyObject *kwds) BASED_ON(Orange, "(<list of EVCDist>)") ALLOWS_EMPTY { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_new(type, arg, kwds); } 
     1728PyObject *EVCDistList_getitem_sq(TPyOrange *self, int index) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_getitem(self, index); } 
     1729int       EVCDistList_setitem_sq(TPyOrange *self, int index, PyObject *item) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_setitem(self, index, item); } 
     1730PyObject *EVCDistList_getslice(TPyOrange *self, int start, int stop) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_getslice(self, start, stop); } 
     1731int       EVCDistList_setslice(TPyOrange *self, int start, int stop, PyObject *item) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_setslice(self, start, stop, item); } 
     1732int       EVCDistList_len_sq(TPyOrange *self) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_len(self); } 
     1733PyObject *EVCDistList_richcmp(TPyOrange *self, PyObject *object, int op) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_richcmp(self, object, op); } 
     1734PyObject *EVCDistList_concat(TPyOrange *self, PyObject *obj) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_concat(self, obj); } 
     1735PyObject *EVCDistList_repeat(TPyOrange *self, int times) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_repeat(self, times); } 
     1736PyObject *EVCDistList_str(TPyOrange *self) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_str(self); } 
     1737PyObject *EVCDistList_repr(TPyOrange *self) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_str(self); } 
     1738int       EVCDistList_contains(TPyOrange *self, PyObject *obj) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_contains(self, obj); } 
     1739PyObject *EVCDistList_append(TPyOrange *self, PyObject *item) PYARGS(METH_O, "(EVCDist) -> None") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_append(self, item); } 
     1740PyObject *EVCDistList_extend(TPyOrange *self, PyObject *obj) PYARGS(METH_O, "(sequence) -> None") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_extend(self, obj); } 
     1741PyObject *EVCDistList_count(TPyOrange *self, PyObject *obj) PYARGS(METH_O, "(EVCDist) -> int") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_count(self, obj); } 
     1742PyObject *EVCDistList_filter(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "([filter-function]) -> EVCDistList") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_filter(self, args); } 
     1743PyObject *EVCDistList_index(TPyOrange *self, PyObject *obj) PYARGS(METH_O, "(EVCDist) -> int") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_index(self, obj); } 
     1744PyObject *EVCDistList_insert(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "(index, item) -> None") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_insert(self, args); } 
     1745PyObject *EVCDistList_native(TPyOrange *self) PYARGS(METH_NOARGS, "() -> list") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_native(self); } 
     1746PyObject *EVCDistList_pop(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "() -> EVCDist") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_pop(self, args); } 
     1747PyObject *EVCDistList_remove(TPyOrange *self, PyObject *obj) PYARGS(METH_O, "(EVCDist) -> None") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_remove(self, obj); } 
     1748PyObject *EVCDistList_reverse(TPyOrange *self) PYARGS(METH_NOARGS, "() -> None") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_reverse(self); } 
     1749PyObject *EVCDistList_sort(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "([cmp-func]) -> None") { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_sort(self, args); } 
     1750PyObject *EVCDistList__reduce__(TPyOrange *self, PyObject *) { return ListOfWrappedMethods<PEVCDistList, TEVCDistList, PEVCDist, &PyOrEVCDist_Type>::_reduce(self); } 
    16511751 
    16521752#include "lib_learner.px" 
  • source/orange/rulelearner.cpp

    r3405 r3576  
    3434 
    3535DEFINE_TOrangeVector_classDescription(PRule, "TRuleList", true, ORANGE_API) 
     36DEFINE_TOrangeVector_classDescription(PEVCDist, "TEVCDistList", true, ORANGE_API) 
    3637 
    3738 
     
    328329    return false; 
    329330   
    330   if (min_coverage>0.0 && obs_dist.cases < min_coverage) 
     331  if (obs_dist.cases < min_coverage) 
    331332    return false; 
    332333 
    333   if (max_rule_complexity > 0.0 && rule->complexity > max_rule_complexity) 
     334  if (max_rule_complexity > -1.0 && rule->complexity > max_rule_complexity) 
    334335    return false; 
    335336 
     
    373374  if (n<=0.0) 
    374375    n = 1e-6f; 
    375  
    376  
    377376   
    378377  float lrs = 2 * (p*log(p/P) + n*log(n/N) - obs_dist.abs * log(obs_dist.abs/exp_dist.abs)); 
     
    382381 
    383382 
    384 float TRuleEvaluator_Entropy::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) const 
     383float TRuleEvaluator_Entropy::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) 
    385384{ 
    386385  const TDiscDistribution &obs_dist = dynamic_cast<const TDiscDistribution &>(rule->classDistribution.getReference()); 
     
    408407} 
    409408 
    410 float TRuleEvaluator_Laplace::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) const 
     409float TRuleEvaluator_Laplace::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) 
    411410{ 
    412411  const TDiscDistribution &obs_dist = dynamic_cast<const TDiscDistribution &>(rule->classDistribution.getReference()); 
     
    423422} 
    424423 
     424TRuleEvaluator_LRS::TRuleEvaluator_LRS(const bool &sr) 
     425: storeRules(sr) 
     426{ 
     427  TRuleList *ruleList = mlnew TRuleList; 
     428  rules = ruleList; 
     429} 
     430 
     431float TRuleEvaluator_LRS::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) 
     432{ 
     433  const TDiscDistribution &obs_dist = dynamic_cast<const TDiscDistribution &>(rule->classDistribution.getReference()); 
     434  if (!obs_dist.cases) 
     435    return 0.0; 
     436   
     437  const TDiscDistribution &exp_dist = dynamic_cast<const TDiscDistribution &>(apriori.getReference()); 
     438 
     439  if (obs_dist.abs >= exp_dist.abs) //it turns out that this happens quite often 
     440    return 0.0;  
     441 
     442  if (targetClass == -1) { 
     443    float lrs = 0.0; 
     444    for(TDiscDistribution::const_iterator odi(obs_dist.begin()), ode(obs_dist.end()), edi(exp_dist.begin()), ede(exp_dist.end()); 
     445        (odi!=ode); odi++, edi++) { 
     446      if ((edi!=ede) && (*edi) && (*odi)) 
     447        lrs += *odi * log(*odi / ((edi != ede) & (*edi > 0.0) ? *edi : 1e-5)); 
     448    } 
     449    lrs = 2 * (lrs - obs_dist.abs * log(obs_dist.abs / exp_dist.abs)); 
     450    return lrs; 
     451  } 
     452 
     453  float p = (targetClass < obs_dist.size()) ? obs_dist[targetClass]-0.5 : 1e-5; 
     454  const float P = (targetClass < exp_dist.size()) && (exp_dist[targetClass] > 0.0) ? exp_dist[targetClass] : 1e-5; 
     455 
     456  if (p/obs_dist.abs < P/exp_dist.abs) 
     457    return 0.0; 
     458 
     459  float n = obs_dist.abs - p; 
     460  float N = exp_dist.abs - P; 
     461 
     462  if (N<=0.0) 
     463    N = 1e-6f; 
     464  if (p<=0.0) 
     465    p = 1e-6f; 
     466  if (n<=0.0) 
     467    n = 1e-6f; 
     468 
     469  float lrs = 2 * (p*log(p/P) + n*log(n/N) - obs_dist.abs * log(obs_dist.abs/exp_dist.abs)); 
     470  if (storeRules) { 
     471    TRuleList &rlist = rules.getReference(); 
     472    rlist.push_back(rule); 
     473  } 
     474  return lrs; 
     475} 
     476 
     477 
     478TEVCDist::TEVCDist(const float & mu, const float & beta, PFloatList & percentiles)  
     479: mu(mu), 
     480  beta(beta), 
     481  percentiles(percentiles) 
     482{} 
     483 
     484TEVCDist::TEVCDist()  
     485{} 
     486 
     487double TEVCDist::getProb(const float & chi) 
     488{ 
     489  if (!percentiles || percentiles->size()==0 || percentiles->at(percentiles->size()-1)<chi) 
     490    return 1.0-exp(-exp((double)(mu-chi)/beta)); 
     491  if (chi < percentiles->at(0)) 
     492    return 1.0; 
     493  TFloatList::const_iterator pi(percentiles->begin()), pe(percentiles->end()); 
     494  for (; (pi+1)!=pe; pi++) { 
     495    float a = *pi; 
     496    float b = *(pi+1); 
     497    if (chi>=a && chi <=b) 
     498      return (chi-a)/(b-a); 
     499  } 
     500  return 1.0; 
     501} 
     502 
     503float TEVCDist::median() 
     504{ 
     505  if (!percentiles || percentiles->size()==0) 
     506    return mu + beta*0.36651292; // -ln(ln(2)), median of the Gumbel (EVC) distribution 
     507  return (percentiles->at(4)+percentiles->at(5))/2; 
     508} 
     509 
     510TEVCDistGetter_Standard::TEVCDistGetter_Standard(PEVCDistList dists)  
     511: dists(dists) 
     512{} 
     513 
     514TEVCDistGetter_Standard::TEVCDistGetter_Standard() 
     515{} 
     516 
     517PEVCDist TEVCDistGetter_Standard::operator()(const PRule, const int & length) const 
     518{ 
     519  if (dists->size() > length) 
     520    return dists->at(length); 
     521  return NULL; 
     522} 
     523    
     524float getChi(float p, float n, float P, float N) 
     525{ 
     526  float pn = p+n; 
     527  if (p/(p+n) == P/(P+N)) 
     528    return 0.0; 
     529  else if (p/(p+n) < P/(P+N)) { 
     530    p = p+0.5; 
     531    if (p>(p+n)*P/(P+N)) 
     532      p = (p+n)*P/(P+N); 
     533    n = pn-p; 
     534  } 
     535  else { 
     536    p = p - 0.5; 
     537    if (p<(p+n)*P/(P+N)) 
     538      p = (p+n)*P/(P+N); 
     539    n = pn-p; 
     540  } 
     541  return 2*(p*log(p/(p+n))+n*log(n/(p+n))+(P-p)*log((P-p)/(P+N-p-n))+(N-n)*log((N-n)/(P+N-p-n))-P*log(P/(P+N))-N*log(N/(P+N))); 
     542} 
     543 
     544// 2 log likelihood with Yates' correction 
     545float TChiFunction_2LOGLR::operator()(PRule rule, PExampleTable data, const int & weightID, const int & targetClass, PDistribution apriori, float & nonOptimistic_Chi) const 
     546{ 
     547  nonOptimistic_Chi = 0.0; 
     548  if (!rule->classDistribution->abs || apriori->abs == rule->classDistribution->abs) 
     549    return 0.0; 
     550  return getChi(rule->classDistribution->atint(targetClass), 
     551                rule->classDistribution->abs - rule->classDistribution->atint(targetClass), 
     552                apriori->atint(targetClass), 
     553                apriori->abs - apriori->atint(targetClass)); 
     554} 
     555 
     556 
     557 
     558TRuleEvaluator_mEVC::TRuleEvaluator_mEVC(const int & m, PChiFunction chiFunction, PEVCDistGetter evcDistGetter, PVariable probVar, PRuleValidator validator, const int & min_improved, const float & min_improved_perc) 
     559: m(m), 
     560  chiFunction(chiFunction), 
     561  evcDistGetter(evcDistGetter), 
     562  probVar(probVar), 
     563  validator(validator), 
     564  min_improved(min_improved), 
     565  min_improved_perc(min_improved_perc), 
     566  bestRule(NULL) 
     567{} 
     568 
     569TRuleEvaluator_mEVC::TRuleEvaluator_mEVC() 
     570: m(0), 
     571  chiFunction(NULL), 
     572  evcDistGetter(NULL), 
     573  probVar(NULL), 
     574  validator(NULL), 
     575  min_improved(1), 
     576  min_improved_perc(0), 
     577  bestRule(NULL) 
     578{} 
     579 
     580void TRuleEvaluator_mEVC::reset() 
     581{ 
     582  bestRule = NULL; 
     583} 
     584 
     585LNLNChiSq::LNLNChiSq(PEVCDist evc, const float & chi) 
     586: evc(evc), 
     587  chi(chi) 
     588{ 
     589  extremeAlpha = evc->getProb(chi); 
     590  if (extremeAlpha < 0.05) 
     591    extremeAlpha = 0.0; 
     592} 
     593 
     594double LNLNChiSq::operator()(float chix) const { 
     595    if (chix<=0.0) 
     596        return 100.0; 
     597    double chip = chisqprob((double)chix,1.0); // in statc 
     598    if (extremeAlpha > 0.0) 
     599        return chip-extremeAlpha; 
     600    if (chip<=0.0 && (evc->mu-chi)/evc->beta < -100) 
     601        return 0.0; 
     602    if (chip<=0.0) 
     603        return -100.0; 
     604    if (chip < 1e-6) 
     605        return log(chip)-(evc->mu-chi)/evc->beta; 
     606    return log(-log(1-chip))-(evc->mu-chi)/evc->beta; 
     607} 
     608 
     609LRInv::LRInv(const float & pn, const float & P, const float & PN, const float & chiCorrected) 
     610: pn(pn), 
     611  P(P),  
     612  chiCorrected(chiCorrected) 
     613{ 
     614  N = PN - P; 
     615} 
     616 
     617double LRInv::operator()(float p) const { 
     618    return getChi(p,pn-p,P,N) - chiCorrected; 
     619} 
     620 
     621// Implementation of Brent's root finding method. 
     622float brent(const float & minv, const float & maxv, const int & maxsteps, DiffFunc * func)  
     623{ 
     624    float a = minv; 
     625    float b = maxv; 
     626    float fa = func->call(a); 
     627    float fb = func->call(b); 
     628    if (fb>0 && fa>0 && fb>fa || fb<0 && fa<0 && fb<fa) 
     629        return a; 
     630    if (fb>0 && fa>0 && fb<fa || fb<0 && fa<0 && fb>fa) 
     631        return b; 
     632 
     633    float c = a; // c is previous value of b 
     634    float fe, fc = fa; 
     635    float m = 0.0, e = 0.0, d = 0.0; 
     636    int counter = 0; 
     637    while (1) { 
     638        counter += 1; 
     639        if (fb == fa) 
     640          return b; 
     641        else if (fb!=fc && fa!=fc) 
     642            d = a*fb*fc/(fa-fb)/(fa-fc)+b*fa*fc/(fb-fa)/(fb-fc)+c*fa*fb/(fc-fa)/(fc-fb); 
     643        else 
     644            d = b-fb*(b-a)/(fb-fa); 
     645        m = (a+b)/2; 
     646        if (d<=m && d>=b || d>=m && d<=b) 
     647            e = d; 
     648        else 
     649            e = m; 
     650        fe = func->call(e); 
     651        if (fe*fb<0) { 
     652            a = b; 
     653            fa = fb; 
     654        } 
     655        c = b; 
     656        fc = fb; 
     657        b = e; 
     658        fb = fe; 
     659        if (abs(a-b)<0.01 && fa*fb<0) 
     660            return (a+b)/2.; 
     661        if (fb*fa>0 || b>maxv || b<minv) 
     662            return 0.0; 
     663        if ((b>0.1 && fb*func->call(b-0.1)<=0) || fb*func->call(b+0.1)<=0) 
     664            return b; 
     665        if (counter>maxsteps) 
     666            return 0.0; 
     667    } 
     668} 
     669 
     670float TRuleEvaluator_mEVC::evaluateRule(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori, const int & rLength, const float & aprioriProb) const 
     671{ 
     672  PEVCDist evc = evcDistGetter->call(rule, rLength); 
     673  if (!evc || evc->mu < 0.0) 
     674    return -10e+6; 
     675  if (evc->mu == 0.0 || rLength == 0) 
     676    return (rule->classDistribution->atint(targetClass)+m*aprioriProb)/(rule->classDistribution->abs+m); 
     677  PEVCDist evc_inter = evcDistGetter->call(rule, 0); 
     678  float rule_acc = rule->classDistribution->atint(targetClass)/rule->classDistribution->abs; 
     679  // if accuracy of rule is worse than prior probability 
     680  if (rule_acc < aprioriProb) 
     681    return rule_acc - 0.01; 
     682  // correct chi square 
     683  float nonOptimistic_Chi = 0.0; 
     684 
     685  float chi = chiFunction->call(rule, examples, weightID, targetClass, apriori, nonOptimistic_Chi); 
     686  if ((evc->mu-chi)/evc->beta < -100) 
     687    return (rule->classDistribution->atint(targetClass)+m*aprioriProb)/(rule->classDistribution->abs+m); 
     688 
     689  float median = evc->median(); 
     690  float chiCorrected = nonOptimistic_Chi; 
     692  // chi is less than median .. 
     692  if (chi <= median) 
     693    return aprioriProb-0.01; 
     694 
     695  // correct chi 
     696  LNLNChiSq *diffFunc = new LNLNChiSq(evc,chi); 
     697  chiCorrected += brent(0.0,chi,100, diffFunc); 
     698  delete diffFunc; 
     699 
     700  // remove inter-length optimism 
     701  chiCorrected -= evc_inter->mu; 
     702  rule->chi = chiCorrected; 
     703  // compute expected number of positive examples 
     704  float ePositives = 0.0; 
     705  if (chiCorrected > 0.0) 
     706  { 
     707    LRInv *diffFunc = new LRInv(rule->classDistribution->abs,apriori->atint(targetClass),apriori->abs,chiCorrected); 
     708    ePositives = brent(0.0, rule->classDistribution->atint(targetClass), 100, diffFunc); 
     709    delete diffFunc; 
     710  } 
     711  float quality = (ePositives + m*aprioriProb)/(rule->classDistribution->abs+m); 
     712  if (quality > aprioriProb) 
     713    return quality; 
     714  return aprioriProb-0.01; 
     715} 
     716 
     717float TRuleEvaluator_mEVC::operator()(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori) 
     718{ 
     719  rule->chi = 0.0; 
     720  if (!rule->classDistribution->cases || !rule->classDistribution->atint(targetClass)) 
     721    return 0; 
     722 
     723  // evaluate rule 
     724  TFilter_values *filter = rule->filter.AS(TFilter_values); 
     725  int rLength = filter->conditions->size(); 
     726  float aprioriProb = apriori->atint(targetClass)/apriori->abs; 
     727  rule->quality = evaluateRule(rule,examples,weightID,targetClass,apriori,rLength,aprioriProb); 
     728  if (rule->quality < 0.0) 
     729    return rule->quality; 
     730  if (!probVar) 
     731    return rule->quality; 
     732 
     733  // get rule's probability coverage 
     734  float requiredQuality = 0.0; 
     735  int improved = 0; 
     736  PEITERATE(ei, rule->examples) { 
     737    if ((*ei).getClass().intV != targetClass) 
     738      continue; 
     739    if (rule->quality > (*ei)[probVar].floatV) 
     740      improved ++; 
     741    requiredQuality += (*ei)[probVar].floatV;  
     742  } 
     743  requiredQuality /= rule->classDistribution->atint(targetClass); 
     744 
     745  // compute future quality 
     746  float futureQuality = 0.0; 
     747  if (requiredQuality <= rule->quality) 
     748    futureQuality = 1+rule->quality; 
     749  else { 
     750    PDistribution oldRuleDist = rule->classDistribution; 
     751    rule->classDistribution = mlnew TDiscDistribution(examples->domain->classVar); 
     752    rule->classDistribution->setint(targetClass, oldRuleDist->atint(targetClass)); 
     753    rule->classDistribution->abs = rule->classDistribution->atint(targetClass); 
     754    float bestQuality = evaluateRule(rule,examples,weightID,targetClass,apriori,rLength+1,aprioriProb); 
     755    rule->classDistribution = oldRuleDist; 
     756    if (bestQuality < rule->quality) 
     757      futureQuality = -1; 
     758    else if (bestQuality < requiredQuality || (bestRule && bestQuality <= bestRule->quality)) 
     759      futureQuality = -1; 
     760    else 
     761      futureQuality = (bestQuality-requiredQuality)/(bestQuality-rule->quality); 
     762  } 
     763 
     764  // store best rule and return result 
     765  if (improved >= min_improved && improved/rule->classDistribution->atint(targetClass) > min_improved_perc && 
     766      rule->quality > aprioriProb && 
     767      (!bestRule || (rule->quality>bestRule->quality)) && 
     768      (!validator || validator->call(rule, examples, weightID, targetClass, apriori))) { 
     769      TRule *pbestRule = new TRule(rule.getReference(), true); 
     770      bestRule = pbestRule; 
     771  } 
     772  return futureQuality; 
     773} 
    425774 
    426775bool worstRule(const PRule &r1, const PRule &r2) 
     
    7211070      PITERATE(TRuleList, ni, newRules) { 
    7221071        (*ni)->quality = evaluator->call(*ni, data, weightID, targetClass, apriori); 
     1072        if ((*ni)->quality >= bestRule->quality && (!validator || validator->call(*ni, data, weightID, targetClass, apriori))) 
     1073          _selectBestRule(*ni, bestRule, wins, rgen); 
    7231074        if (!ruleStoppingValidator || ruleStoppingValidator->call(*ni, (*ri)->examples, weightID, targetClass, (*ri)->classDistribution)) { 
    7241075          ruleList->push_back(*ni); 
    725           if ((*ni)->quality >= bestRule->quality && (!validator || validator->call(*ni, data, weightID, targetClass, apriori))) 
    726             _selectBestRule(*ni, bestRule, wins, rgen); 
    7271076        } 
    7281077      }   
     
    9461295 
    9471296// Rule classifier based on logit (beta) coefficients 
    948 TRuleClassifier_logit::TRuleClassifier_logit(PRuleList arules, const float &minBeta, PExampleTable anexamples, const int &aweightID, const bool &anuseBestRuleOnly) 
     1297TRuleClassifier_logit::TRuleClassifier_logit(PRuleList arules, const float &minBeta, PExampleTable anexamples, const int &aweightID, const PClassifier &classifier, const PDistributionList &probList, const bool &anuseBestRuleOnly) 
    9491298: TRuleClassifier(arules, anexamples, aweightID), 
    9501299  minBeta(minBeta), 
    951   useBestRuleOnly(anuseBestRuleOnly) 
     1300  useBestRuleOnly(anuseBestRuleOnly), 
     1301  priorClassifier(classifier) 
    9521302{ 
    9531303  // compute prior distribution of learning examples 
     
    9571307  // initialize variables f, p, tempF, tempP, wavgCov, wavgCovPrior, wsd, wsdPrior, 
    9581308  // ruleIndices, betas, priorBetas, wpriorProb, wavgProb 
    959   initialize(); 
     1309  initialize(probList); 
    9601310  float step = 2.0; 
    9611311 
    9621312  // compute initial goodness-of-fit evaluation 
    9631313  eval = compPotEval(0,getClassIndex(*(rules->begin())),betas[0],tempF,tempP,wavgProb,wpriorProb); 
     1314  //raiseWarning("rule 0 prob: %f, rule 0 beta: %f",wavgProb->at(0),betas[0]); 
    9641315 
    9651316  // set up prior Betas 
     
    9691320  } 
    9701321  eval = compPotEval(0,getClassIndex(*(rules->begin())),betas[0],tempF,tempP,wavgProb,wpriorProb); 
     1322  //raiseWarning("rule 0 prob: %f, rule 0 beta: %f",wavgProb->at(0),betas[0]); 
    9711323 
    9721324  // evaluation loop 
     
    9821334    oldP[i] = new float[examples->numberOfExamples()]; 
    9831335  } 
     1336//  raiseWarning("rule 0 prob: %f, rule 0 beta: %f",wavgProb->at(0),betas[0]); 
    9841337 
    9851338  float priorb = priorBetas[0]; 
     
    9901343    bool beenInCorrectPrior = false; 
    9911344    float oldEval = eval; 
     1345//    raiseWarning("rule 0 prob: %f, rule 0 beta: %f, step: %f",wavgProb->at(0),betas[0],step); 
    9921346    while (improvedOverAll) { 
    9931347        updateRuleBetas(step); 
     1348//      raiseWarning("rule 0 prob: %f, rule 0 beta: %f out",wavgProb->at(0),betas[0],step); 
    9941349          // optimize prior betas 
    9951350          if (eval<=oldEval && beenInCorrectPrior) { 
     
    10171372            // if no change in prior betas - try to distort them 
    10181373            distortPriorBetas(step); 
     1374//        raiseWarning("rule 0 prob: %f, rule 0 beta: %f in",wavgProb->at(0),betas[0],step); 
    10191375      } 
    10201376    } 
     
    10441400{} 
    10451401 
     1402 
     1403//============================================================================== 
     1404// return 1 if system not solving 
     1405// nDim - system dimension 
     1406// pfMatr - matrix with coefficients 
     1407// pfVect - vector with free members 
     1408// pfSolution - vector with system solution 
     1409// pfMatr becames trianglular after function call 
     1410// pfVect changes after function call 
     1411// 
     1412// Developer: Henry Guennadi Levkin 
     1413// 
     1414//============================================================================== 
     1415int LinearEquationsSolving(int nDim, double* pfMatr, double* pfVect, double* pfSolution) 
     1416{ 
     1417  double fMaxElem; 
     1418  double fAcc; 
     1419 
     1420  int i, j, k, m; 
     1421 
     1422 
     1423  for(k=0; k<(nDim-1); k++) // base row of matrix 
     1424  { 
     1425    // search of line with max element 
     1426    fMaxElem = fabs( pfMatr[k*nDim + k] ); 
     1427    m = k; 
     1428    for(i=k+1; i<nDim; i++) 
     1429    { 
     1430      if(fMaxElem < fabs(pfMatr[i*nDim + k]) ) 
     1431      { 
     1432        fMaxElem = pfMatr[i*nDim + k]; 
     1433        m = i; 
     1434      } 
     1435    } 
     1436     
     1437    // permutation of base line (index k) and max element line(index m) 
     1438    if(m != k) 
     1439    { 
     1440      for(i=k; i<nDim; i++) 
     1441      { 
     1442        fAcc               = pfMatr[k*nDim + i]; 
     1443        pfMatr[k*nDim + i] = pfMatr[m*nDim + i]; 
     1444        pfMatr[m*nDim + i] = fAcc; 
     1445      } 
     1446      fAcc = pfVect[k]; 
     1447      pfVect[k] = pfVect[m]; 
     1448      pfVect[m] = fAcc; 
     1449    } 
     1450 
     1451    if( pfMatr[k*nDim + k] == 0.) return 1; // needs improvement !!! 
     1452 
     1453    // triangulation of matrix with coefficients 
     1454    for(j=(k+1); j<nDim; j++) // current row of matrix 
     1455    { 
     1456      fAcc = - pfMatr[j*nDim + k] / pfMatr[k*nDim + k]; 
     1457      for(i=k; i<nDim; i++) 
     1458      { 
     1459        pfMatr[j*nDim + i] = pfMatr[j*nDim + i] + fAcc*pfMatr[k*nDim + i]; 
     1460      } 
     1461      pfVect[j] = pfVect[j] + fAcc*pfVect[k]; // free member recalculation 
     1462    } 
     1463  } 
     1464 
     1465  for(k=(nDim-1); k>=0; k--) 
     1466  { 
     1467    pfSolution[k] = pfVect[k]; 
     1468    for(i=(k+1); i<nDim; i++) 
     1469    { 
     1470      pfSolution[k] -= (pfMatr[k*nDim + i]*pfSolution[i]); 
     1471    } 
     1472    pfSolution[k] = pfSolution[k] / pfMatr[k*nDim + k]; 
     1473  } 
     1474 
     1475  return 0; 
     1476} 
     1477 
    10461478// function sums; f = a0 + a1*r1.quality + ... AND example probabilities  
    10471479// set all to zero 
    10481480// Compute average example coverage and set index of examples covered by rule 
    10491481// set all remaining variables 
    1050 void TRuleClassifier_logit::initialize() 
     1482void TRuleClassifier_logit::initialize(const PDistributionList &probList) 
    10511483{ 
    10521484  psize = examples->domain->classVar->noOfValues()*examples->numberOfExamples(); 
     
    10651497      tempP[i] = new float[examples->numberOfExamples()]; 
    10661498      for (int j=0; j<examples->numberOfExamples(); j++) { 
    1067           f[i][j] = 0.0; 
    1068           p[i][j] = 1.0/examples->domain->classVar->noOfValues(); 
    1069           tempF[i][j] = 0.0; 
    1070           tempP[i][j] = 1.0/examples->domain->classVar->noOfValues(); 
     1499          f[i][j] = 0.0; 
     1500          p[i][j] = 1.0/examples->domain->classVar->noOfValues(); 
     1501            tempF[i][j] = 0.0; 
     1502            tempP[i][j] = 1.0/examples->domain->classVar->noOfValues(); 
    10711503      } 
    10721504  } 
     
    10781510          tempP[examples->domain->classVar->noOfValues()-1][j] = 1.0/examples->domain->classVar->noOfValues(); 
    10791511      } 
     1512  } 
     1513 
     1514   // if initial example probability is given, update F and P 
     1515  if (probList) { 
     1516    double *matrix = new double [pow(examples->domain->classVar->noOfValues()-1,2)]; 
     1517    double *fVals = new double [examples->domain->classVar->noOfValues()-1]; 
     1518    double *results = new double [examples->domain->classVar->noOfValues()-1]; 
     1519    for (i=0; i<probList->size(); i++) { 
     1520      int k1, k2; 
     1521      TDistribution *dist = mlnew TDiscDistribution(probList->at(i)->variable); 
     1522      PDistribution wdist = dist; 
     1523 
     1524      for (k1=0; k1<examples->domain->classVar->noOfValues(); k1++) { 
     1525        if (probList->at(i)->atint(k1) >= 1.0-1e-4) 
     1526          wdist->setint(k1,(float)(1.0-1e-4)); 
     1527        else if (probList->at(i)->atint(k1) <= 1e-4) 
     1528          wdist->setint(k1,(float)(1e-4)); 
     1529        else 
     1530          wdist->setint(k1,probList->at(i)->atint(k1)); 
     1531      } 
     1532      wdist->normalize(); 
     1533      for (k1=0; k1<examples->domain->classVar->noOfValues()-1; k1++) { 
     1534        fVals[k1] = -wdist->atint(k1); 
     1535        for (k2=0; k2<examples->domain->classVar->noOfValues()-1; k2++) { 
     1536          if (k1==k2) 
     1537            matrix[k1*(examples->domain->classVar->noOfValues()-1)+k2] = wdist->atint(k1)-1; 
     1538          else 
     1539            matrix[k1*(examples->domain->classVar->noOfValues()-1)+k2] = wdist->atint(k1); 
     1540        } 
     1541      } 
     1542      LinearEquationsSolving(examples->domain->classVar->noOfValues()-1, matrix, fVals, results); 
     1543      for (k1=0; k1<examples->domain->classVar->noOfValues()-1; k1++) { 
     1544        f[k1][i] = results[k1]>0.0 ? log(results[k1]) : -10.0; 
     1545          tempF[k1][i] = f[k1][i]; 
     1546      } 
     1547      for (k1=0; k1<examples->domain->classVar->noOfValues(); k1++) { 
     1548          p[k1][i] = wdist->atint(k1); 
     1549          tempP[k1][i] = wdist->atint(k1); 
     1550      } 
     1551    } 
     1552    delete [] matrix; 
     1553    delete [] fVals; 
     1554    delete [] results; 
    10801555  } 
    10811556 
     
    12101685      } 
    12111686  } 
     1687 //raiseWarning("rule 0 prob: %f, rule 0 beta: %f",wavgProb->at(0),betas[0]); 
    12121688  return newEval; 
    12131689} 
     
    12171693{ 
    12181694  // cut betas of optimistic rules (also copy from tempF to f - cutOptimisticBetas does not) 
     1695/*  if (step>=1.0) 
     1696    raiseWarning("before cut rule 0 prob: %f, rule 0 beta: %f, step: %f, counter: ",wavgProb->at(0),betas[0],step);*/ 
    12191697  eval = cutOptimisticBetas(step, eval); 
    12201698  copyTable(f, tempF, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
    12211699  copyTable(p, tempP, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1700  /*if (step>=1.0) 
     1701    raiseWarning("after cut rule 0 prob: %f, rule 0 beta: %f, step: %f, counter: ",wavgProb->at(0),betas[0],step);*/ 
    12221702 
    12231703  float *oldBetasU = new float[rules->size()]; 
    12241704    bool changed = true; 
    1225     while (changed) { 
     1705  int counter = 0; 
      1706  while (changed && counter < 10) { // 10 steps should be perfectly enough as steps are halved - theoretically we only need 1 step 
     1707/*    if (counter >= 1.0) 
     1708      raiseWarning("rule 0 prob: %f, rule 0 beta: %f, step: %f, counter: %f",wavgProb->at(0),betas[0],step, counter); */ 
     1709    counter += 1; 
    12261710        changed = false; 
    12271711    for (int i=0; i<rules->size(); i++) { 
    12281712            // positive update of beta 
    12291713            bool improve = false; 
     1714/*      if (step>=1.0 && i<2 && counter < 2) 
     1715        raiseWarning("after cut 1 rule 0 prob: %f, rule 0 beta: %f, step: %f, counter: %d, %d",wavgProb->at(0),betas[0],step, counter,i);*/ 
    12301716            float newEval = compPotEval(i, getClassIndex(rules->at(i)), betas[i]+step,tempF,tempP,wavgProb,wpriorProb); 
     1717/*      if (step>=1.0 && i<2 && counter < 2) 
     1718        raiseWarning("after cut 2 rule 0 prob: %f, rule 0 beta: %f, step: %f, counter: %d, %d",wavgProb->at(0),betas[0],step, counter,i);*/ 
    12311719      if (newEval>eval && wavgProb->at(i) <= rules->at(i)->quality) { //   
    12321720            memcpy(oldBetasU,betas,sizeof(float)*rules->size()); 
     
    12601748{ 
    12611749    bool changed = true; 
    1262     while (changed) { 
     1750  int counter = 0; 
     1751  while (changed && counter<10) { // for counter see updateRulesBeta method 
     1752    counter ++; 
    12631753        changed = false; 
    12641754        for (int i=0; i<examples->domain->classVar->noOfValues()-1; i++) { 
     
    12681758            float newEval = compPotEval(0, getClassIndex(rules->at(0)), betas[0],tempF,tempP,wavgProb,wpriorProb); 
    12691759            if (wpriorProb->at(i) <= prior->atint(i)/prior->abs) { 
    1270                 eval = cutOptimisticBetas(step, newEval); 
    1271         copyTable(p, tempP, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
    1272         copyTable(f, tempF, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
    1273                 improve = true; 
    1274                 changed = true; 
     1760                newEval = cutOptimisticBetas(step, newEval); 
     1761        if (newEval > eval) { 
     1762          eval = newEval; 
     1763          copyTable(p, tempP, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1764          copyTable(f, tempF, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
     1765                  improve = true; 
     1766                  changed = true; 
     1767        } 
     1768        else { 
     1769          priorBetas[i]-=step; 
     1770          copyTable(tempP, p, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1771          copyTable(tempF, f, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
     1772        } 
    12751773            } 
    1276             else  
     1774      else { 
    12771775                priorBetas[i]-=step; 
     1776        copyTable(tempP, p, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1777        copyTable(tempF, f, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
     1778      } 
    12781779            if (!improve) { 
    12791780                priorBetas[i]-=step; 
    12801781                newEval = compPotEval(0, getClassIndex(rules->at(0)), betas[0],tempF,tempP,wavgProb,wpriorProb); 
    12811782                if (wpriorProb->at(i) >= prior->atint(i)/prior->abs) { 
    1282                     eval = cutOptimisticBetas(step, newEval);; 
    1283           copyTable(p, tempP, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
    1284           copyTable(f, tempF, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
    1285                     changed = true; 
    1286           improve = true; 
     1783                    newEval = cutOptimisticBetas(step, newEval); 
     1784          if (newEval > eval) { 
     1785            eval = newEval; 
     1786            copyTable(p, tempP, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1787            copyTable(f, tempF, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
     1788                      changed = true; 
     1789            improve = true; 
     1790          } 
     1791          else { 
     1792            priorBetas[i]+=step; 
     1793            copyTable(tempP, p, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1794            copyTable(tempF, f, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
     1795          } 
    12871796                } 
    12881797                else  
    12891798                    priorBetas[i]+=step; 
     1799          copyTable(tempP, p, examples->domain->classVar->noOfValues(), examples->numberOfExamples()); 
     1800          copyTable(tempF, f, examples->domain->classVar->noOfValues()-1, examples->numberOfExamples()); 
    12901801            } 
    12911802    } 
     
    13531864  else if (newBeta > minBeta) 
    13541865    dif = newBeta; 
     1866/*  if (ruleIndex == 0) 
     1867    raiseWarning("dif = %f, %d, %d", dif, useBestRuleOnly, classIndex);*/ 
    13551868  // prepare new probabilities 
    13561869  if (abs(dif)>1e-10) 
     
    13731886      else 
    13741887        for (int fi=0; fi<examples->domain->classVar->noOfValues()-1; fi++) 
    1375             if (fi == classIndex) 
     1888          if (fi == classIndex) { 
     1889/*            if (ruleIndex == 0) 
     1890              raiseWarning("oldF = %f, newF = %f", tempF[fi][*ind], tempF[fi][*ind] + dif);*/ 
    13761891              tempF[fi][*ind] += dif; 
     1892          } 
    13771893            else 
    13781894                tempF[fi][*ind] -= dif; 
     
    14261942 
    14271943  // new evaluation 
    1428  
    14291944  float newEval = 0.0; 
     1945  for (int ei=0; ei<examples->numberOfExamples(); ei++) { 
     1946    newEval += tempP[examples->at(ei).getClass().intV][ei]>0.0 ? log(tempP[examples->at(ei).getClass().intV][ei]) : -1e+6; 
     1947//    newEval -= pow(1.0-tempP[examples->at(ei).getClass().intV][ei],2); 
     1948  } 
     1949 
     1950 
     1951/*  float newEval = 0.0; 
    14301952  TFloatList::iterator sdi(wsd->begin()), sde(wsd->end()); 
    14311953  TFloatList::iterator aci(wavgCov->begin()), ace(wavgCov->end()); 
     
    14371959      int nExamples = (*rit)->examples->numberOfExamples(); 
    14381960      float quality = (*rit)->quality; 
    1439       newEval += (nExamples*quality*log((*api)/quality)+nExamples*(1-quality)*log((1-(*api))/(1-quality)))/(*aci); 
     1961      if ((*api)>quality) 
     1962        newEval += 0.1*(nExamples*quality*log((*api)/quality)+nExamples*(1-quality)*log((1-(*api))/(1-quality)))/(*aci); 
     1963      else 
     1964        newEval += (nExamples*quality*log((*api)/quality)+nExamples*(1-quality)*log((1-(*api))/(1-quality)))/(*aci); 
    14401965    } 
    14411966    bi++; 
    1442   } 
     1967  } */ 
    14431968 
    14441969  // new evaluation from prior 
    1445   sdi = wsdPrior->begin(); sde = wsdPrior->end(); 
     1970/*  sdi = wsdPrior->begin(); sde = wsdPrior->end(); 
    14461971  aci = wavgCovPrior->begin(); ace = wavgCovPrior->end(); 
    14471972  api = wpriorProb->begin(); ape = wpriorProb->end(); 
     
    14501975    float quality = prior->atint(i)/prior->abs; 
    14511976    newEval += (nExamples*quality*log((*api)/quality)+nExamples*(1-quality)*log((1-(*api))/(1-quality)))/(*aci); 
    1452   } 
     1977  } */ 
    14531978  return newEval; 
     1979} 
     1980 
     1981void TRuleClassifier_logit::addPriorClassifier(const TExample &ex, double * priorFs) { 
     1982  // initialize variables 
     1983  double *matrix = new double [pow(examples->domain->classVar->noOfValues()-1,2)]; 
     1984  double *fVals = new double [examples->domain->classVar->noOfValues()-1]; 
     1985  double *results = new double [examples->domain->classVar->noOfValues()-1]; 
     1986  int k1, k2; 
     1987  TDistribution *dist = mlnew TDiscDistribution(domain->classVar); 
     1988  PDistribution wdist = dist; 
     1989 
     1990  PDistribution classifierDist = priorClassifier->classDistribution(ex); 
     1991  // correct probablity if equals 1.0 
     1992  for (k1=0; k1<examples->domain->classVar->noOfValues(); k1++) { 
     1993    if (classifierDist->atint(k1) >= 1.0-1e-4) 
     1994      wdist->setint(k1,(float)(1.0-1e-4)); 
     1995    else if (classifierDist->atint(k1) <= 1e-4) 
     1996      wdist->setint(k1,(float)(1e-4)); 
     1997    else 
     1998      wdist->setint(k1,classifierDist->atint(k1)); 
     1999  } 
     2000  wdist->normalize(); 
     2001 
     2002  // create matrix 
     2003  for (k1=0; k1<examples->domain->classVar->noOfValues()-1; k1++) { 
     2004    fVals[k1] = -wdist->atint(k1); 
     2005    for (k2=0; k2<examples->domain->classVar->noOfValues()-1; k2++) { 
     2006      if (k1==k2) 
     2007        matrix[k1*(examples->domain->classVar->noOfValues()-1)+k2] = wdist->atint(k1)-1; 
     2008      else 
     2009        matrix[k1*(examples->domain->classVar->noOfValues()-1)+k2] = wdist->atint(k1); 
     2010    } 
     2011  } 
     2012  // solve equation 
     2013  LinearEquationsSolving(examples->domain->classVar->noOfValues()-1, matrix, fVals, results); 
     2014  for (k1=0; k1<examples->domain->classVar->noOfValues()-1; k1++) 
     2015    priorFs[k1] = results[k1]>0.0 ? log(results[k1]) : -10.0; 
     2016  // clean up 
     2017  delete [] matrix; 
     2018  delete [] fVals; 
     2019  delete [] results; 
    14542020} 
    14552021 
     
    14642030  PDistribution res = dist; 
    14652031 
    1466   float *bestBeta = mlnew float [domain->classVar->noOfValues()]; 
     2032  // if correcting a classifier, use that one first then 
     2033  double * priorFs = new double [examples->domain->classVar->noOfValues()-1]; 
     2034  if (priorClassifier) 
     2035    addPriorClassifier(ex, priorFs); 
     2036  else 
     2037    for (int k=0; k<examples->domain->classVar->noOfValues()-1; k++) 
     2038      priorFs[k] = 0.0; 
     2039 
     2040  // find best beta influence (logit) 
     2041  float *bestBeta = new float [domain->classVar->noOfValues()]; 
    14672042  if (useBestRuleOnly) { 
    14682043    for (int i=0; i<domain->classVar->noOfValues(); i++) 
     
    14742049        bestBeta[getClassIndex(*r)] = *b; 
    14752050  } 
     2051  // compute return probabilities 
    14762052  for (int i=0; i<res->noOfElements()-1; i++) { 
    1477     float f = priorProbBetas->at(i); 
     2053    float f = priorProbBetas->at(i) + priorFs[i]; 
    14782054    TFloatList::const_iterator b(ruleBetas->begin()), be(ruleBetas->end()); 
    14792055    TRuleList::iterator r(rules->begin()), re(rules->end()); 
     
    14902066                f += (*b);  
    14912067          else 
    1492             f -= (*b); 
     2068            f -= (*b);  
    14932069    dist->addint(i,exp(f)); 
    14942070  } 
     
    14962072  dist->normalize(); 
    14972073  delete [] bestBeta; 
     2074  delete [] priorFs; 
    14982075  return res; 
    14992076} 
     
    15362113{} 
    15372114 
    1538 TRuleClassifier_logit_bestRule::TRuleClassifier_logit_bestRule(PRuleList arules, const float &minBeta, PExampleTable anexamples, const int &aweightID) 
    1539 : TRuleClassifier_logit(arules, minBeta, anexamples, aweightID, true) 
     2115TRuleClassifier_logit_bestRule::TRuleClassifier_logit_bestRule(PRuleList arules, const float &minBeta, PExampleTable anexamples, const int &aweightID, const PClassifier &classifier,const PDistributionList &probList) 
     2116: TRuleClassifier_logit(arules, minBeta, anexamples, aweightID, classifier, probList, true) 
    15402117{} 
    15412118 
  • source/orange/rulelearner.hpp

    r3405 r3576  
    3131WRAPPER(Rule) 
    3232WRAPPER(Discretization) 
     33WRAPPER(EVCDist) 
    3334 
    3435#define TRuleList TOrangeVector<PRule> 
    3536VWRAPPER(RuleList) 
     37#define TEVCDistList TOrangeVector<PEVCDist> 
     38VWRAPPER(EVCDistList) 
     39 
    3640 
    3741WRAPPER(ExampleGenerator) 
     
    5458  float quality; //P some measure of rule quality 
    5559  float complexity; //P 
     60  float chi; //P  
    5661 
    5762  int *coveredExamples; 
     
    113118  float min_quality; //P 
    114119 
    115   TRuleValidator_LRS(const float &alpha = 0.05, const float &min_coverage = 0.0, const float &max_rule_complexity = 0.0, const float &min_quality = -numeric_limits<float>::max()); 
     120  TRuleValidator_LRS(const float &alpha = 0.05, const float &min_coverage = 0.0, const float &max_rule_complexity = -1.0, const float &min_quality = -numeric_limits<float>::max()); 
    116121  virtual bool operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) const; 
    117122}; 
     
    123128  __REGISTER_ABSTRACT_CLASS 
    124129 
    125   virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) const = 0; 
     130  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) = 0; 
    126131}; 
    127132 
     
    130135  __REGISTER_CLASS 
    131136 
    132   virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) const; 
     137  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ); 
    133138}; 
    134139 
     
    136141  __REGISTER_CLASS 
    137142 
    138   virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) const; 
     143  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ); 
     144}; 
     145 
      // Fisher-Tippett (extreme value) distribution description used for the
      // EVC (extreme value correction) of rule evaluation statistics.
      146class ORANGE_API TEVCDist : public TOrange { 
      147public: 
      148  __REGISTER_CLASS 
      149 
      150  float mu; //P mu of Fisher-Tippett distribution 
      151  float beta; //P beta of Fisher-Tippett distribution 
      152  PFloatList percentiles; //P 10 values - 0 = 5th percentile, 1 = 15th percentile, 9 = 95th percentile 
      153 
      154  TEVCDist(); 
      155  TEVCDist(const float &, const float &, PFloatList &); 
      156  double getProb(const float & chi); // probability associated with a chi value under this distribution -- TODO confirm tail direction from the .cpp 
      157  float median(); 
      158}; 
     159 
      160WRAPPER(EVCDistGetter) 
      // Abstract interface: map (rule, length) to the EVC distribution that
      // should be used to correct that rule's evaluation statistic.
      161class ORANGE_API TEVCDistGetter: public TOrange { 
      162public: 
      163  __REGISTER_ABSTRACT_CLASS 
      164 
      165  virtual PEVCDist operator()(const PRule , const int &) const = 0; 
      166}; 
     167 
      // Standard getter: looks the distribution up in a precomputed list
      // indexed by rule length (index 0 = rule with no conditions).
      168class ORANGE_API TEVCDistGetter_Standard: public TEVCDistGetter { 
      169public: 
      170  __REGISTER_CLASS 
      171 
      172  PEVCDistList dists; //P EVC distribution (sorted by rule length, 0 = for rules without conditions) 
      173  TEVCDistGetter_Standard(); 
      174  TEVCDistGetter_Standard(PEVCDistList); 
      175  virtual PEVCDist operator()(const PRule, const int &) const; 
      176}; 
     177 
      178WRAPPER(ChiFunction) 
      // Abstract interface for computing a rule's chi-square statistic;
      // also returns a non-optimistic variant through the out-parameter.
      179class ORANGE_API TChiFunction: public TOrange { 
      180public: 
      181  __REGISTER_ABSTRACT_CLASS 
      182 
      183  virtual float operator()(PRule rule, PExampleTable data, const int & weightID, const int & targetClass, PDistribution apriori, float & nonOptimistic_Chi) const = 0; 
      184}; 
     185 
      // Chi statistic computed as 2*log of the likelihood ratio (per the
      // class name; implementation is in the corresponding .cpp).
      186class ORANGE_API TChiFunction_2LOGLR: public TChiFunction { 
      187public: 
      188  __REGISTER_CLASS 
      189 
      190  virtual float operator()(PRule rule, PExampleTable data, const int & weightID, const int & targetClass, PDistribution apriori, float & nonOptimistic_Chi) const; 
      191}; 
     192 
      // Abstract unary functional (float -> double); base class of the
      // LNLNChiSq and LRInv helpers declared below in the original header.
      193class DiffFunc { 
      194public: 
      195  virtual double operator()(float) const = 0; 
      196}; 
     197 
      // Helper functional parameterized by an EVC distribution and a target
      // chi value; presumably evaluated during root finding for the corrected
      // chi (verify against the .cpp implementation).
      198class LNLNChiSq: public DiffFunc { 
      199public: 
      200  PEVCDist evc; 
      201  float chi; 
      202  double extremeAlpha; 
      203 
      204  LNLNChiSq(PEVCDist evc, const float & chi); 
      205  double operator()(float chix) const; 
      206}; 
     207 
      // Helper functional for inverting the likelihood ratio: given a
      // probability p, measures its distance from the corrected chi target.
      // NOTE(review): constructor parameter `PN` against member `N` -- the
      // naming mismatch looks like a typo; confirm in the .cpp definition.
      209class LRInv: public DiffFunc { 
      210public: 
      211  float pn, P, N, chiCorrected; 
      212 
      213  LRInv(const float & pn, const float & P, const float & PN, const float & chiCorrected); 
      214  double operator()(float p) const; 
      215}; 
     215 
     216 
      // Rule evaluator combining an m-estimate with extreme value correction
      // (EVC): the chi statistic is corrected via evcDistGetter/chiFunction
      // before the rule quality is computed. Tracks the best rule seen that
      // satisfies the validator and min_improved constraints.
      217class ORANGE_API TRuleEvaluator_mEVC: public TRuleEvaluator { 
      218public: 
      219  __REGISTER_CLASS 
      220 
      221  int m; //P Parameter m for m-estimate after EVC correction 
      222  PChiFunction chiFunction; //P function for computing chi square significance 
      223  PEVCDistGetter evcDistGetter; //P get EVC distribution for chi correction 
      224  PVariable probVar;//P probability coverage variable (meta usually) 
      225  PRuleValidator validator; //P rule validator for best rule 
      226  int min_improved; //P minimal number of improved examples 
      227  float min_improved_perc; //P minimal percentage of improved examples 
      228  PRule bestRule; //P best rule found and evaluated given conditions (min_improved, validator) 
      229 
      230  TRuleEvaluator_mEVC(); 
      231  TRuleEvaluator_mEVC(const int & m, PChiFunction, PEVCDistGetter, PVariable, PRuleValidator, const int & min_improved, const float & min_improved_perc); 
      232  void reset(); 
      233  float chiAsimetryCorrector(const float &); // NOTE(review): "Asimetry" is a misspelling of "Asymmetry"; kept because it is part of the public interface 
      234  float evaluateRule(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori, const int & rLength, const float & aprioriProb) const; 
      235  float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ); 
      236}; 
     237 
     238 
      // Rule evaluator based on the likelihood ratio statistic (LRS);
      // optionally keeps every evaluated rule in `rules` when storeRules is set.
      239class ORANGE_API TRuleEvaluator_LRS : public TRuleEvaluator { 
      240public: 
      241  __REGISTER_CLASS 
      242 
      243  PRuleList rules; //P evaluated rules, collected only when storeRules is true 
      244  bool storeRules; //P whether to accumulate evaluated rules in `rules` 
      245 
      246  TRuleEvaluator_LRS(const bool & = false); 
      247  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ); 
      248}; 
    140249 
     
    373482  float minBeta; //P minimum beta value of a rule, if lower, rule is set to have beta 0.  
    374483 
     484  PClassifier priorClassifier; 
     485 
    375486  TRuleClassifier_logit(); 
    376   TRuleClassifier_logit(PRuleList rules, const float &minBeta, PExampleTable examples, const int &weightID = 0, const bool &useBestRuleOnly = false); 
     487  TRuleClassifier_logit(PRuleList rules, const float &minBeta, PExampleTable examples, const int &weightID = 0, const PClassifier &classifer = NULL, const PDistributionList &probList = NULL, const bool &useBestRuleOnly = false); 
    377488  ~TRuleClassifier_logit(); 
    378489 
    379   void initialize(); 
     490  void initialize(const PDistributionList &); 
    380491  void updateRuleBetas(float step); 
    381492  float cutOptimisticBetas(float step, float curr_eval); 
     
    385496  float compPotEval(int ruleIndex, int classIndex, float newBeta, float **tempF, float **tempP, PFloatList &wavgProb, PFloatList &wpriorProb); 
    386497  int getClassIndex(PRule r); 
     498  void addPriorClassifier(const TExample &, double *); 
    387499  virtual PDistribution classDistribution(const TExample &ex); 
    388500}; 
     
    393505 
    394506  TRuleClassifier_logit_bestRule(); 
    395   TRuleClassifier_logit_bestRule(PRuleList rules, const float &minBeta, PExampleTable examples, const int &weightID = 0); 
     507  TRuleClassifier_logit_bestRule(PRuleList rules, const float &minBeta, PExampleTable examples, const int &weightID = 0, const PClassifier &classifer = NULL, const PDistributionList &probList = NULL); 
    396508}; 
    397509 
  • source/pyxtract/defvectors.py

    r3097 r3576  
    7171   normalList("C45TreeNode", "lib_learner.cpp"), 
    7272   normalList("Rule", "lib_learner.cpp"), 
     73   normalList("EVCDist", "lib_learner.cpp"), 
    7374 
    7475   normalList("Heatmap", "orangene.cpp"), 
Note: See TracChangeset for help on using the changeset viewer.