source: orange/source/orange/rulelearner.hpp @ 8735:065a34c267f2

Revision 8735:065a34c267f2, 17.1 KB checked in by matejd <matejd@…>, 3 years ago (diff)

Moved over code from qtgraph branch, turned primitives into a module (plot.primitives); added Visualize Qt folder to setup.py packages

Line 
1/*
2    This file is part of Orange.
3   
4    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
5    Contact: janez.demsar@fri.uni-lj.si
6
7    Orange is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11
12    Orange is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#ifndef __RULES_HPP
22#define __RULES_HPP
23
24#include "domain.hpp"
25#include "classify.hpp"
26#include "learn.hpp"
27
// Forward declarations for the wrapped types used in this header.
// NOTE(review): WRAPPER and VWRAPPER are defined outside this file; judging by the
// P-prefixed names used below (PRule, PEVDist, PRuleList, ...) each presumably
// introduces the wrapper type PX for class TX — confirm against their definition.
28WRAPPER(ProgressCallback)
29WRAPPER(Rule)
30WRAPPER(Discretization)
31WRAPPER(EVDist)
32
// TRuleList and TEVDistList are macro aliases for TOrangeVector of wrapped rules
// and of wrapped EV distributions, respectively.
33#define TRuleList TOrangeVector<PRule>
34VWRAPPER(RuleList)
35#define TEVDistList TOrangeVector<PEVDist>
36VWRAPPER(EVDistList)
37
38
39WRAPPER(ExampleGenerator)
40WRAPPER(ExampleTable)
41WRAPPER(Filter)
42
// A single rule: a filter deciding which examples the rule covers, together with
// the classifier, learner, distributions and quality figures attached to it.
// NOTE(review): the trailing //P tags look like markers consumed by tooling —
// keep them on the declaration line and leave their text untouched.
// NOTE(review): a copy constructor and a destructor are declared, but no copy
// assignment operator, while coveredExamples is a raw pointer — confirm that
// assigning one TRule to another is either unused or safe.
43class ORANGE_API TRule : public TOrange {
44public:
45  __REGISTER_CLASS
46
47  PFilter filter; //P stored filter for this rule
48  PFilter valuesFilter; //P Filter_values representation of main filter (sometimes needed)
49  PClassifier classifier; //P classifies an example
50  PLearner learner; //P learns a classifier from data
51  PRule parentRule; //P
52  PDistribution baseDist; //P initial distribution
53
54  PDistribution classDistribution; //P distribution of classes covered by the rule
55
56  PExampleTable examples; //P covered examples
57  int weightID; //P weight for the stored examples
58  float quality; //P some measure of rule quality
59  int complexity; //P
60  float chi; //P
  // estRF and distP carry no //P tag, unlike the neighbouring fields.
61  float estRF;
62  float distP;
63  int requiredConditions; //P conditions that are mandatory in rule - rule attribute significance avoids these
64
  // Raw array and its length; per the filterAndStore comment below, covered
  // examples are recorded here. Ownership is decided in the implementation file.
65  int *coveredExamples;
66  int coveredExamplesLength;
67 
68  TRule();
69  TRule(PFilter filter, PClassifier classifier, PLearner lr, PDistribution dist, PExampleTable ce = PExampleTable(), const int &w = 0, const float &qu = -1);
  // copyData defaults to true; what is skipped when it is false is defined in the implementation.
70  TRule(const TRule &other, bool copyData = true);
71  ~TRule();
72
73  bool operator()(const TExample &); //P Returns 1 for accept, 0 for reject
74  PExampleTable operator()(PExampleTable, const bool ref = true, const bool negate = false); //P filter examples
75   
76  void filterAndStore(PExampleTable, const int &weightID = 0, const int &targetClass = -1, const int *prevCovered = NULL, const int anExamples = -1); //P Selects examples from given data
77                                                                          // stores them in coveredExamples, computes distribution
78                                                                          // and sets classValue (if -1 then take majority)
  // Ordering and equality between rules; the comparison criterion is defined in
  // the implementation file and is not visible from this header.
79  bool operator >(const TRule &) const; 
80  bool operator <(const TRule &) const;
81  bool operator >=(const TRule &) const;
82  bool operator <=(const TRule &) const;
83  bool operator ==(const TRule &) const;
84
  // Convenience overloads for wrapped rules: each one forwards to the matching
  // TRule& overload above through getReference().
85  bool operator >(const PRule &r) const
86  { return operator >(r.getReference()); }
87
88  bool operator <(const PRule &r) const
89  { return operator <(r.getReference()); }
90
91  bool operator >=(const PRule &r) const
92  { return operator >=(r.getReference()); }
93
94  bool operator <=(const PRule &r) const
95  { return operator <=(r.getReference()); }
96
97  bool operator ==(const PRule &r) const
98  { return operator ==(r.getReference()); }
99
100  // need string representation of a rule?
101};
102
103
104
105WRAPPER(RuleValidator)
// Abstract interface for rule validators: a const predicate over a rule, an
// example table, two int references (the second one is the target class) and
// an apriori distribution.
// NOTE(review): the unnamed int is presumably the weight ID, as in the other
// functors of this file — confirm.
106class ORANGE_API TRuleValidator : public TOrange {
107public:
108  __REGISTER_ABSTRACT_CLASS
109
110  virtual bool operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) const = 0;
111};
112
113
// Concrete TRuleValidator configured by four thresholds (alpha, minimal coverage,
// maximal rule complexity, minimal quality). How the thresholds are applied is
// defined in the implementation file.
114class ORANGE_API TRuleValidator_LRS : public TRuleValidator {
115public:
116  __REGISTER_CLASS
117
118  float alpha; //P
119  float min_coverage; //P
120  int max_rule_complexity; //P
121  float min_quality; //P
122
  // Defaults: alpha 0.05, min_coverage 0.0, max_rule_complexity -1 and
  // min_quality equal to the lowest finite float.
  // NOTE(review): -1 presumably means "no complexity limit" — confirm.
  // NOTE(review): numeric_limits is used unqualified and this header includes
  // only project headers, so <limits> and a using-declaration must reach it
  // through those includes.
123  TRuleValidator_LRS(const float &alpha = 0.05, const float &min_coverage = 0.0, const int &max_rule_complexity = -1, const float &min_quality = -numeric_limits<float>::max());
124  virtual bool operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) const;
125};
126
127
128WRAPPER(RuleEvaluator)
// Abstract interface for rule evaluators: maps a rule, an example table, two int
// references (the second one is the target class) and a distribution to a float.
// Unlike TRuleValidator::operator(), this call operator is not const.
// NOTE(review): the returned float is presumably what ends up in TRule::quality,
// and the unnamed int is presumably the weight ID — confirm.
129class ORANGE_API TRuleEvaluator : public TOrange {
130public:
131  __REGISTER_ABSTRACT_CLASS
132
133  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution ) = 0;
134};
135
136
// Concrete TRuleEvaluator; the measure itself is defined in the implementation file.
// NOTE(review): there is no `public:` specifier here, unlike the sibling classes,
// so the members below are private unless the registration macro sets an access
// level; the call operator is then reachable only through the TRuleEvaluator
// interface — confirm this is intended.
137class ORANGE_API TRuleEvaluator_Entropy : public TRuleEvaluator {
138  __REGISTER_CLASS
139
140  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution );
141};
142
// Concrete TRuleEvaluator; the measure itself is defined in the implementation file.
// NOTE(review): there is no `public:` specifier here, unlike the sibling classes,
// so the members below are private unless the registration macro sets an access
// level; the call operator is then reachable only through the TRuleEvaluator
// interface — confirm this is intended.
143class ORANGE_API TRuleEvaluator_Laplace : public TRuleEvaluator {
144  __REGISTER_CLASS
145
146  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution );
147};
148
// Parameters of a Fisher-Tippett (extreme value) distribution, mu and beta,
// together with a list of percentiles and the settings describing that list.
149class ORANGE_API TEVDist : public TOrange {
150public:
151  __REGISTER_CLASS
152
153  float mu; //P mu of Fisher-Tippett distribution
154  float beta; //P beta of Fisher-Tippett distribution
155  PFloatList percentiles; //P usually 10 values - 0 = 5th percentile, 1 = 15th percentile, 9 = 95th percentile, change maxPercentile and step for other settings
156  float maxPercentile; //P maxPercentile Value, default 0.95
157  float step; //P step of percentiles, default 0.1
158
159  TEVDist();
  // NOTE(review): the two floats are presumably mu and beta, in that order, and
  // the list the percentiles — confirm against the implementation.
160  TEVDist(const float &, const float &, PFloatList &);
  // Takes a chi value and returns a double; the formula is in the implementation file.
161  double getProb(const float & chi);
162  float median();
163};
164
165WRAPPER(EVDistGetter)
// Abstract interface returning the TEVDist to use for a given rule, given the
// parent length and the length passed by the caller.
166class ORANGE_API TEVDistGetter: public TOrange {
167public:
168  __REGISTER_ABSTRACT_CLASS
169
170  virtual PEVDist operator()(const PRule , const int & parentLength, const int & length) const = 0;
171};
172
// TEVDistGetter backed by a stored list of distributions (dists).
// NOTE(review): per the field comment the list is ordered by rule length, so the
// lookup presumably indexes it by length — confirm in the implementation.
173class ORANGE_API TEVDistGetter_Standard: public TEVDistGetter {
174public:
175  __REGISTER_CLASS
176
177  PEVDistList dists; //P EVC distribution (sorted by rule length, 0 = for rules without conditions)
178  TEVDistGetter_Standard();
179  TEVDistGetter_Standard(PEVDistList);
180  virtual PEVDist operator()(const PRule, const int & parentLength, const int & length) const;
181};
182
// Abstract interface for a one-argument numeric function object: takes a float,
// returns a double. LNLNChiSq, LRInv, LRInvMean and LRInvE implement it.
class DiffFunc {
public:
  // Virtual destructor: concrete functors may own non-trivial members, so they
  // must be destructible safely through a DiffFunc pointer or reference.
  virtual ~DiffFunc() {}

  // Evaluates the function at the given point.
  virtual double operator()(float) = 0;
};
187
// DiffFunc implementation parameterised by an EV distribution, a chi value and
// an apriori probability. The function it evaluates is defined in the
// implementation file.
188class LNLNChiSq: public DiffFunc {
189public:
190  PEVDist evd;
191  float chi, exponent, pp;
192  double extremeAlpha;
193
194  LNLNChiSq(PEVDist evd, const float & chi, const float & aprioriProb);
195  double operator()(float chix);
196};
197
// DiffFunc implementation over the float members n, P, N and chiCorrected; the
// function it evaluates is defined in the implementation file.
198class LRInv: public DiffFunc {
199public:
200  float n,P,N,chiCorrected;
201
  // NOTE(review): the first three arguments are non-const references, so callers
  // must pass lvalues; they presumably initialise n, P and N in some order —
  // confirm against the implementation.
202  LRInv(float &, float &, float &, float chiCorrected);
203  double operator()(float p);
204};
205
// DiffFunc implementation over the float members p, n, P and N, constructed from
// a float, two rules and a target class. How the members are derived from the
// rules is defined in the implementation file.
206class LRInvMean: public DiffFunc {
207public:
208  float p,n,P,N;
209
210  LRInvMean(float, PRule, PRule, const int & targetClass);
211  double operator()(float pc);
212};
213
214
// DiffFunc implementation over the float members n, p, N and chiCorrected,
// constructed from two rules, a target class and a corrected chi value. The
// call operator's argument is named P, in contrast to LRInv where it is p.
215class LRInvE: public DiffFunc {
216public:
217  float n,p,N,chiCorrected;
218
219  LRInvE(PRule, PRule, const int & targetClass, float chiCorrected);
220  double operator()(float P);
221};
222
223
// TRuleEvaluator combining an m-estimate with an EVC (extreme value) correction,
// as described by the field comments below. It also keeps the best rule seen
// that satisfies the configured conditions (bestRule).
224class ORANGE_API TRuleEvaluator_mEVC: public TRuleEvaluator {
225public:
226  __REGISTER_CLASS
227
228  float m; //P Parameter m for m-estimate after EVC correction
229  PEVDistGetter evDistGetter; //P get EVC distribution for chi correction
230  PVariable probVar;//P probability coverage variable (meta usually)
231  PRuleValidator validator; //P rule validator for best rule
232  int min_improved; //P minimal number of improved examples
233  float min_improved_perc; //P minimal percentage of improved examples
234  PRule bestRule; //P best rule found and evaluated given conditions (min_improved, validator)
235  float ruleAlpha; //P minimal 'true' rule significance
236  float attributeAlpha; //P minimal attribute significance
237  bool returnExpectedProb; //P if true, evaluator returns expected class probability, if false, current class probability
238  int optimismReduction; //P to select optimstic (0), pessimistic (1) or EVC (2) evaluation
239
240  TRuleEvaluator_mEVC();
  // NOTE(review): m is taken as const int & here although the member m is a
  // float — confirm the integer parameter type is intended.
241  TRuleEvaluator_mEVC(const int & m,  PEVDistGetter, PVariable, PRuleValidator, const int & min_improved, const float & min_improved_perc, const int & optimismReduction);
242  void reset();
  // The trailing float& is an in/out or output argument; its meaning is defined
  // in the implementation file.
243  bool ruleAttSignificant(PRule, PExampleTable, const int &, const int &targetClass, PDistribution, float &);
244  float chiAsimetryCorrector(const float &);
  // Three evaluation variants sharing one signature.
  // NOTE(review): presumably chosen according to optimismReduction — confirm.
245  float evaluateRuleEVC(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori, const int & rLength, const float & aprioriProb);
246  float evaluateRulePessimistic(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori, const int & rLength, const float & aprioriProb);
247  float evaluateRuleM(PRule rule, PExampleTable examples, const int & weightID, const int &targetClass, PDistribution apriori, const int & rLength, const float & aprioriProb);
  // Same signature as TRuleEvaluator::operator(), so it overrides it even
  // without the virtual keyword.
248  float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution );
249};
250
// TRuleEvaluator that can additionally keep a list of rules (rules) controlled
// by the storeRules flag; the measure and the storing policy are defined in the
// implementation file.
251class ORANGE_API TRuleEvaluator_LRS : public TRuleEvaluator {
252public:
253  __REGISTER_CLASS
254
255  PRuleList rules; //P
256  bool storeRules; //P
257
  // NOTE(review): the bool (default false) presumably initialises storeRules — confirm.
258  TRuleEvaluator_LRS(const bool & = false);
259  virtual float operator()(PRule, PExampleTable, const int &, const int &targetClass, PDistribution );
260};
261
262WRAPPER(RuleFinder)
// Abstract interface for rule search: given an example table, an int (default 0),
// a target class (default -1) and optional base rules, returns one rule.
// Holds the validator and the evaluator available to implementations.
// NOTE(review): default arguments on a virtual function are bound by the static
// type at the call site; TRuleBeamFinder repeats the same defaults, so the two
// must be kept in sync.
263class ORANGE_API TRuleFinder : public TOrange {
264public:
265  __REGISTER_ABSTRACT_CLASS
266
267  PRuleValidator validator; //P
268  PRuleEvaluator evaluator; //P
269
270  virtual PRule operator()(PExampleTable, const int & =0, const int &targetClass = -1, PRuleList baseRules = PRuleList()) = 0;
271};
272
273
274WRAPPER(RuleBeamInitializer)
// Abstract interface producing a list of rules from the examples, weight ID,
// target class, base rules, evaluator and apriori distribution. bestRule is
// passed by non-const reference, so implementations can update the caller's rule.
// NOTE(review): the returned list is presumably the starting beam for
// TRuleBeamFinder — confirm.
275class ORANGE_API TRuleBeamInitializer : public TOrange {
276public:
277  __REGISTER_ABSTRACT_CLASS
278
279  virtual PRuleList operator()(PExampleTable, const int &weightID, const int &targetClass, PRuleList baseRules, PRuleEvaluator, PDistribution apriori, PRule &bestRule) = 0;
280};
281
282
// Concrete TRuleBeamInitializer with no configuration fields; its behaviour is
// defined in the implementation file.
283class ORANGE_API TRuleBeamInitializer_Default : public TRuleBeamInitializer {
284public:
285  __REGISTER_CLASS
286
287  virtual PRuleList operator()(PExampleTable, const int &weightID, const int &targetClass, PRuleList baseRules, PRuleEvaluator, PDistribution apriori, PRule &bestRule);
288};
289
290
291WRAPPER(RuleBeamRefiner)
// Abstract interface mapping one rule (plus examples, weight ID and an optional
// target class, default -1) to a list of rules.
// NOTE(review): presumably the refinements/specialisations of the given rule — confirm.
292class ORANGE_API TRuleBeamRefiner : public TOrange {
293public:
294  __REGISTER_ABSTRACT_CLASS
295
296  virtual PRuleList operator()(PRule rule, PExampleTable, const int &weightID, const int &targetClass = -1) = 0;
297};
298
299
// Concrete TRuleBeamRefiner that carries a discretization used for continuous
// attributes (see the field comment); the refinement itself is defined in the
// implementation file.
300class ORANGE_API TRuleBeamRefiner_Selector : public TRuleBeamRefiner {
301public:
302  __REGISTER_CLASS
303
304  PDiscretization discretization; //P discretization for continuous attributes
305 
306  virtual PRuleList operator()(PRule rule, PExampleTable, const int &weightID, const int &targetClass = -1);
307};
308
309
310WRAPPER(RuleBeamCandidateSelector)
// Abstract interface returning a list of rules chosen from existingRules.
// existingRules is a non-const reference, so implementations can also modify
// the caller's list.
311class ORANGE_API TRuleBeamCandidateSelector : public TOrange {
312public:
313  __REGISTER_ABSTRACT_CLASS
314
315  virtual PRuleList operator()(PRuleList &existingRules, PExampleTable, const int &weightID) = 0;
316};
317
318
// Concrete TRuleBeamCandidateSelector with no configuration fields.
// NOTE(review): by its name it presumably selects every existing rule — confirm
// in the implementation.
319class ORANGE_API TRuleBeamCandidateSelector_TakeAll : public TRuleBeamCandidateSelector {
320public:
321  __REGISTER_CLASS
322
323  virtual PRuleList operator()(PRuleList &existingRules, PExampleTable, const int &weightID);
324};
325
326
327WRAPPER(RuleBeamFilter)
// Abstract interface that filters a rule list in place: it returns nothing and
// receives existingRules by non-const reference.
328class ORANGE_API TRuleBeamFilter : public TOrange {
329public:
330  __REGISTER_ABSTRACT_CLASS
331
332  virtual void operator()(PRuleList &existingRules, PExampleTable, const int &weightID) = 0;
333};
334
335
// TRuleBeamFilter parameterised by a width.
// NOTE(review): presumably trims the list to at most `width` rules — confirm in
// the implementation.
336class ORANGE_API TRuleBeamFilter_Width : public TRuleBeamFilter {
337public:
338  __REGISTER_CLASS
339
340  int width; //P
341
  // The constructor argument defaults to 5.
342  TRuleBeamFilter_Width(const int &w = 5);
343
  // Same signature as TRuleBeamFilter::operator(), so it overrides it even
  // without the virtual keyword.
344  void operator()(PRuleList &rules, PExampleTable, const int &weightID);
345};
346
347
348
// TRuleFinder assembled from pluggable parts: an initializer, a refiner, a
// candidate selector, a rule filter and a stopping validator, in addition to the
// validator and evaluator inherited from TRuleFinder. How the parts are combined
// is defined in the implementation file.
349class ORANGE_API TRuleBeamFinder : public TRuleFinder {
350public:
351  __REGISTER_CLASS
352
353  PRuleBeamInitializer initializer; //P
354  PRuleBeamRefiner refiner; //P
355  PRuleBeamCandidateSelector candidateSelector; //P
356  PRuleBeamFilter ruleFilter; //P
357  PRuleValidator ruleStoppingValidator; //P
358 
  // Overrides TRuleFinder::operator() (same signature) and repeats its default
  // arguments; keep both declarations in sync.
359  PRule operator()(PExampleTable, const int & =0, const int &targetClass = -1, PRuleList baseRules = PRuleList());
360};
361
362
363
364WRAPPER(RuleDataStoppingCriteria)
// Abstract const predicate over the data alone (examples, weight ID, target class).
// NOTE(review): presumably true means "stop learning rules on this data" — confirm.
365class ORANGE_API TRuleDataStoppingCriteria : public TOrange {
366public:
367  __REGISTER_ABSTRACT_CLASS
368
369  virtual bool operator()(PExampleTable, const int &weightID, const int &targetClass) const = 0;
370};
371
372
// Concrete TRuleDataStoppingCriteria with no configuration fields.
// NOTE(review): by its name it presumably fires when no examples of the target
// class remain — confirm in the implementation.
373class ORANGE_API TRuleDataStoppingCriteria_NoPositives : public TRuleDataStoppingCriteria {
374public:
375  __REGISTER_CLASS
376
377  virtual bool operator()(PExampleTable, const int &weightID, const int &targetClass) const;
378};
379
380
381WRAPPER(RuleStoppingCriteria)
// Abstract const predicate over a rule list, a single rule, the examples and a
// weight ID.
// NOTE(review): presumably decides whether learning stops once the given rule
// has been found — confirm.
382class ORANGE_API TRuleStoppingCriteria : public TOrange {
383public:
384  __REGISTER_ABSTRACT_CLASS
385
386  virtual bool operator()(PRuleList, PRule, PExampleTable, const int &weightID) const = 0;
387};
388
// Concrete TRuleStoppingCriteria with no configuration fields; the condition it
// tests is defined in the implementation file.
389class ORANGE_API TRuleStoppingCriteria_NegativeDistribution : public TRuleStoppingCriteria {
390public:
391  __REGISTER_CLASS
392
393  virtual bool operator()(PRuleList, PRule, PExampleTable, const int &weightID) const;
394};
395
396WRAPPER(RuleCovererAndRemover)
// Abstract interface taking a rule and an example table and returning an example
// table. newWeight is a non-const reference, i.e. an output the caller receives.
// NOTE(review): presumably the returned table is the data left after handling
// the examples covered by the rule — confirm.
397class ORANGE_API TRuleCovererAndRemover : public TOrange {
398public:
399  __REGISTER_ABSTRACT_CLASS
400
401   virtual PExampleTable operator()(PRule, PExampleTable, const int &weightID, int &newWeight, const int &targetClass) const = 0;
402};
403
404
// Concrete TRuleCovererAndRemover with no configuration fields; its behaviour
// is defined in the implementation file.
405class ORANGE_API TRuleCovererAndRemover_Default : public TRuleCovererAndRemover {
406public:
407  __REGISTER_CLASS
408
409  virtual PExampleTable operator()(PRule, PExampleTable, const int &weightID, int &newWeight, const int &targetClass) const;
410};
411
412WRAPPER(RuleClassifierConstructor)
413WRAPPER(RuleClassifier)
// Abstract factory: builds a rule classifier from a rule list, an example table
// and a weight ID (default 0).
414class ORANGE_API TRuleClassifierConstructor : public TOrange {
415public:
416  __REGISTER_ABSTRACT_CLASS
417
418  virtual PRuleClassifier operator()(PRuleList, PExampleTable, const int &weightID = 0) = 0;
419};
420
421
// Concrete TRuleClassifierConstructor.
// NOTE(review): presumably builds a TRuleClassifier_firstRule — confirm.
// NOTE(review): there is no `public:` specifier here, unlike the sibling classes,
// so the members below are private unless the registration macro sets an access
// level; the call operator is then reachable only through the
// TRuleClassifierConstructor interface — confirm this is intended.
422class ORANGE_API TRuleClassifierConstructor_firstRule: public TRuleClassifierConstructor {
423  __REGISTER_CLASS
424  virtual PRuleClassifier operator()(PRuleList, PExampleTable, const int &weightID = 0);
425};
426
427
428WRAPPER(RuleLearner)
// Learner assembled from pluggable parts: data and rule stopping criteria, a
// coverer/remover, a rule finder and a classifier constructor. How they are
// combined is defined in the implementation file.
429class ORANGE_API TRuleLearner : public TLearner {
430public:
431  __REGISTER_CLASS
432 
433  PRuleDataStoppingCriteria dataStopping; //P
434  PRuleStoppingCriteria ruleStopping; //P
435  PRuleCovererAndRemover coverAndRemove; //P
436  PRuleFinder ruleFinder; //P
437  PRuleClassifierConstructor classifierConstructor; //P classifier
438
439  PProgressCallback progressCallback; //P progress callback function
440
441  bool storeExamples; //P
442  int targetClass; //P
443  PRuleList baseRules; //P
444
  // Defaults: storeExamples true, targetClass -1, empty base rule list.
445  TRuleLearner(bool storeExamples = true, int targetClass = -1, PRuleList baseRules = PRuleList());
446
  // NOTE(review): a call with exactly two arguments on a TRuleLearner is
  // ambiguous between the two overloads below, because both accept
  // (PExampleGenerator, const int &) once defaults are applied. Pass the target
  // class explicitly, or call with the generator alone to pick the first one.
447  PClassifier operator()(PExampleGenerator, const int & =0);
448  PClassifier operator()(PExampleGenerator, const int &, const int &targetClass = -1, PRuleList baseRules = PRuleList());
449};
450
451
452
// Abstract base for classifiers built from a rule list. Stores the rules, an
// example table and a weight ID; subclasses supply classDistribution().
453class ORANGE_API TRuleClassifier : public TClassifier {
454public:
455  __REGISTER_ABSTRACT_CLASS
456
457  PRuleList rules; //P
458  PExampleTable examples; //P
459  int weightID; //P
460
461  TRuleClassifier();
462  TRuleClassifier(PRuleList rules, PExampleTable examples, const int &weightID = 0);
463
  // Returns a class distribution for the given example.
464  virtual PDistribution classDistribution(const TExample &ex) = 0;
465};
466
467// Why do I have to define the constructor once again?
// Concrete TRuleClassifier that additionally stores a prior distribution.
// NOTE(review): by its name it presumably classifies with the first rule that
// covers the example — confirm in the implementation.
468class ORANGE_API TRuleClassifier_firstRule : public TRuleClassifier {
469public:
470  __REGISTER_CLASS
471
472  PDistribution prior; //P prior distribution
473
474  TRuleClassifier_firstRule();
475  TRuleClassifier_firstRule(PRuleList rules, PExampleTable examples, const int &weightID = 0);
476  virtual PDistribution classDistribution(const TExample &ex);
477};
478
479WRAPPER(LogitClassifierState)
// Working state kept by TRuleClassifier_logit (its currentState member): the
// rules, the examples and a set of raw numeric arrays. None of the fields here
// carries a //P tag.
// NOTE(review): the class holds raw pointers and declares a destructor but no
// copy constructor or copy assignment; copying appears to be meant to go
// through copyTo() — confirm that implicit copies never happen.
480class ORANGE_API TLogitClassifierState : public TOrange {
481public:
482  __REGISTER_CLASS
483
484  PRuleList rules;
485  PExampleTable examples;
486  int weightID;
487
  // Raw arrays; their dimensions and ownership are set up in the implementation
  // file and are not visible from this header.
488  float eval, **f, **p, *betas, *priorBetas;
489  bool *isExampleFixed;
490  PFloatList avgProb, avgPriorProb;
  // Note: ruleIndices is a pointer to PIntList, while prefixRules is a plain PIntList.
491  PIntList *ruleIndices, prefixRules;
492
493  TLogitClassifierState(PRuleList, PExampleTable, const int &);
494  TLogitClassifierState(PRuleList,const PDistributionList &,PExampleTable,const int &);
495  ~TLogitClassifierState();
  // NOTE(review): the int arguments below are presumably example or rule
  // indices, depending on the method — confirm in the implementation.
496  void updateExampleP(int);
497  void computePs(int);
498  void setFixed(int);
499  void updateFixedPs(int);
500  void setPrefixRule(int);
501  void computeAvgProbs();
502  void computePriorProbs();
  // Writes into the state passed by non-const reference.
503  void copyTo(PLogitClassifierState &);
504  void newBeta(int, float);
505  void newPriorBeta(int, float);
506  float getAUC();
507  float getBrierScore();
508};
509
// TRuleClassifier that assigns a beta to each rule (ruleBetas) and keeps its
// working data in a TLogitClassifierState (currentState). The fitting procedure
// is defined in the implementation file.
510class ORANGE_API TRuleClassifier_logit : public TRuleClassifier {
511public:
512  __REGISTER_CLASS
513
514  PDistribution prior; //P prior distribution
515  PDomain domain; //P Domain
516  PFloatList ruleBetas; //P Rule betas
517  float minStep; //P minimal step value
518  float minSignificance; //P minimum requested significance for betas.
519  float minBeta; //P minimum beta by rule to be included in the model.
  // NOTE(review): the next two fields are tagged "// P" with a space, unlike the
  // "//P" used everywhere else in this file — confirm whether that is deliberate.
520  bool setPrefixRules; // P should we order prefix rules ?
521  bool optimizeBetasFlag; // P should we assign betas to rules ?
522
523  PClassifier priorClassifier; //P prior classifier used if provided
524  PLogitClassifierState currentState;
  // NOTE(review): raw pointer, and this class declares no destructor — confirm
  // where (or whether) the array is released.
525  bool *skipRule;
526  PFloatList wsd, wavgCov, wSatQ, wsig; // standard deviations of rule quality
527  PRuleList prefixRules; //P rules that trigger before logit sum.
528
529  TRuleClassifier_logit();
  // Defaults: weightID 0, no prior classifier, no probability list,
  // setPrefixRules false, optimizeBetasFlag true. The parameter name
  // "classifer" is misspelled in this declaration.
530  TRuleClassifier_logit(PRuleList rules, const float &minSignificance, const float &minBeta, PExampleTable examples, const int &weightID = 0, const PClassifier &classifer = NULL, const PDistributionList &probList = NULL, bool setPrefixRules = false, bool optimizeBetasFlag = true);
531
532  void initialize(const PDistributionList &);
533  void updateRuleBetas(float step);
534  void optimizeBetas();
535  void updateRuleBetas_old(float step_);
536  bool setBestPrefixRule();
  // step is a non-const reference in the next two methods, so they can change
  // the caller's value.
537  void correctPriorBetas(float & step);
538  void stabilizeAndEvaluate(float & step, int rule_index);
539  float getRuleLoss(int &);
540 
  // NOTE(review): the double* is presumably an array the method writes into —
  // confirm its expected length in the implementation.
541  void addPriorClassifier(const TExample &, double *);
542  virtual PDistribution classDistribution(const TExample &ex);
543};
544
545#endif
Note: See TracBrowser for help on using the repository browser.