source: orange/source/orange/assoc.hpp @ 10960:2bb772691bd8

Revision 10960:2bb772691bd8, 11.5 KB checked in by Ales Erjavec <ales.erjavec@…>, 21 months ago (diff)

Fixes for clang compiler support.

Added 'TOrangeVector::classDescription()' specialization to
'DEFINE_TOrangeVector_classDescription' macro and removed its default implementation
in the template class (static 'st_classDescription' member must be initialized/specialized before
its use).

A similar problem arises in the included '.ppp' files where st_classDescription is used, so I
moved all '.ppp' includes after the "DEFINE_*"s.

Removed unnecessary/duplicate VWRAPPER/WRAPPER uses.

Added the 'this->...' qualifier in template classes that inherit from a template class when
accessing base class methods; otherwise clang does not find them (as per the C++ standard).

Moved the TVarList and TAttributed*List definitions/implementations from 'orvector' to 'vars'.
Fixed includes in other files using the two headers accordingly.

Fixed some casts to int with more appropriate casts to size_t/uintptr_t.

Added explicit casts of constants to template class in include/stat.hpp
statistics functions.

(references #1219).

Line 
1/*
2    This file is part of Orange.
3       
4    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
5    Contact: janez.demsar@fri.uni-lj.si
6
7    Orange is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11
12    Orange is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21
22#ifndef _ASSOC_HPP
23#define _ASSOC_HPP
24
25#include "stladdon.hpp"
26
27#include "vars.hpp"
28#include "domain.hpp"
29#include "examples.hpp"
30#include "classify.hpp"
31#include "learn.hpp"
32#include "orvector.hpp"
33
/* NOTE(review): WRAPPER is defined outside this header; presumably these two lines
   declare the PExample / PExampleTable types used by the members below - confirm. */
34WRAPPER(Example)
35WRAPPER(ExampleTable)
36
/* A single association rule: a left side and a right side (each held as a PExample),
   the rule's quality measures and example counts, and optionally the examples the
   rule was built from together with the indices of the matching ones.

   NOTE(review): the trailing comments that begin with P or PR on the data members
   look like machine-read property markers (the '.ppp' includes mentioned in the
   revision log) - confirm before rewording any of them. */
37class ORANGE_API TAssociationRule : public TOrange {
38public:
39  __REGISTER_CLASS
40
41  PExample left; //PR left side of the rule
42  PExample right; //PR right side of the rule
43  float support; //P support for the rule
44  float confidence; //P confidence of the rule
45  float coverage; //P rule's coverage
46  float strength; //P rule's strength
47  float lift; //P rule's lift
48  float leverage; //P rule's leverage
49  float nAppliesLeft; //P number of examples covered by the rule's left side
50  float nAppliesRight; //P number of examples covered by the rule's right side
51  float nAppliesBoth; //P number of examples covered by the rule
52  float nExamples; //P number of learning examples
53  int nLeft; //PR number of items on the rule's left side
54  int nRight; //PR number of items on the rule's right side
55
56  PExampleTable examples; //PR examples which the rule was built from
57  PIntList matchLeft; //PR indices of examples that match the left side of the rule
58  PIntList matchBoth; //PR indices to examples that match both sides of the rule
59
  // Takes only the two sides of the rule; no counts are passed in.
60  TAssociationRule(PExample, PExample);
61
  // Takes the two sides plus the example counts. anleft / anright default to -1
  // (presumably meaning "not counted yet" - confirm in the implementation).
62  TAssociationRule(PExample al, PExample ar,
63                   const float &napLeft, const float &napRight, const float &napBoth, const float &nExamples,
64                   int anleft=-1, int anright=-1);
65
  // Two rules are equal when their left sides compare equal and their right sides
  // compare equal; none of the statistics take part in the comparison. Both sides
  // of both rules are dereferenced here without a null check.
66  virtual bool operator ==(const TAssociationRule &other) const
67  { return (left->operator==(other.left.getReference())) && (right->operator==(other.right.getReference())); }
68
  // Shared helper behind appliesLeft / appliesRight / appliesBoth; defined elsewhere.
69  static bool applies(const TExample &ex, const PExample &side);
70
71  bool appliesLeft(const TExample &ex) const
72  { return applies(ex, left); }
73
74  bool appliesRight(const TExample &ex) const
75  { return applies(ex, right); }
76
  // True only when the example passes applies() for the left AND the right side.
77  bool appliesBoth(const TExample &ex) const
78  { return applies(ex, left) && applies(ex, right); }
79
  // Defined elsewhere; presumably returns the number of items in ex - confirm.
80  static int countItems(PExample ex);
81};
82
/* Wrapper and list types for association rules.
   TAssociationRules is a macro alias for TOrangeVector<PAssociationRule>, not a
   distinct class. Under MSVC the template instantiation is additionally declared
   with ORANGE_EXTERN / ORANGE_API (presumably so it is shared across the DLL
   boundary - confirm against the macro definitions, which are not in this file). */
83WRAPPER(AssociationRule)
84
85#ifdef _MSC_VER
86  ORANGE_EXTERN template class ORANGE_API TOrangeVector<PAssociationRule>;
87#endif
88
89#define TAssociationRules TOrangeVector<PAssociationRule>
90VWRAPPER(AssociationRules)
91
92
// Forward declaration: TItemSetValue holds a TItemSetNode pointer before the class is defined.
93class TItemSetNode;
94
95/* These objects are collected in TExampleSets, lists of examples that correspond to a particular tree node.
96   'example' is a unique example id (basically its index in the original dataset)
97   'weight' is the example's weight.
   The constructor simply copies its two arguments into the members. */
98class TExWei {
99public:
100  int example;
101  float weight;
102
103  TExWei(const int &ex, const float &wei)
104  : example(ex),
105    weight(wei)
106  {}
107};
108
109/* This is a set of examples, used to list the examples that support a particular tree node */
// 'vector' is used unqualified and this header neither includes <vector> nor declares a
// using-directive, so both must come from one of the headers included above.
110typedef vector<TExWei> TExampleSet;
111
112
113/* A tree element that corresponds to an attribute value (ie, TItemSetNode has as many
114   TItemSetValues as there are values that appear in itemsets).
115   For each value, we have the 'examples' that support it, the sum of their weights
116   ('support') and the branch that contains more specialized itemsets.

   NOTE(review): the class holds a raw 'branch' pointer and declares a destructor but
   no copy constructor or copy assignment, while TItemSetNode stores these objects by
   value in a vector. Whether the destructor deletes 'branch' is not visible in this
   header - confirm in the implementation that copies cannot free a branch twice. */
117class TItemSetValue {
118public:
119  int value;
120  TItemSetNode *branch;
121
122  float support;
123  TExampleSet examples;
124
125  // This constructor is called when building the 1-tree
126  TItemSetValue(int al);
127
128  // This constructor is called when itemsets are intersected (makePairs etc.)
129  TItemSetValue(int al, const TExampleSet &ex, float asupp);
130
131  ~TItemSetValue();
  // Defined elsewhere; presumably recomputes 'support' from the weights in 'examples' - confirm.
132  void sumSupport();
133};
134
135
136/* TItemSetNode splits itemsets according to the value of attribute 'attrIndex';
137   each element of 'values' corresponds to an attribute value (not necessarily to all,
138   but only to those values that appear in itemsets).
139   Itemsets for which the value is not defined are stored in a subtree in 'nextAttribute'.
140   This can be seen in TItemSetTree::findSupport that finds a node that corresponds to the
141   given itemset

   NOTE(review): 'nextAttribute' is a raw pointer and a destructor is declared, but no
   copy constructor or copy assignment; ownership of the subtree is not visible in
   this header - confirm in the implementation. */
142class TItemSetNode {
143public:
144  int attrIndex;
145  TItemSetNode *nextAttribute;
146  vector<TItemSetValue> values;
147
148  // This constructor is called by 1-tree builder which initializes all values (and later reduces them)
149  TItemSetNode(PVariable var, int anattri);
150
151  // This constructor is called when extending the tree
152  TItemSetNode(int anattri);
153
154  ~TItemSetNode();
155};
156
// Forward declaration: only pointers to TRuleTreeNode appear in the signatures below.
157class TRuleTreeNode;
158
159
/* Induces association rules from examples. operator() is the entry point: it takes an
   example generator and a weight id (default 0) and returns the resulting rules.
   The remaining member functions are all public; judging by their names and
   signatures they are the individual stages of the induction (building the itemset
   tree level by level, then generating rules from it) - confirm in the implementation.

   NOTE(review): the trailing comments that begin with P on the data members look like
   machine-read property markers - confirm before rewording any of them. */
160class ORANGE_API TAssociationRulesInducer : public TOrange {
161public:
162  __REGISTER_CLASS
163
164  int maxItemSets; //P maximal number of itemsets (increase if you want)
165
166  float confidence; //P required confidence
167  float support; //P required support
168  bool classificationRules; //P if true, rules will have the class and only the class attribute on the right-hand side
169  bool storeExamples; //P if true, each rule is going to have tables with references to examples which match its left side or both sides
170
// (this second 'public:' is redundant - the section is already public)
171public:
172
  // Defaults: asupp = 0.3, aconf = 0.5 (presumably the initial 'support' and 'confidence' - confirm).
173  TAssociationRulesInducer(float asupp=0.3, float aconf=0.5);
174  PAssociationRules operator()(PExampleGenerator, const int &weightID = 0);
175
  // Tree-building stages. The tree root, depth, example count and suppN are passed by
  // non-const reference where shown, i.e. they are written by the callee.
176  void buildTrees(PExampleGenerator, const int &weightID, TItemSetNode *&, int &depth, int &nOfExamples, TDiscDistribution &);
177  int  buildTree1(PExampleGenerator, const int &weightID, TItemSetNode *&, float &suppN, int &nOfExamples, TDiscDistribution &);
178  int  buildNext1(TItemSetNode *, int k, const float suppN);
179  int  makePairs (TItemSetNode *, const float suppN);
180
  // Rule generation for the classificationRules case.
181  PAssociationRules generateClassificationRules(PDomain, TItemSetNode *tree, const int nOfExamples, const TDiscDistribution &);
182  void generateClassificationRules1(PDomain, TItemSetNode *root, TItemSetNode *node, TExample &left, const int nLeft, const float nAppliesLeft, PAssociationRules, const int nOfExamples, const TDiscDistribution &, TExampleSet *leftSet);
183
  // General rule generation.
184  PAssociationRules generateRules(PDomain, TItemSetNode *, const int depth, const int nOfExamples);
185  void generateRules1(TExample &, TItemSetNode *root, TItemSetNode *node, int k, int oldk, PAssociationRules, const int nOfExamples);
186  void find1Rules(TExample &, TItemSetNode *, const float &support, int oldk, PAssociationRules, const int nOfExamples, const TExampleSet &bothSets);
  // Returns a raw TRuleTreeNode pointer; who frees it is not visible in this header.
187  TRuleTreeNode *buildTree1FromExample(TExample &, TItemSetNode *);
188  int generateNext1(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsetsTree, TExample &right, int k, TExample &whole, const float &support, PAssociationRules, const int nOfExamples, const TExampleSet &bothSets);
189  int generatePairs(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsetsTree, TExample &right, TExample &whole, const float &support, PAssociationRules, const int nOfExamples, const TExampleSet &bothSets);
190};
191
192WRAPPER(AssociationRulesInducer)
193
194
195
196
/* Sparse form of one example: a weight plus an array of long item codes.
   Built from a TExample and a weight id.

   NOTE(review): 'itemset' is a raw pointer and the class declares a destructor and a
   copy constructor but no copy assignment operator; if the destructor frees
   'itemset', the compiler-generated assignment would share the array - confirm in
   the implementation. */
197class TSparseExample{
198public:
199    float weight;           // weight of this example
200    long *itemset;      // vector storing just items that have some value in original example
201    int length;         // presumably the number of entries in 'itemset' - confirm
202
203    TSparseExample(TExample *example, int weightID);
204  ~TSparseExample();
205  TSparseExample(const TSparseExample &);
206};
207
208
/* A whole data set in sparse form: one TSparseExample pointer per example, the total
   weight, the original domain and the domain mapped to long values. Built from an
   example generator and a weight id.

   NOTE(review): 'transaction' holds raw pointers and a destructor is declared, but no
   copy constructor or copy assignment; presumably this object owns the pointed-to
   examples - confirm in the implementation. */
209class TSparseExamples{
210public:
211    float fullWeight;                   // weight of all examples
212    vector<TSparseExample*> transaction;    // vector storing all sparse examples
213    PDomain domain;                     // domain of original example or exampleGenerator
214    vector<long> intDomain;             // domain mapped longint values
215
216    TSparseExamples(PExampleGenerator examples, int weightID);
217    ~TSparseExamples();
218};
219
220
// Forward declaration so the child map type can be named before the class is defined.
221class TSparseItemsetNode;
// Children of a node, keyed by the child's item value. 'map' is used unqualified, so it
// must be brought into scope by one of the headers included above.
222typedef map<long, TSparseItemsetNode *> TSparseISubNodes;
223
/* One node of the sparse itemset tree. A node stores its own item value, a pointer to
   its parent and a map of child nodes, so the itemset it represents is the chain of
   values from the node up through its parents.

   NOTE(review): children and parent are raw pointers and a destructor is declared,
   but no copy constructor or copy assignment; ownership is not visible in this
   header - confirm in the implementation. */
224class TSparseItemsetNode{                           //item node used in TSparseItemsetTree
225public:
226    float weiSupp;                          //support of itemset consisting node and all of its parents
227    long value;                             //value of this node
228    TSparseItemsetNode *parent;                 //pointer to parent node
229    TSparseISubNodes subNode;               //children items
230    vector<int> exampleIds;                 // presumably ids of the examples assigned to this node - confirm
231
232    TSparseItemsetNode(long avalue = -1);           //constructor; value defaults to -1
233    ~TSparseItemsetNode();
234
235  TSparseItemsetNode *operator[] (long avalue); //directly gets subnode
236
237    TSparseItemsetNode* addNode(long avalue);       //adds new subnode
238    bool hasNode(long avalue);              //returns true if has subnode with given value
239};
240
241
/* Tree of TSparseItemsetNodes rooted at 'root', built from a TSparseExamples set.
   The member functions cover building the first level, extending the tree level by
   level, counting itemsets over examples, pruning and rule generation; all of them
   are defined elsewhere.

   NOTE(review): TOrange is named as the base without an access specifier, which for a
   'class' means private inheritance. Every other TOrange-derived class in this header
   uses 'public TOrange', and WRAPPER(SparseItemsetTree) is applied to this class
   further down - confirm that private inheritance is intended.
   NOTE(review): 'root' is a raw pointer and a destructor is declared, but no copy
   constructor or copy assignment - confirm ownership in the implementation. */
242class TSparseItemsetTree : TOrange {                            //tree built from TSparseItemsetNodes
243public:
244    TSparseItemsetTree(const TSparseExamples &examples);            //constructor
245    ~TSparseItemsetTree();
246
    // intDomain is taken by value, so the vector is copied on every call.
247    int buildLevelOne(vector<long> intDomain);
248    long extendNextLevel(int maxDepth, long maxCount);
249    bool allowExtend(long itemset[], int iLength);
250    long countLeafNodes();
251    void considerItemset(long itemset[], int iLength, float weight, int aimLength);
252    void considerExamples(TSparseExamples *examples, int aimLength);
253  void assignExamples(TSparseItemsetNode *node, long *itemset, long *itemsetend, const int exampleId);
254  void assignExamples(TSparseExamples &examples);
255  void delLeafSmall(float minSupport);
256    PAssociationRules genRules(int maxDepth, float minConf, float nOfExamples, bool storeExamples);
257    long getItemsetRules(long itemset[], int iLength, float minConf,
258                         float nAppliesBoth, float nOfExamples, PAssociationRules rules, bool storeExamples, TSparseItemsetNode *bothNode);
259    PDomain domain;
260
// 'private:' is commented out, so 'root' is public like everything above it.
261//private:
262    TSparseItemsetNode *root;
263};
264
265
/* Induces association rules from sparse data. operator() is the entry point: it takes
   an example generator and a weight id and returns the resulting rules. Unlike
   TAssociationRulesInducer::operator(), weightID has no default value here.

   NOTE(review): the trailing comments that begin with P on the data members look like
   machine-read property markers - confirm before rewording any of them. */
266class ORANGE_API TAssociationRulesSparseInducer : public TOrange {
267public:
268  __REGISTER_CLASS
269
270  int maxItemSets; //P maximal number of itemsets (increase if you want)
271
272  float confidence; //P required confidence
273  float support; //P required support
274
275  bool storeExamples; //P stores examples corresponding to rules
276
  // Defaults: asupp = 0.3, aconf = 0, awei = 0.
277  TAssociationRulesSparseInducer(float asupp=0.3, float aconf=0, int awei=0);
  // Declared without the class-name qualifier: a qualified name is not allowed on a
  // member declaration inside its own class, and standard-conforming compilers reject it.
  // 'i' and 'fullWeight' are non-const references, i.e. written by the callee.
  // Returns a raw pointer; who frees the tree is not visible in this header.
278  TSparseItemsetTree *buildTree(PExampleGenerator examples, const int &weightID, long &i, float &fullWeight);
279  PAssociationRules operator()(PExampleGenerator, const int &weightID);
280
281private:
282  float nOfExamples;
283};
284
285WRAPPER(AssociationRulesSparseInducer)
286
287
// Wrapper for TSparseItemsetTree; PSparseItemsetTree is the return type of
// TItemsetsSparseInducer::operator() below.
288WRAPPER(SparseItemsetTree)
289
/* Induces itemsets (rather than rules) from sparse data: operator() takes an example
   generator and a weight id and returns a PSparseItemsetTree.

   NOTE(review): the trailing comments that begin with P on the data members look like
   machine-read property markers - confirm before rewording any of them.
   NOTE(review): unlike the two rule inducers above, no WRAPPER(...) for this class
   appears in this header - confirm whether one is needed. */
290class ORANGE_API TItemsetsSparseInducer : public TOrange {
291public:
292  __REGISTER_CLASS
293
294  int maxItemSets; //P maximal number of itemsets (increase if you want)
295  float support; //P required support
296
297  bool storeExamples; //P stores examples corresponding to itemsets
298
  // Defaults: asupp = 0.3, awei = 0.
299  TItemsetsSparseInducer(float asupp=0.3, int awei=0);
300  PSparseItemsetTree operator()(PExampleGenerator, const int &weightID);
301
302private:
303  float nOfExamples;
304};
305
306
/* Learner built on association rules: operator() takes an example generator and an
   unnamed int (default 0) and returns a PClassifier. The settings below mirror those
   of the rule inducer and of TAssociationClassifier (presumably they are passed on to
   them - confirm in the implementation).

   NOTE(review): the trailing comments that begin with P on the data members look like
   machine-read property markers - confirm before rewording any of them. */
307class ORANGE_API TAssociationLearner : public TLearner {
308public:
309  __REGISTER_CLASS
310
311  float confidence; //P required confidence
312  float support; //P required support
313  int voteWeight; //P vote weight (s=support, c=confidence, p=product)
314  int maxItemSets; //P maximal number of itemsets (increase if you want)
315
316  TAssociationLearner();
317  virtual PClassifier operator()(PExampleGenerator gen, const int & = 0);
318};
319
320
/* Classifier that holds a list of association rules and a vote-weight setting, and
   provides classDistribution() for an example (defined elsewhere).
   The constructor takes the vote weight as a char (default 's') while the member
   'voteWeight' is an int; per the member's comment the values are the character
   codes s, c and p.

   NOTE(review): the trailing comments that begin with P on the data members look like
   machine-read property markers - confirm before rewording any of them. */
321class ORANGE_API TAssociationClassifier : public TClassifierFD {
322public:
323  __REGISTER_CLASS
324
325  PAssociationRules rules; //P association rules
326  int voteWeight; //P vote weight (s=support, c=confidence, p=product)
327
328  TAssociationClassifier(PDomain dom=PDomain(), PAssociationRules arules=PAssociationRules(), char avote='s');
329  virtual PDistribution classDistribution(const TExample &);
330};
331
332#endif
Note: See TracBrowser for help on using the repository browser.