source: orange/source/orange/discretize.hpp @ 6782:a99c9e8d1b58

Revision 6782:a99c9e8d1b58, 6.5 KB checked in by janezd <janez.demsar@…>, 4 years ago (diff)
  • fixed and removed the fix related to the number of decimals shown in descriptions of intervals
Line 
/*
    This file is part of Orange.

    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
    Contact: janez.demsar@fri.uni-lj.si

    Orange is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
*/
20
21
22#ifndef __DISCRETIZE_HPP
23#define __DISCRETIZE_HPP
24
25#include <vector>
26using namespace std;
27
28#include "values.hpp"
29#include "transval.hpp"
30#include "domain.hpp"
31#include "distvars.hpp"
32
33WRAPPER(BasicAttrStat)
34
35WRAPPER(Discretization)
36WRAPPER(EquiDistDiscretizer)
37WRAPPER(IntervalDiscretizer)
38
39
40class ORANGE_API TDiscretization : public TOrange {
41public:
42  __REGISTER_ABSTRACT_CLASS
43
44  virtual PVariable operator()(PExampleGenerator, PVariable, const long &weightID=0)=0;
45  void setTransformer(PVariable var, PVariable evar);
46};
47
48
49class ORANGE_API TDiscretizer : public TTransformValue {
50public:
51  __REGISTER_ABSTRACT_CLASS
52
53  /* If you want to avoid rewrapping, you should write this as static and
54     pass the discretizer as PDiscretizer. */
55  virtual PVariable constructVar(PVariable, float mindiff = 1.0) = 0;
56
57  virtual void getCutoffs(vector<float> &cutoffs) const = 0;
58};
59
60WRAPPER(Discretizer)
61
62
63class ORANGE_API TDomainDiscretization : public TOrange {
64public:
65  __REGISTER_CLASS
66
67  PDiscretization discretization; //P discretization
68
69  TDomainDiscretization(PDiscretization = PDiscretization());
70  virtual PDomain operator()(PExampleGenerator, const long &weightID=0);
71
72protected:
73  PDomain equiDistDomain(PExampleGenerator gen);
74  PDomain equiNDomain(PExampleGenerator gen, const long &weightID=0);
75  PDomain otherDomain(PExampleGenerator gen, const long &weightID=0);
76};
77
78
79class ORANGE_API TEquiDistDiscretizer : public TDiscretizer {
80public:
81  __REGISTER_CLASS
82
83  int   numberOfIntervals; //P number of intervals
84  float firstCut; //P the first cut-off point
85  float step; //P step (width of interval)
86
87  TEquiDistDiscretizer(const int=-1, const float=-1.0, const float=-1.0);
88
89  virtual void transform(TValue &);
90  virtual PVariable constructVar(PVariable, float mindiff = 1.0);
91
92  virtual void getCutoffs(vector<float> &cutoffs) const;
93};
94
95
96class ORANGE_API TThresholdDiscretizer : public TDiscretizer {
97public:
98  __REGISTER_CLASS
99
100  float threshold; //P threshold
101
102  TThresholdDiscretizer(const float &threshold = 0.0);
103  virtual void transform(TValue &);
104
105  virtual PVariable constructVar(PVariable, float mindiff = 1.0);
106
107  virtual void getCutoffs(vector<float> &cutoffs) const;
108};
109
110
111class ORANGE_API TIntervalDiscretizer : public TDiscretizer  {
112public:
113  __REGISTER_CLASS
114
115  PFloatList points; //P cut-off points
116
117  TIntervalDiscretizer();
118  TIntervalDiscretizer(PFloatList apoints);
119  TIntervalDiscretizer(const string &boundaries);
120
121  virtual void      transform(TValue &);
122  PVariable constructVar(PVariable var, float mindiff = 1.0);
123
124  virtual void getCutoffs(vector<float> &cutoffs) const;
125};
126
127
128class ORANGE_API TBiModalDiscretizer : public TDiscretizer {
129public:
130  __REGISTER_CLASS
131
132  float low; //P low threshold
133  float high; //P high threshold
134
135  TBiModalDiscretizer(const float & = 0.0, const float & = 0.0);
136  virtual void transform(TValue &);
137  PVariable constructVar(PVariable var, float mindiff = 1.0);
138
139  virtual void getCutoffs(vector<float> &cutoffs) const;
140};
141
142
143
144
145class ORANGE_API TEquiDistDiscretization : public TDiscretization {
146public:
147  __REGISTER_CLASS
148
149  int numberOfIntervals; //P number of intervals
150
151  TEquiDistDiscretization(const int anumber=4);
152  virtual PVariable operator()(PExampleGenerator, PVariable, const long &weightID=0);
153  virtual PVariable operator()(PBasicAttrStat, PVariable) const;
154};
155
156
157
158
159
160class ORANGE_API TFixedDiscretization : public TDiscretization {
161public:
162  __REGISTER_CLASS
163
164  PFloatList points; //P cut-off points
165
166  TFixedDiscretization(TFloatList &apoints);
167  TFixedDiscretization(const string &boundaries);
168
169  virtual PVariable operator()(PExampleGenerator, PVariable, const long &weightID=0);
170};
171
172
173
174class ORANGE_API TEquiNDiscretization : public TDiscretization {
175public:
176  __REGISTER_CLASS
177
178  int numberOfIntervals; //P number of intervals
179  bool recursiveDivision; //P find cut-off points by recursive division (default = true)
180
181  TEquiNDiscretization(int anumber =4);
182  virtual PVariable operator()(const TContDistribution &, PVariable var) const;
183  virtual PVariable operator()(PExampleGenerator, PVariable, const long &weightID=0);
184
185  void cutoffsByMidpoints(PIntervalDiscretizer discretizer, const TContDistribution &distr, float &mindiff) const;
186  void cutoffsByCounting(PIntervalDiscretizer, const TContDistribution &, float &mindiff) const;
187  void cutoffsByDivision(PIntervalDiscretizer, const TContDistribution &, float &mindiff) const;
188  void cutoffsByDivision(const int &noInt, TFloatList &points, 
189                        map<float, float>::const_iterator fbeg, map<float, float>::const_iterator fend,
190                        const float &N, float &mindiff) const;
191};
192
193
194
195class TSimpleRandomGenerator;
196
197class ORANGE_API TEntropyDiscretization : public TDiscretization {
198public:
199  __REGISTER_CLASS
200
201  int maxNumberOfIntervals; //P maximal number of intervals; default = 0 (no limits)
202  bool forceAttribute; //P minimal number of intervals; default = 0 (no limits)
203
204  TEntropyDiscretization();
205  typedef map<float, TDiscDistribution> TS;
206
207  virtual PVariable operator()(PExampleGenerator, PVariable, const long &weightID = 0);
208  virtual PVariable operator()(const TS &, const TDiscDistribution &, PVariable, const long &weightID, TSimpleRandomGenerator &rgen) const;
209
210protected:
211  void divide(const TS::const_iterator &, const TS::const_iterator &, const TDiscDistribution &,
212              float entropy, int k, vector<pair<float, float> > &, TSimpleRandomGenerator &rgen, float &mindiff) const;
213};
214
215
216
217
218class ORANGE_API TBiModalDiscretization : public TDiscretization {
219public:
220  __REGISTER_CLASS
221
222  bool splitInTwo; //P if true (default), flanks are merged into a single interval
223
224  TBiModalDiscretization(const bool = true);
225  virtual PVariable operator()(PExampleGenerator, PVariable, const long &weightID=0);
226};
227
228
229#endif
Note: See TracBrowser for help on using the repository browser.