source: orange/source/orange/trindex.cpp @ 11703:9b8d8ab7820c

Revision 11703:9b8d8ab7820c, 14.1 KB checked in by janezd <janez.demsar@…>, 7 months ago (diff)

Removed the GPL copyright notice from all files except orangeqt.

Line 
1// to include Python.h before STL defines a template set (doesn't work with VC 6.0)
2#include "garbage.hpp"
3
4#include <math.h>
5#include <algorithm>
6#include <set>
7
8#include "stladdon.hpp"
9#include "random.hpp"
10
11#include "vars.hpp"
12#include "domain.hpp"
13#include "examples.hpp"
14#include "examplegen.hpp"
15
16#include "trindex.ppp"
17
18
19class rsrgen {
20public:
21  PRandomGenerator randomGenerator;
22
23  rsrgen(const int &seed)
24  : randomGenerator(PRandomGenerator(mlnew TRandomGenerator((unsigned long)(seed>=0 ? seed : 0))))
25  {}
26
27  rsrgen(PRandomGenerator rgen)
28  : randomGenerator(rgen ? rgen : PRandomGenerator(mlnew TRandomGenerator()))
29  {}
30
31  rsrgen(PRandomGenerator rgen, const int &seed)
32  : randomGenerator(rgen ? rgen : PRandomGenerator(mlnew TRandomGenerator((unsigned long)(seed>=0 ? seed : 0))))
33  {}
34
35  int operator()(int n)
36  { return randomGenerator->randint(n); }
37};
38
39
40TMakeRandomIndices::TMakeRandomIndices(const int &astratified, const int &arandseed)
41: stratified(astratified),
42  randseed(arandseed),
43  randomGenerator()
44{}
45
46
47TMakeRandomIndices::TMakeRandomIndices(const int &astratified, PRandomGenerator randgen)
48: stratified(astratified),
49  randseed(-1),
50  randomGenerator(randgen)
51{}
52
53
54
55TMakeRandomIndices2::TMakeRandomIndices2(const float &ap0, const int &astratified, const int &arandseed)
56: TMakeRandomIndices(astratified, arandseed),
57  p0(ap0)
58{}
59
60
61TMakeRandomIndices2::TMakeRandomIndices2(const float &ap0, const int &astratified, PRandomGenerator randgen)
62: TMakeRandomIndices(astratified, randgen),
63  p0(ap0)
64{}
65
66
67PRandomIndices TMakeRandomIndices2::operator()(const int &n)
68{ return operator()(n, p0); }
69
70
71PRandomIndices TMakeRandomIndices2::operator()(const int &n, const float &p0)
72 { if (stratified==TMakeRandomIndices::STRATIFIED)
73     raiseError("cannot prepare stratified indices (no class values)");
74
75   if (!randomGenerator && (randseed<0))
76     raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
77
78   PRandomIndices indices(mlnew TFoldIndices(n, 1));
79   TFoldIndices::iterator ii(indices->begin());
80
81   int no= (p0<=1.0) ? int(p0*n+0.5) : int(p0+0.5);
82   if (no>n) no=n;
83   while(no--)
84     *(ii++)=0;
85
86   rsrgen rg(randomGenerator, randseed);
87   or_random_shuffle(indices->begin(), indices->end(), rg);
88   return indices;
89 }
90
91
92PRandomIndices TMakeRandomIndices2::operator()(PExampleGenerator gen)
93 { return operator()(gen, p0); }
94
95
96PRandomIndices TMakeRandomIndices2::operator()(PExampleGenerator gen, const float &ap0)
97{ 
98  if (!gen)
99    raiseError("invalid example generator");
100
101   if (!randomGenerator && (randseed<0))
102     raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
103
104  if (stratified==TMakeRandomIndices::NOT_STRATIFIED)
105    return operator()(gen->numberOfExamples(), ap0);
106
107  if (!gen->domain->classVar)
108    if (stratified==TMakeRandomIndices::STRATIFIED_IF_POSSIBLE)
109      return operator()(gen->numberOfExamples(), ap0);
110    else
111      raiseError("invalid example generator or class-less domain");
112
113  if (gen->domain->classVar->varType!=TValue::INTVAR)
114    if (stratified==TMakeRandomIndices::STRATIFIED_IF_POSSIBLE)
115      return operator()(gen->numberOfExamples(), ap0);
116    else
117      raiseError("cannot prepare stratified indices (non-discrete class values)");
118 
119  TExampleIterator ri=gen->begin();
120  if (!ri)
121    return PRandomIndices(mlnew TFoldIndices());
122 
123  typedef pair<int, int> pii; // index of example, class value
124  vector<pii> ricv;
125
126  for(int in=0; ri; ++ri)
127    if ((*ri).getClass().isSpecial()) {
128      if (stratified==TMakeRandomIndices::STRATIFIED_IF_POSSIBLE)
129        return operator()(gen->numberOfExamples(), ap0);
130      else
131        raiseError("cannot prepare stratified indices (undefined class value(s))");
132    }
133    else
134      ricv.push_back(pii(in++, (*ri).getClass()));
135
136  random_sort(ricv.begin(), ricv.end(),
137              predOn2nd<pair<int, int>, less<int> >(), predOn2nd<pair<int, int>, equal_to<int> >(),
138              rsrgen(randomGenerator, randseed));
139
140  float p0;
141  if (ap0>1.0) {
142    if (ap0>ricv.size())
143      raiseError("p0 is greater than the number of examples");
144    else
145      p0 = ap0/float(ricv.size());
146  }
147  else
148    p0 = ap0;
149
150  float p1 = 1-p0;
151  float rem = 0;
152
153  PRandomIndices indices(mlnew TFoldIndices());
154  indices->resize(ricv.size());
155  ITERATE(vector<pii>, ai, ricv)
156    if (rem<=0) { 
157      indices->at((*ai).first) = 1;
158      rem += p0;
159    }
160    else {
161      indices->at((*ai).first) = 0;
162      rem -= p1;
163    }
164  // E.g., if p0 is two times p1, two 0's will cancel one 1.
165
166  return indices;
167}
168
169
170
171TMakeRandomIndicesN::TMakeRandomIndicesN(const int &astrat, const int &randseed)
172: TMakeRandomIndices(astrat, randseed)
173{}
174
175
176TMakeRandomIndicesN::TMakeRandomIndicesN(const int &astrat, PRandomGenerator randgen)
177: TMakeRandomIndices(astrat, randgen)
178{}
179
180
181TMakeRandomIndicesN::TMakeRandomIndicesN(PFloatList ap, const int &astrat, const int &randseed)
182: TMakeRandomIndices(astrat, randseed),
183  p(ap)
184{}
185
186
187TMakeRandomIndicesN::TMakeRandomIndicesN(PFloatList ap, const int &astrat, PRandomGenerator randgen)
188: TMakeRandomIndices(astrat, randgen),
189  p(ap)
190{}
191
192
193/*  Prepares a vector of given size and with given distribution of elements. Distribution is given as a vector of
194    floats and the constructor prepares a vector with elements from 0 to p.size() (therefore p.size()+1 elements
195    with the last one having probability 1-sum(p)). */
196PRandomIndices TMakeRandomIndicesN::operator()(const int &n)
197{ checkProperty(p); // although it is checked later, a better diagnostics can be given here
198  return operator()(n, p); }
199
200
201PRandomIndices TMakeRandomIndicesN::operator()(PExampleGenerator gen)
202{ checkProperty(p); // although it is checked later, a better diagnostics can be given here
203  return operator()(gen->numberOfExamples(), p); }
204
205
206PRandomIndices TMakeRandomIndicesN::operator()(PExampleGenerator gen, PFloatList ap)
207{ return operator()(gen->numberOfExamples(), ap); }
208
209
210PRandomIndices TMakeRandomIndicesN::operator()(const int &n, PFloatList ap)
211{ 
212  if (!ap || !ap->size())
213    raiseError("'p' not defined or empty");
214
215  if (!randomGenerator && (randseed<0))
216    raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
217 
218  float sum = 0;
219  bool props = true;
220  for(TFloatList::const_iterator pis(ap->begin()), pie(ap->end()); pis!=pie; pis++) {
221    sum += *pis;
222    if (*pis > 1.0)
223      props = false;
224  }
225
226  if (props) {
227    if (sum>=1.0)
228      raiseError("elements of 'p' sum to 1 or more");
229  }
230  else {
231    if (sum>n)
232      raiseError("elements of 'p' sum to more than number of examples");
233  } 
234
235  if (stratified==TMakeRandomIndices::STRATIFIED)
236    raiseError("stratification not implemented");
237
238  PRandomIndices indices(mlnew TFoldIndices(n, ap->size()));
239  TFoldIndices::iterator ii(indices->begin()), ie(indices->end());
240  int no, ss=-1;
241  PITERATE(TFloatList, pi, ap)
242    for(ss++, no = props ? int(*pi*n+0.5) : int(*pi+0.5); no-- && (ii!=ie); *(ii++)=ss);
243
244  rsrgen rg(randomGenerator, randseed);
245  or_random_shuffle(indices->begin(), indices->end(), rg);
246
247  return indices;
248}
249
250
251// Prepares a vector of indices for f-fold cross validation with n examples
252TMakeRandomIndicesCV::TMakeRandomIndicesCV(const int &afolds, const int &astratified, const int &arandseed)
253: TMakeRandomIndices(astratified, arandseed),
254  folds(afolds)
255{}
256
257
258TMakeRandomIndicesCV::TMakeRandomIndicesCV(const int &afolds, const int &astratified, PRandomGenerator randgen)
259: TMakeRandomIndices(astratified, randgen),
260  folds(afolds)
261{}
262
263
264PRandomIndices TMakeRandomIndicesCV::operator()(const int &n)
265{ return operator()(n, folds); }
266
267
268PRandomIndices TMakeRandomIndicesCV::operator()(const int &n, const int &afolds)
269{ 
270  if (stratified==TMakeRandomIndices::STRATIFIED)
271    raiseError("cannot prepare stratified indices (no class values)");
272
273  if (!randomGenerator && (randseed<0))
274    raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
275
276  if (n<=0)
277    raiseError("unknown number of examples");
278
279  if (afolds<=0)
280    raiseError("invalid number of folds");
281
282  PRandomIndices indices(mlnew TFoldIndices(n, afolds-1));
283
284  TFoldIndices::iterator ii=indices->begin();
285  for(int ss=0; ss<afolds; ss++)
286    for(int no=n/afolds+(ss<n%afolds ? 1 : 0); no--; *(ii++)=ss);
287
288  rsrgen rg(randomGenerator, randseed);
289  or_random_shuffle(indices->begin(), indices->end(), rg);
290
291  return indices;
292}
293
294
295PRandomIndices TMakeRandomIndicesCV::operator()(PExampleGenerator gen)
296{ return operator()(gen, folds); }
297
298
299PRandomIndices TMakeRandomIndicesCV::operator()(PExampleGenerator gen, const int &afolds)
300{
301  if (!gen)
302    raiseError("invalid example generator");
303
304  if (afolds<=0)
305    raiseError("invalid number of folds");
306
307
308  if (stratified==TMakeRandomIndices::NOT_STRATIFIED)
309    return operator()(gen->numberOfExamples(), afolds);
310
311  if (!gen->domain->classVar)
312    if (stratified==TMakeRandomIndices::STRATIFIED_IF_POSSIBLE)
313      return operator()(gen->numberOfExamples(), afolds);
314    else
315      raiseError("invalid example generator or class-less domain");
316
317  if (gen->domain->classVar->varType!=TValue::INTVAR)
318    if (stratified==TMakeRandomIndices::STRATIFIED_IF_POSSIBLE)
319      return operator()(gen->numberOfExamples(), afolds);
320    else
321      raiseError("cannot prepare stratified indices (non-discrete class values)");
322   
323  if (!randomGenerator && (randseed<0))
324    raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
325
326  TExampleIterator ri=gen->begin();
327  if (!ri)
328    return PRandomIndices(mlnew TFoldIndices());
329
330  typedef pair<int, int> pii; // index of example, class value
331  vector<pii> ricv;
332  for(int in=0; ri; ++ri) 
333    if ((*ri).getClass().isSpecial()) {
334      if (stratified==TMakeRandomIndices::STRATIFIED_IF_POSSIBLE)
335        return operator()(gen->numberOfExamples(), afolds);
336      else
337        raiseError("cannot prepare stratified indices (undefined class value(s))");
338    }
339    else
340      ricv.push_back(pii(in++, (*ri).getClass()));
341
342  random_sort(ricv.begin(), ricv.end(),
343              predOn2nd<pair<int, int>, less<int> >(), predOn2nd<pair<int, int>, equal_to<int> >(),
344              rsrgen(randomGenerator, randseed));
345
346  PRandomIndices indices(mlnew TFoldIndices());
347  indices->resize(ricv.size());
348  int gr=0;
349  ITERATE(vector<pii>, ai, ricv) {
350    indices->at((*ai).first)=gr++;
351    gr=gr%afolds;
352  }
353
354  return indices;
355};
356
357
// Plain record of three integers: rnd, ind and cls. Below, compareRnd orders
// such records by rnd and compareCls by cls.
class TRndIndCls
{
public:
  int rnd, ind, cls;

  // Copies the three arguments into rnd, ind and cls, in that order.
  TRndIndCls(const int &ar, const int &ai, const int &ac)
    : rnd(ar)
    , ind(ai)
    , cls(ac)
  {
  }
};
365
366bool compareRnd(const TRndIndCls &fr, const TRndIndCls &sc)
367{ return fr.rnd<sc.rnd; }
368
369bool compareCls(const TRndIndCls &fr, const TRndIndCls &sc)
370{ return fr.cls<sc.cls; }
371
372
373void sortedRndIndCls(PExampleGenerator gen, vector<long> rands, vector<TRndIndCls> ricv)
374{
375  TExampleIterator ri=gen->begin();
376  if (!ri)
377    raiseError("no examples");
378 
379  char vt=(*ri).getClass().varType;
380  if (vt!=TValue::INTVAR)
381    raiseError("cannot perform stratified cross-validation for non-discrete classes");
382
383  ricv.clear();
384  vector<long>::const_iterator rndi(rands.begin()), endi(rands.end());
385
386  for(int in=0; ri; ++ri) {
387    if ((*ri).getClass().isSpecial())
388      raiseError("cannot perform stratified cross-validation when examples have undefined class values");
389
390    ricv.push_back(TRndIndCls(*(rndi++),  in++, (*ri).getClass()));
391    if (rndi==endi) rndi=rands.begin();
392  }
393
394  sort(ricv.begin(), ricv.end(), compareRnd);
395  stable_sort(ricv.begin(), ricv.end(), compareCls);
396}
397
398
399
400TMakeRandomIndicesMultiple::TMakeRandomIndicesMultiple(const float &ap0, const int &astratified, const int &arandseed)
401: TMakeRandomIndices(astratified, arandseed),
402  p0(ap0)
403{}
404
405
406TMakeRandomIndicesMultiple::TMakeRandomIndicesMultiple(const float &ap0, const int &astratified, PRandomGenerator randgen)
407: TMakeRandomIndices(astratified, randgen),
408  p0(ap0)
409{}
410
411
412PRandomIndices TMakeRandomIndicesMultiple::operator()(const int &n)
413{ return operator()(n, p0); }
414
415
416PRandomIndices TMakeRandomIndicesMultiple::operator()(const int &n, const float &p0)
417 {
418   if (stratified==TMakeRandomIndices::STRATIFIED)
419     raiseError("cannot prepare stratified indices (no class values)");
420
421   if (!randomGenerator && (randseed<0))
422     raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
423
424   int no= (p0<=1.0) ? int(p0*n+0.5) : int(p0+0.5);
425   rsrgen rg(randomGenerator, randseed);
426   PRandomIndices indices(mlnew TFoldIndices(no, 1));
427   PITERATE(TFoldIndices, ii, indices)
428     *ii=rg(n);
429   return indices;
430 }
431
432
433PRandomIndices TMakeRandomIndicesMultiple::operator()(PExampleGenerator gen)
434{ return operator()(gen, p0); }
435
436
437PRandomIndices TMakeRandomIndicesMultiple::operator()(PExampleGenerator gen, const float &ap0)
438{ 
439  if (stratified==TMakeRandomIndices::NOT_STRATIFIED)
440     return operator()(gen->numberOfExamples(), ap0);
441
442  if (!randomGenerator && (randseed<0))
443    raiseCompatibilityWarning("object always returns the same indices unless either 'randomGenerator' or 'randseed' is set");
444
445  TExampleIterator ri=gen->begin();
446  if (!ri)
447    raiseError("no examples");
448 
449  if (gen->domain->classVar->varType!=TValue::INTVAR)
450     raiseError("cannot prepare stratified indices (non-discrete class values)");
451
452  vector<vector<int> > byclasses=vector<vector<int> >(gen->domain->classVar->noOfValues(), vector<int>());
453  long nexamples=0;
454  PEITERATE(ei, gen)
455    if ((*ei).getClass().isSpecial())
456      raiseError("cannot prepare stratified indices (undefined class value(s))");
457    else
458      byclasses[(*ei).getClass().intV].push_back(nexamples++);
459
460  int no= (p0<=1.0) ? int(p0*nexamples+0.5) : int(p0+0.5);
461  rsrgen rg(randomGenerator, randseed);
462
463  PRandomIndices indices(mlnew TFoldIndices());
464 
465  ITERATE(vector<vector<int> >, clsi, byclasses) {
466    int texamples=(*clsi).size();
467    for(int i=0, ie=int(0.5 + no * (float(texamples)/nexamples)); i<ie; i++)
468      indices->push_back((*clsi)[rg(texamples)]);
469  }
470
471  if (int(indices->size())>no)
472    indices->erase(indices->begin()+no);
473  else 
474    while (int(indices->size())<no)
475      indices->push_back(rg(nexamples));
476
477  or_random_shuffle(indices->begin(), indices->end(), rg);
478
479  return indices;
480}
481
Note: See TracBrowser for help on using the repository browser.