source: orange/source/orange/lib_preprocess.cpp @ 6531:57bdc92cd8e9

Revision 6531:57bdc92cd8e9, 53.0 KB checked in by janezd <janez.demsar@…>, 4 years ago (diff)
  • changed licenses to GPL 3.0
Line 
1/*
2    This file is part of Orange.
3   
4    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
5    Contact: janez.demsar@fri.uni-lj.si
6
7    Orange is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11
12    Orange is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21
22#ifdef _MSC_VER
23  #pragma warning (disable : 4786 4114 4018 4267 4244)
24#endif
25
26#include "vars.hpp"
27#include "domain.hpp"
28#include "examples.hpp"
29#include "examplegen.hpp"
30#include "table.hpp"
31#include "classify.hpp"
32#include "estimateprob.hpp"
33#include "distvars.hpp"
34#include "distance.hpp"
35
36#include "cls_orange.hpp"
37#include "cls_value.hpp"
38#include "cls_example.hpp"
39#include "lib_kernel.hpp"
40#include "vectortemplates.hpp"
41#include "maptemplates.hpp"
42
43#include "converts.hpp"
44#include "slist.hpp"
45
46#include "externs.px"
47
48
49/* ************ DISCRETIZATION ************ */
50
51#include "discretize.hpp"
52
53
// Class declarations for the discretization part of the Python interface.
// Each line names a class and its base class; for C_NAMED and C_CALL the string
// is the signature text shown for the class.
// NOTE(review): ABSTRACT / C_NAMED / C_CALL are defined outside this file and are
// presumably consumed by the interface generator -- confirm before reformatting.
54ABSTRACT(Discretizer, TransformValue)
55C_NAMED(EquiDistDiscretizer, Discretizer, "([numberOfIntervals=, firstCut=, step=])")
56C_NAMED(IntervalDiscretizer, Discretizer, "([points=])")
57C_NAMED(ThresholdDiscretizer, Discretizer, "([threshold=])")
58C_NAMED(BiModalDiscretizer, Discretizer, "([low=, high=])")
59
// Discretization algorithms; the signature strings list (attribute, examples[, weight]) call forms.
60ABSTRACT(Discretization, Orange)
61C_CALL (EquiDistDiscretization, Discretization, "() | (attribute, examples[, weight, numberOfIntervals=]) -/-> Variable")
62C_CALL (   EquiNDiscretization, Discretization, "() | (attribute, examples[, weight, numberOfIntervals=]) -/-> Variable")
63C_CALL ( EntropyDiscretization, Discretization, "() | (attribute, examples[, weight]) -/-> Variable")
64C_CALL ( BiModalDiscretization, Discretization, "() | (attribute, examples[, weight]) -/-> Variable")
65
66
67PyObject *Discretization_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(attribute, examples[, weight]) -> Variable")
68{
69  PyTRY
70    NO_KEYWORDS
71
72    PyObject *variable;
73    PExampleGenerator egen;
74    int weightID=0;
75    if (!PyArg_ParseTuple(args, "OO&|O&", &variable, pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID)) 
76      PYERROR(PyExc_SystemError, "invalid parameters", PYNULL);
77
78    PVariable toDiscretize = varFromArg_byDomain(variable, egen->domain);
79    if (!toDiscretize)
80      return PYNULL; // varFromArg_byDomain has already set the error message
81
82    PVariable discr = SELF_AS(TDiscretization)(egen, toDiscretize, weightID);
83    if (!discr)
84      PYERROR(PyExc_SystemError, "discretization construction failed", PYNULL);
85
86    return WrapOrange(discr);
87  PyCATCH
88}
89
90
91PyObject *Discretizer_constructVariable(PyObject *self, PyObject *var) PYARGS(METH_O, "(variable) -> variable")
92{ PyTRY
93    if (!PyOrVariable_Check(var))
94      PYERROR(PyExc_TypeError, "invalid parameters (variable expected)", PYNULL);
95
96    return WrapOrange(PyOrange_AsDiscretizer(self)->constructVar(PyOrange_AsVariable(var)));
97  PyCATCH
98}
99
100
101PyObject *EquiDistDiscretizer_get_points(PyObject *self)
102{ PyTRY
103   CAST_TO(TEquiDistDiscretizer, edd);
104    int nint = edd->numberOfIntervals - 1;
105    PyObject *res = PyList_New(nint);
106    for(Py_ssize_t i = 0; i < nint; i++)
107      PyList_SetItem(res, i, PyFloat_FromDouble(edd->firstCut + i*edd->step));
108    return res;
109  PyCATCH
110}
111
112
113/* ************ FILTERS FOR REGRESSION ************** */
114
115#include "transval.hpp"
116
// Value transformers exported to Python; the strings list the constructor keywords.
// NOTE(review): C_NAMED is defined outside this file -- confirm its exact effect.
117C_NAMED(MapIntValue, TransformValue, "([mapping=])")
118C_NAMED(Discrete2Continuous, TransformValue, "([value=])")
119C_NAMED(Ordinal2Continuous, TransformValue, "([nvalues=])")
120C_NAMED(NormalizeContinuous, TransformValue, "([average=, span=])")
121
// DomainContinuizer; its call operator is DomainContinuizer_call in this file.
122C_NAMED(DomainContinuizer, Orange, "(domain|examples, convertClass=, invertClass=, zeroBased=, normalizeContinuous=, baseValueSelection=) -/-> Domain")
123
124int getTargetClass(PVariable classVar, PyObject *pyval)
125{
126  if (pyval) {
127    if (!classVar)
128      PYERROR(PyExc_TypeError, "cannot set target class value for class-less domain", -2);
129    if (classVar->varType != TValue::INTVAR)
130      PYERROR(PyExc_TypeError, "cannot set target value for non-discrete class", -2);
131
132    TValue targetValue;
133    if (!convertFromPython(pyval, targetValue, classVar))
134      return -2;
135    if (targetValue.isSpecial())
136      PYERROR(PyExc_TypeError, "unknown value passed as class target", -2)
137    else
138      return targetValue.intV;
139  }
140  return -1; // not an error, but undefined!
141}
142
143PyObject *DomainContinuizer_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(domain[, targetClass] | examples[, weightID, targetClass]) -> domain")
144{ 
145  PyTRY
146    NO_KEYWORDS
147
148    if (args && (PyTuple_GET_SIZE(args)<=2) && PyOrDomain_Check(PyTuple_GET_ITEM(args, 0))) {
149      PDomain domain;
150      PyObject *pyval = PYNULL;
151      if (!PyArg_ParseTuple(args, "O&|O", cc_Domain, &domain, &pyval))
152        return PYNULL;
153      int targetClass = getTargetClass(domain->classVar, pyval);
154      if (targetClass == -2)
155        return PYNULL;
156     
157      return WrapOrange(SELF_AS(TDomainContinuizer)(domain, targetClass));
158    }
159
160    PExampleGenerator egen;
161    int weightID = 0;
162    PyObject *pyval = PYNULL;
163    if (!PyArg_ParseTuple(args, "O&|O&O", pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &pyval))
164      PYERROR(PyExc_AttributeError, "DomainContinuizer.__call__: domain or examples (and, optionally, weight attribute) expected", PYNULL);
165
166    int targetClass = getTargetClass(egen->domain->classVar, pyval);
167    if (targetClass == -2)
168      return PYNULL;
169
170    //printf("%p-%p\n", self, ((TPyOrange *)self)->ptr);
171    return WrapOrange(SELF_AS(TDomainContinuizer)(egen, weightID, targetClass));
172
173  PyCATCH
174}
175
176/* ************ REDUNDANCIES ************ */
177
178#include "redundancy.hpp"
179
// Class declarations for the redundancy-removal part of the Python interface.
// NOTE(review): ABSTRACT / C_CALL / C_CALL3 are defined outside this file -- confirm
// their exact effect before changing these lines.
180ABSTRACT(RemoveRedundant, Orange)
181
182C_CALL(RemoveRedundantByInduction, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
183C_CALL(RemoveRedundantByQuality, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
184C_CALL(RemoveRedundantOneValue, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
185
// RemoveUnusedValues; its call operator is RemoveUnusedValues_call in this file.
186C_CALL3(RemoveUnusedValues, RemoveUnusedValues, Orange, "([[attribute, ]examples[, weightId]]) -/-> attribute")
187
188PyObject *RemoveRedundant_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("([examples[, weightID][, suspicious]) -/-> Domain")
189{
190  PyTRY
191    NO_KEYWORDS
192
193    PExampleGenerator egen;
194    PyObject *suspiciousList=NULL;
195    int weight=0;
196    if (!PyArg_ParseTuple(args, "O&|OO&:RemoveRedundant.call", pt_ExampleGenerator, &egen, &suspiciousList, pt_weightByGen(egen), &weight))
197      return PYNULL;
198
199    TVarList suspiciousset;
200    if (suspiciousList)
201      if (!varListFromDomain(suspiciousList, egen->domain, suspiciousset))
202        return PYNULL;
203
204    PDomain newdomain = SELF_AS(TRemoveRedundant)(egen, suspiciousList ? &suspiciousset : NULL, NULL, weight);
205    return WrapOrange(newdomain);
206  PyCATCH
207}
208
209
210PyObject *RemoveRedundantOneValue_hasAtLeastTwoValues(PyObject *, PyObject *args) PYARGS(METH_VARARGS | METH_STATIC, "(attribute, examples) -> bool")
211{
212  PyTRY
213    PExampleGenerator gen;
214    PyObject *var;
215    if (!PyArg_ParseTuple(args, "O&O:RemoveRedundantOneValue.hasAtLeastTwoValues", pt_ExampleGenerator, &gen, &var))
216      return NULL;
217    int varIdx;
218    if (!varNumFromVarDom(var, gen->domain, varIdx))
219      PYERROR(PyExc_AttributeError, "RemoveRedundantOneValue.hasAtLeastTwoValues: invalid attribute", NULL);
220    return PyBool_FromLong(TRemoveRedundantOneValue::hasAtLeastTwo(gen, varIdx) ? 1 : 0);
221  PyCATCH
222}
223   
224PyObject *RemoveUnusedValues_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(attribute, examples[, weightId]) -> attribute")
225{
226  PyTRY
227    NO_KEYWORDS
228    CAST_TO(TRemoveUnusedValues, ruv);
229    bool storeOv = ruv->removeOneValued;
230
231    PExampleGenerator egen;
232    PVariable var;
233    int weightID = 0;
234    int removeOneValued = -1;
235    int checkClass = 0;
236
237    if (PyArg_ParseTuple(args, "O&O&|O&i:RemoveUnusedValues.call", cc_Variable, &var, pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &removeOneValued)) {
238      if (removeOneValued >= 0)
239        ruv->removeOneValued = removeOneValued != 0;
240      PyObject *res = WrapOrange(ruv->call(var, egen, weightID));
241      ruv->removeOneValued = storeOv;
242      return res;
243    }
244
245    PyErr_Clear();
246
247    if (PyArg_ParseTuple(args, "O&|O&ii:RemoveUnusedValues.call", pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &removeOneValued, &checkClass)) {
248      if (removeOneValued >= 0)
249        ruv->removeOneValued = removeOneValued != 0;
250      PyObject *res = WrapOrange(ruv->call(egen, weightID, checkClass != 0));
251      ruv->removeOneValued = storeOv;
252      return res;
253    }
254
255    PYERROR(PyExc_AttributeError, "RemoveUnusedValues.__call__: invalid arguments", PYNULL);
256
257  PyCATCH
258}
259
260
261/* ************ PREPROCESSORS ************ */
262
263#include "preprocessors.hpp"
264
// Class declarations for the preprocessors exported to Python. Every class is
// declared with Preprocessor as its base; the strings are the signature texts.
// NOTE(review): ABSTRACT / C_CALL / C_NAMED are defined outside this file and are
// presumably consumed by the interface generator -- confirm before reformatting.
265ABSTRACT(Preprocessor, Orange)
266
// Attribute selection
267C_CALL(Preprocessor_select, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
268C_CALL(Preprocessor_ignore, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
269
// Example selection
270C_CALL(Preprocessor_take, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
271C_CALL(Preprocessor_drop, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
272C_CALL(Preprocessor_removeDuplicates, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
273C_CALL(Preprocessor_takeMissing, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
274C_CALL(Preprocessor_dropMissing, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
275C_CALL(Preprocessor_takeMissingClasses, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
276C_CALL(Preprocessor_dropMissingClasses, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
277
278C_CALL(Preprocessor_shuffle, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
279
// Adding missing values and noise
280C_CALL(Preprocessor_addMissing, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
281C_CALL(Preprocessor_addMissingClasses, Preprocessor, "([examples[, weightID]] [classMissing=<float>]) -/-> ExampleTable")
282C_CALL(Preprocessor_addNoise, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
283C_CALL(Preprocessor_addClassNoise, Preprocessor, "([examples[, weightID]] [proportion=<float>]) -/-> ExampleTable")
284C_CALL(Preprocessor_addGaussianNoise, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
285C_CALL(Preprocessor_addGaussianClassNoise, Preprocessor, "([examples[, weightID]] [deviation=<float>]) -/-> ExampleTable")
286
// Weighting
287C_CALL(Preprocessor_addClassWeight, Preprocessor, "([examples[, weightID]] [equalize=, classWeights=) -/-> ExampleTable")
288C_CALL(Preprocessor_addCensorWeight, Preprocessor, "([examples[, weightID]] [method=0-km, 1-nmr, 2-linear, outcomeVar=, eventValue=, timeID=, maxTime=]) -/-> ExampleTable")
289
// Filtering, imputation and discretization
290C_CALL(Preprocessor_filter, Preprocessor, "([examples[, weightID]] [filter=]) -/-> ExampleTable")
291C_CALL(Preprocessor_imputeByLearner, Preprocessor, "([examples[, weightID]] [learner=]) -/-> ExampleTable")
292C_CALL(Preprocessor_discretize, Preprocessor, "([examples[, weightID]] [notClass=, method=, attributes=<list-of-strings>]) -/-> ExampleTable")
293
294C_NAMED(ImputeClassifier, Classifier, "([classifierFromVar=][imputer=])")
295
296PyObject *Preprocessor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples[, weightID]) -> ExampleTable")
297{ 
298  PyTRY
299    NO_KEYWORDS
300
301    int weightID=0;
302    PExampleGenerator egen = exampleGenFromArgs(args, weightID);
303    if (!egen)
304      PYERROR(PyExc_TypeError, "attribute error (example generator expected)", PYNULL);
305    bool weightGiven = (weightID!=0);
306
307    int newWeight;
308    PExampleGenerator res = SELF_AS(TPreprocessor)(egen, weightID, newWeight);
309    PyObject *wrappedGen=WrapOrange(res);
310    return weightGiven || newWeight ? Py_BuildValue("Ni", wrappedGen, newWeight) : wrappedGen;
311  PyCATCH
312}
313
314
315PyObject *Preprocessor_selectionVector(PyObject *self, PyObject *args, PyObject *) PYARGS(METH_VARARGS, "(examples[, weightID])")
316{
317  PyTRY
318    int weightID = 0;
319    PExampleGenerator egen = exampleGenFromArgs(args, weightID);
320    if (!egen)
321      PYERROR(PyExc_TypeError, "attribute error (example generator expected)", PYNULL);
322
323    return WrapOrange(SELF_AS(TPreprocessor).selectionVector(egen, weightID));
324  PyCATCH
325}
326
327
328#include "stringvars.hpp"
329
330typedef MapMethods<PVariableFilterMap, TVariableFilterMap, PVariable, PValueFilter> TMM_VariableFilterMap;
331
332int VariableFilterMap_setitemlow(TVariableFilterMap *aMap, PVariable var, PyObject *pyvalue)
333{
334  PValueFilter value;
335  if (TMM_VariableFilterMap::_valueFromPython(pyvalue, value)) {
336    aMap->__ormap[var] = value;
337    return 0;
338  }
339
340  PyErr_Clear();
341
342  if (var->varType == TValue::FLOATVAR) {
343    float min, max;
344    if (!PyArg_ParseTuple(pyvalue, "ff:VariableFilterMap.__setitem__", &min, &max))
345      return -1;
346
347    aMap->__ormap[var] = (min<=max) ? mlnew TValueFilter_continuous(ILLEGAL_INT, min, max)
348                                   : mlnew TValueFilter_continuous(ILLEGAL_INT, max, min, true);
349    return 0;
350  }
351
352  if (var->varType == TValue::INTVAR) {
353    TValueFilter_discrete *vfilter = mlnew TValueFilter_discrete(ILLEGAL_INT, var);
354    PValueFilter wvfilter = vfilter;
355    TValueList &valueList = vfilter->values.getReference();
356
357    if (PyTuple_Check(pyvalue) || PyList_Check(pyvalue)) {
358      PyObject *iterator = PyObject_GetIter(pyvalue);
359      for(PyObject *item = PyIter_Next(iterator); item; item = PyIter_Next(iterator)) {
360        TValue value;
361        if (!convertFromPython(item, value, var)) {
362          Py_DECREF(item);
363          Py_DECREF(iterator);
364          return -1;
365        }
366        Py_DECREF(item);
367        if (value.isSpecial())
368          vfilter->acceptSpecial = 1;
369        else
370          valueList.push_back(value);
371      }
372      Py_DECREF(iterator);
373    }
374    else {
375      TValue value;
376      if (!convertFromPython(pyvalue, value, var))
377        return -1;
378      if (value.isSpecial())
379        vfilter->acceptSpecial = 1;
380      else
381        valueList.push_back(value);
382    }
383
384    aMap->__ormap[var] = wvfilter;
385    return 0;
386  }
387
388  if (var.is_derived_from(TStringVariable)) {
389    TValueFilter_stringList *vfilter = mlnew TValueFilter_stringList(ILLEGAL_INT, mlnew TStringList());
390    PValueFilter wvfilter = vfilter;
391    TStringList &values = vfilter->values.getReference();
392
393    if (PyTuple_Check(pyvalue) || PyList_Check(pyvalue)) {
394      PyObject *iterator = PyObject_GetIter(pyvalue);
395      int i = 0;
396      for(PyObject *item = PyIter_Next(iterator); item; item = PyIter_Next(iterator), i++) {
397        if (!PyString_Check(item)) {
398          PyErr_Format(PyExc_TypeError, "error at index %i, string expected", i);
399          Py_DECREF(item);
400          Py_DECREF(iterator);
401          return -1;
402        }
403        Py_DECREF(item);
404        values.push_back(PyString_AsString(item));
405      }
406      Py_DECREF(iterator);
407    }
408    else if (PyString_Check(pyvalue))
409      values.push_back(PyString_AsString(pyvalue));
410    else
411      PyErr_Format(PyExc_TypeError, "string or a list of strings expected", -1);
412
413    aMap->__ormap[var] = wvfilter;
414    return 0;
415  }
416   
417  PYERROR(PyExc_TypeError, "VariableFilterMap.__setitem__: unrecognized item type", -1);
418}
419
420
421template<>
422int TMM_VariableFilterMap::_setitemlow(TVariableFilterMap *aMap, PyObject *pykey, PyObject *pyvalue)
423{ PyTRY
424    PVariable var;
425    return TMM_VariableFilterMap::_keyFromPython(pykey, var) ? VariableFilterMap_setitemlow(aMap, var, pyvalue) : -1;
426  PyCATCH_1
427}
428
429
430template<>
431PyObject *TMM_VariableFilterMap::_setdefault(TPyOrange *self, PyObject *args)
432{ PyObject *pykey;
433  PyObject *deflt = Py_None;
434  if (!PyArg_ParseTuple(args, "O|O:get", &pykey, &deflt))
435    return PYNULL;
436
437  PVariable var;
438  if (!TMM_VariableFilterMap::_keyFromPython(pykey, var))
439    return PYNULL;
440
441  TVariableFilterMap *aMap = const_cast<TVariableFilterMap *>(PyOrange_AsVariableFilterMap(self).getUnwrappedPtr());
442 
443  iterator fi = aMap->find(var);
444  if (fi==aMap->end()) {
445    if (VariableFilterMap_setitemlow(aMap, var, deflt)<0)
446      return PYNULL;
447
448    // cannot return deflt here, since it is probably a string or tuple which was converted to ValueFilter
449    // we just reinitialize fi and let the function finish :)
450    fi = aMap->find(var);
451  }
452
453  return convertValueToPython((*fi).second);
454}
455
456
457PDistribution kaplanMeier(PExampleGenerator gen, const int &outcomeIndex, TValue &failValue, const int &timeIndex, const int &weightID);
458
459PyObject *kaplanMeier(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(examples, outcome attribute, fail value, time attribute[, weightID]) -> survival curve")
460{ PExampleGenerator egen;
461  PyObject *outcomevar, *timevar;
462  PyObject *pyfailvalue;
463  TValue failvalue;
464  int weightID = 0;
465  if (!PyArg_ParseTuple(args, "O&OOOO&:kaplanMeier", pt_ExampleGenerator, &egen, &outcomevar, &pyfailvalue, &timevar, pt_weightByGen(egen), &weightID))
466
467    return PYNULL;
468
469  int outcomeIndex, timeIndex;
470  if (outcomevar) {
471    if (!varNumFromVarDom(outcomevar, egen->domain, outcomeIndex)) 
472      PYERROR(PyExc_AttributeError, "outcome variable not found in domain", PYNULL);
473  }
474  else
475    if (egen->domain->classVar)
476      outcomeIndex = egen->domain->attributes->size();
477    else
478      PYERROR(PyExc_AttributeError, "'outcomeVar' not set and the domain is class-less", PYNULL);
479
480  PVariable ovar = egen->domain->getVar(outcomeIndex);
481
482  if (   !convertFromPython(pyfailvalue, failvalue, ovar)
483      || failvalue.isSpecial()
484      || (failvalue.varType != TValue::INTVAR))
485    PYERROR(PyExc_AttributeError, "invalid value for failure", PYNULL);
486
487  return WrapOrange(kaplanMeier(egen, outcomeIndex, failvalue, timeIndex, weightID));
488}
489
490
// Python-visible methods of VariableFilterMap. Each function forwards to the
// TMM_VariableFilterMap method of the matching name; _setitemlow and _setdefault
// are specialized earlier in this file.
491// modified setitem to accept intervals/names of values
492INITIALIZE_MAPMETHODS(TMM_VariableFilterMap, &PyOrVariable_Type, &PyOrValueFilter_Type, _orangeValueFromPython<PVariable>, _orangeValueFromPython<PValueFilter>, _orangeValueToPython<PVariable>, _orangeValueToPython<PValueFilter>)
493
// Construction and the basic object/mapping protocol (repr shares _str with str)
494PVariableFilterMap PVariableFilterMap_FromArguments(PyObject *arg) { return TMM_VariableFilterMap::P_FromArguments(arg); }
495PyObject *VariableFilterMap_FromArguments(PyTypeObject *type, PyObject *arg) { return TMM_VariableFilterMap::_FromArguments(type, arg); }
496PyObject *VariableFilterMap_new(PyTypeObject *type, PyObject *arg, PyObject *kwds) BASED_ON(Orange, "(items)") ALLOWS_EMPTY { return TMM_VariableFilterMap::_new(type, arg, kwds); }
497PyObject *VariableFilterMap_str(TPyOrange *self) { return TMM_VariableFilterMap::_str(self); }
498PyObject *VariableFilterMap_repr(TPyOrange *self) { return TMM_VariableFilterMap::_str(self); }
499PyObject *VariableFilterMap_getitem(TPyOrange *self, PyObject *key) { return TMM_VariableFilterMap::_getitem(self, key); }
500int       VariableFilterMap_setitem(TPyOrange *self, PyObject *key, PyObject *value) { return TMM_VariableFilterMap::_setitem(self, key, value); }
501Py_ssize_t       VariableFilterMap_len(TPyOrange *self) { return TMM_VariableFilterMap::_len(self); }
502int       VariableFilterMap_contains(TPyOrange *self, PyObject *key) { return TMM_VariableFilterMap::_contains(self, key); }
503
// Dictionary-like methods and pickling support
504PyObject *VariableFilterMap_has_key(TPyOrange *self, PyObject *key) PYARGS(METH_O, "(key) -> None") { return TMM_VariableFilterMap::_has_key(self, key); }
505PyObject *VariableFilterMap_get(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "(key[, default]) -> value") { return TMM_VariableFilterMap::_get(self, args); }
506PyObject *VariableFilterMap_setdefault(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "(key[, default]) -> value") { return TMM_VariableFilterMap::_setdefault(self, args); }
507PyObject *VariableFilterMap_clear(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> None") { return TMM_VariableFilterMap::_clear(self); }
508PyObject *VariableFilterMap_keys(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> keys") { return TMM_VariableFilterMap::_keys(self); }
509PyObject *VariableFilterMap_values(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> values") { return TMM_VariableFilterMap::_values(self); }
510PyObject *VariableFilterMap_items(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> items") { return TMM_VariableFilterMap::_items(self); }
511PyObject *VariableFilterMap_update(TPyOrange *self, PyObject *args) PYARGS(METH_O, "(items) -> None") { return TMM_VariableFilterMap::_update(self, args); }
512PyObject *VariableFilterMap__reduce__(TPyOrange *self, PyObject *) { return TMM_VariableFilterMap::_reduce(self); }
513
514
// Python-visible methods of VariableFloatMap, a map from variables to floats.
// Each function forwards to the TMM_VariableFloatMap method of the matching name.
515typedef MapMethods<PVariableFloatMap, TVariableFloatMap, PVariable, float> TMM_VariableFloatMap;
516INITIALIZE_MAPMETHODS(TMM_VariableFloatMap, &PyOrVariable_Type, NULL, _orangeValueFromPython<PVariable>, _nonOrangeValueFromPython<float>, _orangeValueToPython<PVariable>, _nonOrangeValueToPython<float>);
517
// Construction and the basic object/mapping protocol (repr shares _str with str)
518PVariableFloatMap PVariableFloatMap_FromArguments(PyObject *arg) { return TMM_VariableFloatMap::P_FromArguments(arg); }
519PyObject *VariableFloatMap_FromArguments(PyTypeObject *type, PyObject *arg) { return TMM_VariableFloatMap::_FromArguments(type, arg); }
520PyObject *VariableFloatMap_new(PyTypeObject *type, PyObject *arg, PyObject *kwds) BASED_ON(Orange, "(items)") ALLOWS_EMPTY { return TMM_VariableFloatMap::_new(type, arg, kwds); } 
521PyObject *VariableFloatMap_str(TPyOrange *self) { return TMM_VariableFloatMap::_str(self); }
522PyObject *VariableFloatMap_repr(TPyOrange *self) { return TMM_VariableFloatMap::_str(self); }
523PyObject *VariableFloatMap_getitem(TPyOrange *self, PyObject *key) { return TMM_VariableFloatMap::_getitem(self, key); }
524int       VariableFloatMap_setitem(TPyOrange *self, PyObject *key, PyObject *value) { return TMM_VariableFloatMap::_setitem(self, key, value); }
525Py_ssize_t       VariableFloatMap_len(TPyOrange *self) { return TMM_VariableFloatMap::_len(self); }
526int       VariableFloatMap_contains(TPyOrange *self, PyObject *key) { return TMM_VariableFloatMap::_contains(self, key); }
527
// Dictionary-like methods and pickling support
528PyObject *VariableFloatMap_has_key(TPyOrange *self, PyObject *key) PYARGS(METH_O, "(key) -> None") { return TMM_VariableFloatMap::_has_key(self, key); }
529PyObject *VariableFloatMap_get(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "(key[, default]) -> value") { return TMM_VariableFloatMap::_get(self, args); }
530PyObject *VariableFloatMap_setdefault(TPyOrange *self, PyObject *args) PYARGS(METH_VARARGS, "(key[, default]) -> value") { return TMM_VariableFloatMap::_setdefault(self, args); }
531PyObject *VariableFloatMap_clear(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> None") { return TMM_VariableFloatMap::_clear(self); }
532PyObject *VariableFloatMap_keys(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> keys") { return TMM_VariableFloatMap::_keys(self); }
533PyObject *VariableFloatMap_values(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> values") { return TMM_VariableFloatMap::_values(self); }
534PyObject *VariableFloatMap_items(TPyOrange *self, PyObject *args) PYARGS(METH_NOARGS, "() -> items") { return TMM_VariableFloatMap::_items(self); }
535PyObject *VariableFloatMap_update(TPyOrange *self, PyObject *args) PYARGS(METH_O, "(items) -> None") { return TMM_VariableFloatMap::_update(self, args); }
536PyObject *VariableFloatMap__reduce__(TPyOrange *self, PyObject *) { return TMM_VariableFloatMap::_reduce(self); }
537
538
539C_CALL3(TableAverager, TableAverager, Orange, "(list-of-example-generators) -/-> ExampleTable")
540
541PExampleGeneratorList PExampleGeneratorList_FromArguments(PyObject *arg);
542
543PyObject *TableAverager_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(list-of-example-generators) --> ExampleTable")
544{
545  PyTRY
546    NO_KEYWORDS
547    if (!args || (PyTuple_Size(args) != 1))
548      PYERROR(PyExc_TypeError, "TableAverager expects a list of example generators", PYNULL);
549    PExampleGeneratorList tables = PExampleGeneratorList_FromArguments(PyTuple_GET_ITEM(args, 0));
550    if (!tables)
551      return PYNULL;
552    return WrapOrange(SELF_AS(TTableAverager)(tables));
553  PyCATCH
554}
555
556/* ************ INDUCE ************ */
557
558#include "induce.hpp"
559#include "subsets.hpp"
560
// Class declarations for feature induction and subset generators.
// NOTE(review): ABSTRACT / C_NAMED / BASED_ON are defined outside this file and are
// presumably consumed by the interface generator -- confirm before reformatting.
561ABSTRACT(FeatureInducer, Orange)
562
563ABSTRACT(SubsetsGenerator, Orange)
564C_NAMED(SubsetsGenerator_withRestrictions, SubsetsGenerator, "([subGenerator=])")
565
// Iterators over subsets; the constSize and minMaxSize iterators have their
// __reduce__ functions defined later in this file.
566ABSTRACT(SubsetsGenerator_iterator, Orange)
567C_NAMED(SubsetsGenerator_constant_iterator, SubsetsGenerator_iterator, "")
568BASED_ON(SubsetsGenerator_constSize_iterator, SubsetsGenerator_iterator)
569BASED_ON(SubsetsGenerator_minMaxSize_iterator, SubsetsGenerator_iterator)
570C_NAMED(SubsetsGenerator_withRestrictions_iterator, SubsetsGenerator_iterator, "")
571
572PyObject *FeatureInducer_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples, bound-attrs, new-name, weightID) -> (Variable, float)")
573{
574  PyTRY
575    NO_KEYWORDS
576
577    PExampleGenerator egen;
578    PyObject *boundList;
579    char *name;
580    int weight=0;
581    if (!PyArg_ParseTuple(args, "O&Os|O&", pt_ExampleGenerator, &egen, &boundList, &name, pt_weightByGen(egen), &weight))
582      PYERROR(PyExc_TypeError, "invalid arguments", PYNULL);
583
584    TVarList boundset;
585    if (!varListFromDomain(boundList, egen->domain, boundset))
586      return PYNULL;
587
588    float meas;
589    PVariable newvar = SELF_AS(TFeatureInducer)(egen, boundset, name, meas, weight);
590    return Py_BuildValue("Nf", WrapOrange(newvar), meas);
591  PyCATCH
592}
593
594
595
596
597PVarList PVarList_FromArguments(PyObject *arg);
598
599PVarList varListForReset(PyObject *vars)
600{
601  if (PyOrDomain_Check(vars))
602    return PyOrange_AsDomain(vars)->attributes;
603
604  PVarList variables = PVarList_FromArguments(vars);
605  if (!variables)
606    PYERROR(PyExc_TypeError, "SubsetsGenerator.reset: invalid arguments", NULL);
607
608  return variables;
609}
610
611
612PyObject *SubsetsGenerator_reset(PyObject *self, PyObject *args) PYARGS(METH_VARARGS, "([[var0, var1, ...]]) -> int")
613{ PyTRY
614    PyObject *vars = PYNULL;
615    if (!PyArg_ParseTuple(args, "|O:SubsetsGenerator.reset", &vars))
616      return PYNULL;
617
618    if (!vars)
619      PYERROR(PyExc_TypeError, "SubsetsGenerator.reset does not reset the generator (as it used to)", false);
620
621    PVarList varList = varListForReset(vars);
622    if (!varList)
623      return NULL;
624
625    SELF_AS(TSubsetsGenerator).varList = varList;
626    RETURN_NONE;
627  PyCATCH
628}
629
630
631PyObject *SubsetsGenerator_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("([var0, var1] | domain |) -> SubsetsGenerator")
632{ PyTRY
633    NO_KEYWORDS
634
635    if (args && PyTuple_Size(args) && !SubsetsGenerator_reset(self, args))
636      return NULL;
637
638    Py_INCREF(self);
639    return self;
640  PyCATCH
641}
642
643
644PyObject *SubsetsGenerator_iter(PyObject *self)
645{ PyTRY
646    return WrapOrange(SELF_AS(TSubsetsGenerator).call());
647  PyCATCH
648}
649
650
651PyObject *SubsetsGenerator_iterator_iternext(PyObject *self)
652{ PyTRY
653    TVarList vl;
654    if (!SELF_AS(TSubsetsGenerator_iterator).call(vl))
655      return PYNULL;
656
657    PyObject *list=PyTuple_New(vl.size());
658    Py_ssize_t i=0;
659    ITERATE(TVarList, vi, vl)
660      PyTuple_SetItem(list, i++, WrapOrange(*vi));
661    return list;
662  PyCATCH
663}
664
665
666PyObject *SubsetsGenerator_iterator_next(PyObject *self)
667{ Py_INCREF(self);
668  return self;
669}
670
671
672void packCounter(const TCounter &cnt, TCharBuffer &buf)
673{
674  buf.writeInt(cnt.limit);
675  buf.writeInt(cnt.size());
676  const_ITERATE(TCounter, ci, cnt)
677    buf.writeInt(*ci);
678}
679
680
681void unpackCounter(TCharBuffer &buf, TCounter &cnt)
682{
683  cnt.limit = buf.readInt();
684
685  int size = buf.readInt();
686  cnt.resize(size);
687  for(TCounter::iterator ci(cnt.begin()); size--; *ci++ = buf.readInt());
688}
689
690
691PyObject *SubsetsGenerator_constSize_new(PyTypeObject *type, PyObject *args, PyObject *) BASED_ON(SubsetsGenerator, "(size)") ALLOWS_EMPTY
692{ PyTRY
693    int B = 2;
694    PyObject *varlist = NULL;
695    PyObject *res;
696
697    // This is for compatibility ...
698    if (PyArg_ParseTuple(args, "|iO:SubsetsGenerator_constSize.__new__", &B, &varlist)) {
699      TSubsetsGenerator *ssg = mlnew TSubsetsGenerator_constSize(B);
700      res = WrapNewOrange(ssg, type);
701      if (varlist) {
702        SubsetsGenerator_reset(res, varlist);
703      }
704      return res;
705    }
706    PyErr_Clear();
707
708    // ... and this if for real
709    if (!PyArg_ParseTuple(args, "|O:SubsetsGenerator_constSize.__new__", &varlist))
710      return PYNULL;
711
712    TSubsetsGenerator *gen = mlnew TSubsetsGenerator_constSize(B);
713    if (varlist && !(gen->varList = varListForReset(varlist))) {
714      delete gen;
715      return NULL;
716    }
717
718    return WrapNewOrange(gen, type);
719  PyCATCH
720}
721
722PyObject *SubsetsGenerator_constSize_iterator__reduce__(PyObject *self)
723{
724  PyTRY
725    CAST_TO(TSubsetsGenerator_constSize_iterator, gen);
726
727    TCharBuffer buf((gen->counter.size() + 4) * sizeof(int));
728    packCounter(gen->counter, buf);
729    buf.writeChar(gen->moreToCome ? 1 : 0);
730
731    return Py_BuildValue("O(OOs#)N", getExportedFunction("__pickleLoaderSubsetsGeneratorConstSizeIterator"),
732                                    self->ob_type,
733                                    WrapOrange(gen->varList),
734                                    buf.buf, buf.length(),
735                                    packOrangeDictionary(self));
736 PyCATCH
737}
738
739
740PyObject *__pickleLoaderSubsetsGeneratorConstSizeIterator(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(type, packed_counter)")
741{
742  PyTRY
743    PyTypeObject *type;
744    PVarList varList;
745    char *pbuf;
746    int bufSize;
747    if (!PyArg_ParseTuple(args, "OOs#:__pickleLoaderSubsetsGenerator_constSizeIterator", &type, ccn_VarList, &varList, &pbuf, &bufSize))
748      return NULL;
749
750    TCharBuffer buf(pbuf);
751
752    TSubsetsGenerator_constSize_iterator *gen = new TSubsetsGenerator_constSize_iterator(varList, buf.readInt());
753    unpackCounter(buf, gen->counter);
754    gen->moreToCome = buf.readChar() != 0;
755
756    return WrapNewOrange(gen, type);
757  PyCATCH
758}
759
760
761
762PyObject *SubsetsGenerator_minMaxSize_new(PyTypeObject *type, PyObject *args, PyObject *) BASED_ON(SubsetsGenerator, "([min=, max=][, varList=])") ALLOWS_EMPTY
763{ PyTRY
764    int min = 2, max = 3;
765    PyObject *varlist = NULL;
766
767    // This is for compatibility ...
768    if (args && PyArg_ParseTuple(args, "|iiO", &min, &max, &varlist)) {
769      PyObject *res = WrapNewOrange(mlnew TSubsetsGenerator_minMaxSize(min, max), type);
770      if (varlist)
771        SubsetsGenerator_reset(res, varlist);
772
773      return res;
774    }
775    PyErr_Clear();
776
777    // ... and this if for real
778    if (!PyArg_ParseTuple(args, "|O:SubsetsGenerator_minMaxSize.__new__", &varlist))
779      return PYNULL;
780     
781    TSubsetsGenerator *gen = mlnew TSubsetsGenerator_minMaxSize(min, max);
782    if (varlist && !(gen->varList = varListForReset(varlist))) {
783      delete gen;
784      return NULL;
785    }
786
787    return WrapNewOrange(gen, type);
788  PyCATCH
789}
790
791
792PyObject *SubsetsGenerator_minMaxSize_iterator__reduce__(PyObject *self)
793{
794  PyTRY
795    CAST_TO(TSubsetsGenerator_minMaxSize_iterator, gen);
796
797    TCharBuffer buf((gen->counter.size() + 5) * sizeof(int));
798    buf.writeInt(gen->B);
799    buf.writeInt(gen->max);
800    packCounter(gen->counter, buf);
801    buf.writeChar(gen->moreToCome ? 1 : 0);
802
803    return Py_BuildValue("O(OOs#)N", getExportedFunction("__pickleLoaderSubsetsGeneratorMinMaxSizeIterator"),
804                                    self->ob_type,
805                                    WrapOrange(gen->varList),
806                                    buf.buf, buf.length(),
807                                    packOrangeDictionary(self));
808 PyCATCH
809}
810
811
812PyObject *__pickleLoaderSubsetsGeneratorMinMaxSizeIterator(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(type, varList, packed_counter)")
813{
814  PyTRY
815    PyTypeObject *type;
816    PVarList varList;
817    char *pbuf;
818    int bufSize;
819    if (!PyArg_ParseTuple(args, "OO&s#:__pickleLoaderSubsetsGenerator_minMaxSizeIterator", &type, ccn_VarList, &varList, &pbuf, &bufSize))
820      return NULL;
821
822    TCharBuffer buf(pbuf);
823
824    const int B = buf.readInt();
825    const int max = buf.readInt();
826    TSubsetsGenerator_minMaxSize_iterator *gen = new TSubsetsGenerator_minMaxSize_iterator(varList, B, max);
827    unpackCounter(buf, gen->counter);
828    gen->moreToCome = buf.readChar() != 0;
829
830    return WrapNewOrange(gen, type);
831  PyCATCH
832}
833
834
835
836
837PyObject *SubsetsGenerator_constant_new(PyTypeObject *type, PyObject *args, PyObject *) BASED_ON(SubsetsGenerator, "([constant=])") ALLOWS_EMPTY
838{ PyTRY
839    PyObject *varlist = NULL;
840
841    if (!PyArg_ParseTuple(args, "|O:SubsetsGenerator_constant.__new__", &varlist))
842      return PYNULL;
843
844    TSubsetsGenerator_constant *gen = mlnew TSubsetsGenerator_constant();
845    if (varlist && !(gen->varList = varListForReset(varlist))) {
846      delete gen;
847      return NULL;
848    }
849
850    gen->constant = CLONE(TVarList, gen->varList);
851    return WrapNewOrange(gen, type);
852  PyCATCH
853}
854/* ************ MINIMAL COMPLEXITY ************ */
855
856#include "minimal_complexity.hpp"
857
858ABSTRACT(IGConstructor, Orange)
859C_CALL(IGBySorting, IGConstructor, "([examples, bound-attrs]) -/-> IG")
860
861ABSTRACT(ColorIG, Orange)
862C_CALL(ColorIG_MCF, ColorIG, "([IG]) -/-> ColoredIG")
863
864C_CALL(FeatureByMinComplexity, FeatureInducer, "([examples, bound-attrs, name] [IGConstructor=, classifierFromIG=) -/-> Variable")
865
866C_NAMED(ColoredIG, GeneralExampleClustering, "(ig=, colors=)")
867
868
869bool convertFromPython(PyObject *args, TIGNode &ign)
870{ PyTRY
871    PDiscDistribution inco, co;
872    TExample *example;
873    if (!PyArg_ParseTuple(args, "O&|O&O&:convertFromPython(IG)", ptr_Example, &example, ccn_DiscDistribution, &inco, ccn_DiscDistribution, &co))
874      return false;
875
876    ign.example = PExample(mlnew TExample(*example));
877
878    if (inco)
879      ign.incompatibility = inco.getReference();
880    if (co)
881      ign.compatibility = co.getReference();
882    return true;
883  PyCATCH_r(false);
884}
885     
886
887bool convertFromPython(PyObject *args, PIG &ig)
888{ if (!PyList_Check(args))
889    PYERROR(PyExc_AttributeError, "invalid arguments (list expected)", false);
890
891  ig=PIG(mlnew TIG());
892  for(Py_ssize_t i=0; i<PyList_Size(args); i++) {
893    ig->nodes.push_back(TIGNode());
894    if (!convertFromPython(PyList_GetItem(args, i), ig->nodes.back())) {
895      ig=PIG();
896      PYERROR(PyExc_AttributeError, "invalid list argument", false);
897    }
898  }
899
900  return true;
901}
902
903
904PyObject *IG_new(PyTypeObject *type, PyObject *args, PyObject *) BASED_ON(Orange, "<see the manual>")
905{ PyTRY
906    PyObject *pyig;
907    PIG ig;
908    return PyArg_ParseTuple(args, "O:IG.new", &pyig) && convertFromPython(pyig, ig) ? WrapOrange(ig) : PYNULL;
909  PyCATCH
910}
911
912
913PyObject *IG_native(PyObject *self) PYARGS(0, "() -> [(Example, [incompatibility-float], [compatibility-float])]")
914{ PyTRY
915    CAST_TO(TIG, graph);
916 
917    PyObject *result=PyList_New(graph->nodes.size());
918    Py_ssize_t i=0;
919    ITERATE(vector<TIGNode>, ni, graph->nodes)
920      PyList_SetItem(result, i++, Py_BuildValue("NNN", 
921        Example_FromWrappedExample((*ni).example),
922         // it's OK to wrap a reference - we're just copying it
923        WrapNewOrange(mlnew TDiscDistribution((*ni).incompatibility), (PyTypeObject *)&PyOrDiscDistribution_Type),
924        WrapNewOrange(mlnew TDiscDistribution((*ni).compatibility), (PyTypeObject *)&PyOrDiscDistribution_Type)
925      ));
926
927    return result;
928  PyCATCH
929}
930
931
932PyObject *IG__reduce__(PyObject *self)
933{
934  PyTRY
935    return Py_BuildValue("O(N)N", self->ob_type, IG_native(self), packOrangeDictionary(self));
936  PyCATCH
937}
938
939
940PyObject *IG_normalize(PyObject *self) PYARGS(0, "() -> None")
941{ PyTRY
942    SELF_AS(TIG).normalize();
943    RETURN_NONE;
944  PyCATCH
945}
946
947
948PyObject *IG_make0or1(PyObject *self) PYARGS(0, "() -> None")
949{ PyTRY
950    SELF_AS(TIG).make0or1();
951    RETURN_NONE;
952  PyCATCH
953}
954
955
956PyObject *IG_complete(PyObject *self) PYARGS(0, "() -> None")
957{ PyTRY
958    SELF_AS(TIG).complete();
959    RETURN_NONE;
960  PyCATCH
961}
962
963
964PyObject *IG_removeEmpty(PyObject *self) PYARGS(0, "() -> None")
965{ PyTRY
966    SELF_AS(TIG).complete();
967    RETURN_NONE;
968  PyCATCH
969}
970
971
972
973PyObject *IGConstructor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples, bound-attrs) -> IG")
974{
975  PyTRY
976    NO_KEYWORDS
977
978    PExampleGenerator egen;
979    PyObject *boundList;
980    int weight=0;
981    if (!PyArg_ParseTuple(args, "O&O|O&", pt_ExampleGenerator, &egen, &boundList, pt_weightByGen(egen), &weight))
982      PYERROR(PyExc_TypeError, "attribute error", PYNULL);
983
984    TVarList boundset;
985    if (!varListFromDomain(boundList, egen->domain, boundset))
986      return PYNULL;
987
988    return WrapOrange(SELF_AS(TIGConstructor)(egen, boundset, weight));
989  PyCATCH
990}
991
992
993
994PyObject *ColorIG_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(ig) -> [int]")
995{
996  PyTRY
997    NO_KEYWORDS
998
999    PIG graph;
1000    return PyArg_ParseTuple(args, "O&:ColorIG.__call__", cc_IG, &graph) ? WrapOrange(SELF_AS(TColorIG)(graph)) : PYNULL;
1001  PyCATCH
1002}
1003
1004
1005/* ************ MINIMAL ERROR ******** */
1006
1007#include "minimal_error.hpp"
1008
1009C_CALL(FeatureByIM, FeatureInducer, "([examples, bound-attrs, name] [constructIM=, classifierFromIM=]) -/-> Variable")
1010
1011ABSTRACT(IMConstructor, Orange)
1012C_CALL(IMBySorting, IMConstructor, "() | (examples, bound-attrs[[, free-attrs], weightID]) -/-> IM")
1013C_CALL(IMByIMByRows, IMConstructor, "() | (examples, bound-attrs[[, free-attrs], weightID]) -/-> IM")
1014C_CALL(IMByRelief, IMConstructor, "() | (examples, bound-attrs[[, free-attrs], weightID]) -/-> IM")
1015
1016ABSTRACT(ClustersFromIM, Orange)
1017C_CALL(ClustersFromIMByAssessor, ClustersFromIM, "([IM] [minProfitProportion=, columnAssessor=, stopCriterion=]) -/-> IMClustering")
1018
1019C_NAMED(IMClustering, Orange, "([im= clusters=, maxCluster=])")
1020
1021BASED_ON(IMByRows, Orange)
1022NO_PICKLE(IMByRows)
1023
1024ABSTRACT(IMByRowsConstructor, Orange)
1025C_CALL(IMByRowsBySorting, IMByRowsConstructor, "() | (examples, bound-attrs[[, free-attrs], weightID]) -/-> IMByRows")
1026C_CALL(IMByRowsByRelief, IMByRowsConstructor, "() | (examples, bound-attrs[[, free-attrs], weightID]) -/-> IMByRows")
1027
1028ABSTRACT(IMByRowsPreprocessor, Orange)
1029C_CALL(IMBlurer, IMByRowsPreprocessor, "([IMByRows]) -> None")
1030
1031C_CALL3(AssessIMQuality, AssessIMQuality, Orange, "([IM] -/-> float)")
1032
1033ABSTRACT(StopIMClusteringByAssessor, Orange)
1034C_NAMED(StopIMClusteringByAssessor_noProfit, StopIMClusteringByAssessor, "([minProfitProportion=])")
1035C_NAMED(StopIMClusteringByAssessor_binary, StopIMClusteringByAssessor, "()")
1036C_NAMED(StopIMClusteringByAssessor_n, StopIMClusteringByAssessor, "(n=)")
1037C_NAMED(StopIMClusteringByAssessor_noBigChange, StopIMClusteringByAssessor, "()")
1038
1039ABSTRACT(ColumnAssessor, Orange)
1040C_NAMED(ColumnAssessor_m, ColumnAssessor, "([m=])")
1041C_NAMED(ColumnAssessor_Laplace, ColumnAssessor, "()")
1042C_NAMED(ColumnAssessor_mf, ColumnAssessor, "([m=])")
1043C_NAMED(ColumnAssessor_N, ColumnAssessor, "()")
1044C_NAMED(ColumnAssessor_Relief, ColumnAssessor, "()")
1045C_NAMED(ColumnAssessor_Measure, ColumnAssessor, "(measure=)")
1046C_NAMED(ColumnAssessor_Kramer, ColumnAssessor, "()")
1047
1048C_CALL(MeasureAttribute_IM, MeasureAttribute, "(constructIM=, columnAssessor=) | (attr, examples[, apriori] [,weightID]) -/-> (float, meas-type)")
1049
1050
1051PyObject *IMConstructor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(example, bound-attrs[, weightID]) -> IM")
1052{
1053  PyTRY
1054    NO_KEYWORDS
1055
1056    PExampleGenerator egen;
1057    PyObject *boundList;
1058    int weightID = 0;
1059    if (PyArg_ParseTuple(args, "O&O|O&", pt_ExampleGenerator, &egen, &boundList, pt_weightByGen(egen), &weightID)) {
1060      TVarList boundset;
1061      if (!varListFromDomain(boundList, egen->domain, boundset))
1062        return PYNULL;
1063
1064      PIM im=SELF_AS(TIMConstructor)(egen, boundset, weightID);
1065      return WrapOrange(im);
1066    }
1067
1068    PyErr_Clear();
1069
1070    PyObject *freeList;
1071    if (PyArg_ParseTuple(args, "O&OO|O&", pt_ExampleGenerator, &egen, &boundList, &freeList, pt_weightByGen(egen), &weightID)) {
1072      TVarList boundset;
1073      if (!varListFromDomain(boundList, egen->domain, boundset))
1074        return PYNULL;
1075
1076      TVarList freeset;
1077      if (!varListFromDomain(freeList, egen->domain, freeset))
1078        return PYNULL;
1079
1080      PIM im = SELF_AS(TIMConstructor)(egen, boundset, freeset, weightID);
1081      return WrapOrange(im);
1082    }
1083
1084    PyErr_Clear();
1085
1086    PIMByRows imbr;
1087    if (PyArg_ParseTuple(args, "O&", cc_IMByRows, &imbr))
1088      return WrapOrange(SELF_AS(TIMConstructor)(imbr));
1089
1090    PYERROR(PyExc_TypeError, "invalid arguments -- examples, boundset and optional freeset and weight expected", PYNULL);
1091  PyCATCH
1092}
1093
1094
1095
1096PyObject *IMByRowsConstructor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(example, bound-attrs[, weightID]) -> IM")
1097{
1098  PyTRY
1099    NO_KEYWORDS
1100
1101    PExampleGenerator egen;
1102    PyObject *boundList;
1103    int weightID=0;
1104    if (PyArg_ParseTuple(args, "O&O|O&", pt_ExampleGenerator, &egen, &boundList, pt_weightByGen(egen), &weightID)) {
1105      TVarList boundset;
1106      if (!varListFromDomain(boundList, egen->domain, boundset))
1107        return PYNULL;
1108
1109      PIMByRows im=SELF_AS(TIMByRowsConstructor)(egen, boundset, weightID);
1110      return WrapOrange(im);
1111    }
1112
1113    PyErr_Clear();
1114
1115    PyObject *freeList;
1116    if (PyArg_ParseTuple(args, "O&OO|O&", pt_ExampleGenerator, &egen, &boundList, &freeList, pt_weightByGen(egen), &weightID)) {
1117      TVarList boundset;
1118      if (!varListFromDomain(boundList, egen->domain, boundset))
1119        return PYNULL;
1120
1121      TVarList freeset;
1122      if (!varListFromDomain(freeList, egen->domain, freeset))
1123        return PYNULL;
1124
1125      PIMByRows im=SELF_AS(TIMByRowsConstructor)(egen, boundset, freeset, weightID);
1126      return WrapOrange(im);
1127    }
1128
1129    PYERROR(PyExc_TypeError, "invalid arguments -- examples, boundset and optional freeset and weight expected", PYNULL);
1130  PyCATCH
1131}
1132
1133
1134
1135
1136PyObject *IMByRowsPreprocessor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(IMByRows) -> None")
1137{ 
1138  PyTRY
1139    NO_KEYWORDS
1140   
1141    PIMByRows pimbr;
1142    if (!PyArg_ParseTuple(args, "O&", cc_IMByRows, &pimbr))
1143      PYERROR(PyExc_TypeError, "IMByRows expected", PYNULL)
1144
1145    SELF_AS(TIMByRowsPreprocessor)(pimbr);
1146    RETURN_NONE;
1147  PyCATCH
1148}
1149
1150
1151PyObject *Float2List(float *f, int size)
1152{ PyObject *dlist = PyList_New(size);
1153  for(int i = 0; i < size; i++)
1154    PyList_SetItem(dlist, Py_ssize_t(i), PyFloat_FromDouble((double)*(f++)));
1155  return dlist;
1156}
1157
1158
1159bool List2Float(PyObject *l, float *&f, int &size)
1160{ if (!PyList_Check(l))
1161    PYERROR(PyExc_TypeError, "invalid type (list expected)", false);
1162
1163  size = PyList_Size(l);
1164  float *fi = f = mlnew float[size];
1165
1166  for(int s = 0; s<size; s++) {
1167    PyObject *flt = PyNumber_Float(PyList_GetItem(l, s));
1168    if (!flt) {
1169      PyErr_Format(PyExc_TypeError, "invalid list element at index '%i'", s);
1170      mldelete f;
1171      return false;
1172    }
1173    *fi = (float)PyFloat_AsDouble(flt);
1174    Py_DECREF(flt);
1175  }
1176
1177  return true;
1178}
1179
1180
1181PyObject *convertToPython(const T_ExampleIMColumnNode &eicn)
1182{ PyObject *column=PyList_New(0);
1183
1184  if (eicn.column) {
1185    bool discrete = dynamic_cast<TDIMColumnNode *>(eicn.column) != NULL;
1186
1187    for(TIMColumnNode *node=eicn.column; node; node=node->next) {
1188      PyObject *pycnode = PYNULL;
1189      if (discrete) {
1190        TDIMColumnNode *dnode=dynamic_cast<TDIMColumnNode *>(node);
1191        pycnode=Py_BuildValue("ifN", dnode->index, dnode->nodeQuality, Float2List(dnode->distribution, dnode->noOfValues)); 
1192      }
1193      else {
1194        TFIMColumnNode *fnode=dynamic_cast<TFIMColumnNode *>(node);
1195        if (fnode)
1196          pycnode=Py_BuildValue("iffff", fnode->index, fnode->nodeQuality,
1197                                         fnode->sum, fnode->sum2, fnode->N);
1198      }
1199
1200      if (!pycnode)
1201        PYERROR(PyExc_TypeError, "invalid IMColumnNode", PYNULL);
1202
1203      PyList_Append(column, pycnode);
1204      Py_DECREF(pycnode);
1205    }
1206  }
1207
1208  return Py_BuildValue("NN", Example_FromWrappedExample(eicn.example), column);
1209}
1210
1211
1212bool convertFromPython(PyObject *args, T_ExampleIMColumnNode &eicn)
1213{ PyObject *column;
1214  TExample *example;
1215  if (   !PyArg_ParseTuple(args, "O&O", ptr_Example, &example, &column)
1216      || !PyTuple_Check(column))
1217    PYERROR(PyExc_TypeError, "convertFromPython(T_ExampleIMColumnNode): invalid arguments", false);
1218
1219  bool discrete = PyTuple_Size(column)==3;
1220
1221  eicn.example = mlnew TExample(*example);
1222  eicn.column = NULL;
1223  TIMColumnNode **nodeptr = &eicn.column;
1224
1225  for(Py_ssize_t i=0; i<PyList_Size(column); i++) {
1226    PyObject *item=PyList_GetItem(column, i);
1227    if (discrete) {
1228      *nodeptr=mlnew TDIMColumnNode(0, 0);
1229      PyObject *distr;
1230      TDIMColumnNode *dimcn = dynamic_cast<TDIMColumnNode *>(*nodeptr);
1231      if (   !PyArg_ParseTuple(item, "ifO", &(*nodeptr)->index, &(*nodeptr)->nodeQuality, &distr)
1232          || !List2Float(distr, dimcn->distribution, dimcn->noOfValues)) {
1233        mldelete eicn.column;
1234        PYERROR(PyExc_TypeError, "invalid column node", false);
1235      }
1236    }
1237    else {
1238      *nodeptr=mlnew TFIMColumnNode(0);
1239      if (!PyArg_ParseTuple(item, "iffff", &(*nodeptr)->index, &(*nodeptr)->nodeQuality,
1240                                           &dynamic_cast<TFIMColumnNode *>(*nodeptr)->sum, 
1241                                           &dynamic_cast<TFIMColumnNode *>(*nodeptr)->sum2, 
1242                                           &dynamic_cast<TFIMColumnNode *>(*nodeptr)->N)) {
1243        mldelete eicn.column;
1244        PYERROR(PyExc_TypeError, "invalid column node", false);
1245      }
1246    }
1247  }
1248  return true;
1249}
1250
1251
1252PyObject *convertToPython(const PIM &im)
1253{ PyObject *result=PyList_New(0);
1254  const_ITERATE(vector<T_ExampleIMColumnNode>, ici, im->columns) {
1255    PyObject *item=convertToPython(*ici);
1256    if (!item) {
1257      PyMem_DEL(result);
1258      PYERROR(PyExc_SystemError, "out of memory", PYNULL);
1259    }
1260    PyList_Append(result, item);
1261    Py_DECREF(item);
1262  }
1263  return result;
1264}
1265     
1266bool convertFromPython(PyObject *args, PIM &im)
1267{ im=PIM();
1268  if (!PyList_Check(args) || !PyList_Size(args))
1269    PYERROR(PyExc_TypeError, "invalid incompatibility matrix", false);
1270
1271  // This is just to determine the type...
1272  int varType = -1;
1273  T_ExampleIMColumnNode testcolumn;
1274  if (!convertFromPython(PyList_GetItem(args, 0), testcolumn))
1275    return false;
1276
1277  varType = dynamic_cast<TDIMColumnNode *>(testcolumn.column) ? TValue::INTVAR : TValue::FLOATVAR;
1278  const type_info &tinfo = typeid(*testcolumn.column);
1279
1280  im=PIM(mlnew TIM(varType));
1281  im->columns=vector<T_ExampleIMColumnNode>();
1282  for(Py_ssize_t i=0; i<PyList_Size(args); i++) {
1283    PyObject *item=PyList_GetItem(args, i);
1284    im->columns.push_back(T_ExampleIMColumnNode());
1285    if (!convertFromPython(item, im->columns.back())) {
1286      im=PIM();
1287      return false;
1288    }
1289    if (tinfo == typeid(im->columns.back().column))
1290      PYERROR(PyExc_TypeError, "invalid incompatibility matrix (mixed discrete and continuous classes)", false)
1291  }
1292
1293  return true;
1294}
1295
1296
1297PyObject *IM_native(PyObject *self) PYARGS(0, "() -> [[index, quality, distribution, c]] | [[index, quality, sum, sum2, N]]")
1298{ PyTRY
1299    return convertToPython(PyOrange_AsIM(self)); 
1300  PyCATCH
1301}
1302
1303PyObject *IM_fuzzy(PyObject *self) PYARGS(0, "() -> boolean")
1304{ PyTRY
1305    return PyInt_FromLong(SELF_AS(TIM).fuzzy() ? 1L : 0L); 
1306  PyCATCH
1307}
1308
1309
1310PyObject *IM_new(PyTypeObject *type, PyObject *args, PyObject *) BASED_ON(Orange, "<see the manual>")
1311{ PyTRY
1312    PIM im;
1313    PyObject *pyim;
1314    return PyArg_ParseTuple(args, "O:IM.new", &pyim) && convertFromPython(pyim, im) ? WrapOrange(im) : PYNULL;
1315  PyCATCH
1316}
1317
1318
1319PyObject *IM__reduce__(PyObject *self)
1320{
1321  PyTRY
1322    return Py_BuildValue("O(N)N", self->ob_type, IM_native(self), packOrangeDictionary(self));
1323  PyCATCH
1324}
1325
1326
1327PyObject *convertToPython(const TDIMRow &row)
1328{ PyObject *pyrow=PyList_New(row.nodes.size());
1329  Py_ssize_t i = 0;
1330  const int &noval = row.noOfValues;
1331  const_ITERATE(vector<float *>, ii, row.nodes)
1332    PyList_SetItem(pyrow, i++, Float2List(*ii, noval));
1333
1334  return Py_BuildValue("NN", Example_FromWrappedExample(row.example), pyrow);
1335}
1336
1337
1338PyObject *convertToPython(const PIMByRows &im)
1339{ PyObject *result=PyList_New(im->rows.size());
1340  Py_ssize_t i=0;
1341  const_ITERATE(vector<TDIMRow>, ri, im->rows)
1342    PyList_SetItem(result, i++, convertToPython(*ri));
1343  return result; 
1344}
1345
1346PyObject *IMByRows_native(PyObject *self) PYARGS(0, "() -> [example, [distributions]]")
1347{ PyTRY
1348    return convertToPython(PyOrange_AsIMByRows(self));
1349  PyCATCH
1350}
1351
1352PyObject *IMByRows_get_columnExamples(PyObject *self) PYDOC("Values of bound attributes for each column")
1353{ PyTRY
1354    CAST_TO(TIMByRows, pimr);
1355    PyObject *result=PyList_New(pimr->columnExamples.size());
1356    Py_ssize_t i=0;
1357    ITERATE(vector<PExample>, ei, pimr->columnExamples)
1358      PyList_SetItem(result, i++, Example_FromWrappedExample(*ei));
1359    return result;
1360  PyCATCH
1361}
1362
1363PyObject *ClustersFromIM_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(im) -> IMClustering")
1364{ 
1365  PyTRY
1366    NO_KEYWORDS
1367
1368    PIM im;
1369    if (!PyArg_ParseTuple(args, "O&:ClustersFromIM.__call__", cc_IM, &im))
1370      return PYNULL;
1371
1372    return WrapOrange(SELF_AS(TClustersFromIM)(im));
1373  PyCATCH
1374}
1375
1376
1377
1378
1379
1380PyObject *AssessIMQuality_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(im) -> float")
1381{ 
1382  PyTRY
1383    NO_KEYWORDS
1384
1385    PIM im;
1386    if (!PyArg_ParseTuple(args, "O&:AssessIMQuality.__call__", cc_IM, &im))
1387      return PYNULL;
1388
1389    return PyFloat_FromDouble((double)SELF_AS(TAssessIMQuality)(im));
1390  PyCATCH
1391}
1392
1393
1394
1395/* ************ FEATURE CONSTRUCTION BY CLUSTERING ******** */
1396
1397#include "dist_clustering.hpp"
1398
1399ABSTRACT(ExampleDistConstructor, Orange)
1400C_CALL(ExampleDistBySorting, ExampleDistConstructor, "([examples, bound-attrs[, weightID]]) -/-> ExampleDistVector")
1401BASED_ON(ExampleDistVector, Orange)
1402ABSTRACT(ClustersFromDistributions, Orange)
1403C_CALL(ClustersFromDistributionsByAssessor, ClustersFromDistributions, "([example-dist-vector] [minProfitProportion=, distributionAssessor=, stopCriterion=]) -/-> DistClustering")
1404C_CALL(FeatureByDistributions, FeatureInducer, "() | ([examples, bound-attrs, name], [constructExampleDist=, completion=]) -/-> Variable")
1405
1406ABSTRACT(DistributionAssessor, Orange)
1407C_NAMED(DistributionAssessor_Laplace, DistributionAssessor, "()")
1408C_NAMED(DistributionAssessor_m, DistributionAssessor, "([m=])")
1409C_NAMED(DistributionAssessor_mf, DistributionAssessor, "([m=])")
1410C_NAMED(DistributionAssessor_Relief, DistributionAssessor, "()")
1411C_NAMED(DistributionAssessor_Measure, DistributionAssessor, "([measure=])")
1412C_NAMED(DistributionAssessor_Kramer, DistributionAssessor, "()")
1413
1414ABSTRACT(StopDistributionClustering, Orange)
1415C_NAMED(StopDistributionClustering_noProfit, StopDistributionClustering, "([minProfitProportion=])")
1416C_NAMED(StopDistributionClustering_binary, StopDistributionClustering, "()")
1417C_NAMED(StopDistributionClustering_n, StopDistributionClustering, "([n=])")
1418C_NAMED(StopDistributionClustering_noBigChange, StopDistributionClustering, "()")
1419
1420
1421PyObject *ExampleDistConstructor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples, bound-attrs[, weightID]) -> ExampleDistVector")
1422{
1423  PyTRY
1424    NO_KEYWORDS
1425
1426    PExampleGenerator egen;
1427    PyObject *boundList;
1428    int weightID=0;
1429    if (!PyArg_ParseTuple(args, "O&O|O&:ExampleDistConstructor.__call__", pt_ExampleGenerator, &egen, &boundList, pt_weightByGen(egen), &weightID))
1430      return PYNULL;
1431
1432    TVarList boundset;
1433    if (!varListFromDomain(boundList, egen->domain, boundset))
1434      return PYNULL;
1435
1436    PExampleDistVector edv = SELF_AS(TExampleDistConstructor)(egen, boundset, weightID);
1437    return WrapOrange(edv);
1438  PyCATCH
1439}
1440
1441
1442
1443
1444
1445PyObject *convertToPython(const T_ExampleDist &ed)
1446{ return Py_BuildValue("NN", Example_FromWrappedExample(ed.example), WrapOrange(const_cast<GCPtr<TDistribution> &>(ed.distribution))); }
1447
1448
1449
1450PyObject *convertToPython(const PExampleDistVector &edv)
1451{ PyObject *result=PyList_New(0);
1452  const_ITERATE(vector<T_ExampleDist>, ici, edv->values) {
1453    PyObject *item=convertToPython(*ici);
1454    if (!item) {
1455      PyMem_DEL(result);
1456      PYERROR(PyExc_SystemError, "out of memory", PYNULL);
1457    }
1458    PyList_Append(result, item);
1459    Py_DECREF(item);
1460  }
1461  return result;
1462}
1463     
1464
1465PyObject *ExampleDistVector__reduce__(PyObject *self)
1466{
1467  PyTRY
1468    vector<T_ExampleDist> &values = SELF_AS(TExampleDistVector).values;
1469
1470    PyObject *pyvalues = PyList_New(values.size() * 2);
1471    Py_ssize_t i = 0;
1472    ITERATE(vector<T_ExampleDist>, edi, values) {
1473      PyList_SetItem(pyvalues, i++, Example_FromWrappedExample(edi->example));
1474      PyList_SetItem(pyvalues, i++, WrapOrange(edi->distribution));
1475    }
1476     
1477    return Py_BuildValue("O(ON)N", getExportedFunction("__pickleLoaderExampleDistVector"),
1478                                   self->ob_type,
1479                                   pyvalues,
1480                                   packOrangeDictionary(self));
1481
1482  PyCATCH
1483}
1484
1485
1486PyObject *__pickleLoaderExampleDistVector(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(type, values)")
1487{
1488  PyTRY
1489    PyTypeObject *type;
1490    PyObject *pyvalues;
1491    if (!PyArg_ParseTuple(args, "OO:__pickleLoaderExampleDistVector", &type, &pyvalues))
1492      return NULL;
1493
1494    TExampleDistVector *ed = new TExampleDistVector();
1495
1496    try {
1497      Py_ssize_t i = 0, e = PyList_Size(pyvalues);
1498      ed->values.reserve(e>>1);
1499      while(i < e) {
1500        PExample ex = PyExample_AS_Example(PyList_GetItem(pyvalues, i++));
1501        PDistribution dist = PyOrange_AsDistribution(PyList_GetItem(pyvalues, i++));
1502        ed->values.push_back(T_ExampleDist(ex, dist));
1503      }
1504
1505      return WrapNewOrange(ed, type);
1506    }
1507    catch (...) {
1508      delete ed;
1509      throw;
1510    }
1511  PyCATCH
1512}
1513
1514
1515PyObject *ExampleDistVector_native(PyObject *self) PYARGS(0, "() -> [[[float]]] | [[{float: float}]]")
1516{ PyTRY
1517    return convertToPython(PyOrange_AsExampleDistVector(self));
1518  PyCATCH
1519}
1520
1521
1522
1523PyObject *ClustersFromDistributions_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(example-dist-vector) -> DistClustering")
1524{ 
1525  PyTRY
1526    NO_KEYWORDS
1527
1528    PExampleDistVector edv;
1529    if (!PyArg_ParseTuple(args, "O&:ClustersFromDistributions.__call__", cc_ExampleDistVector, &edv))
1530      return PYNULL;
1531
1532    return WrapOrange(SELF_AS(TClustersFromDistributions)(edv));
1533  PyCATCH
1534}
1535
1536
1537#include "lib_preprocess.px"
Note: See TracBrowser for help on using the repository browser.