source: orange/source/orange/gslconversions.cpp @ 9532:2c6a8bb6ec89

Revision 9532:2c6a8bb6ec89, 5.3 KB checked in by janezd <janez.demsar@…>, 2 years ago (diff)

Conversion to numpy now supports multiple classes (Ticket #1012)

Line 
1/*
2    This file is part of Orange.
3   
4    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
5    Contact: janez.demsar@fri.uni-lj.si
6
7    Orange is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11
12    Orange is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21/*
22#include "gsl/gsl_matrix.h"
23#include "gsl/gsl_vector.h"
24*/
25#include <vector>
26
27#include "orange.hpp"
28#include "vars.hpp"
29#include "examplegen.hpp"
30
31/*
32#ifndef _DEBUG
33// I prefer to see exceptions when debugging
34extern "C" void my_gsl_error_handler(const char *reason, const char *file, int line, int)
35{ raiseErrorWho("GSL", "%s (%s:%i)", reason, file, line); }
36
37gsl_error_handler_t *fooerrh = gsl_set_error_handler(my_gsl_error_handler);
38#endif
39*/
40
41void parseMatrixContents(PExampleGenerator egen, const int &weightID, const char *contents, const int &multiTreatment,
42                         bool &hasClass, bool &classVector, bool &multiclassVector, bool &weightVector, bool &classIsDiscrete, int &columns,
43                         vector<bool> &include)
44{
45  hasClass = bool(egen->domain->classVar);
46
47  columns = 0;
48  int classIncluded = 0, attrsIncluded = 0, weightIncluded = 0, multiclassIncluded = 0;
49  bool attrsRequested = false, classRequested = false, weightRequested = false;
50  bool multiclassRequested = false;
51  const char *cp;
52  for(cp = contents; *cp && (*cp!='/'); cp++) {
53    switch (*cp) {
54      case 'A': attrsRequested = true;
55      case 'a': attrsIncluded++;
56                break;
57
58      case 'C': classRequested = true;
59      case 'c': classIncluded++;
60                break;
61
62      case 'W': weightRequested = true;
63      case 'w': weightIncluded++;
64                break;
65
66      case 'M': multiclassRequested = true;
67      case 'm': multiclassIncluded++;
68                break;
69
70      case '0':
71      case '1': columns++;
72                break;
73      default:
74        raiseErrorWho("parseMatrixContents", "unrecognized character '%c' in format string '%s')", *cp, contents);
75    }
76  }
77
78  classVector = false;
79  weightVector = false;
80
81  if (*cp)
82    while(*++cp)
83      switch (*cp) {
84        case 'A':
85        case 'a': raiseErrorWho("parseMatrixContents", "invalid format string (attributes on the right side)");
86
87        case '0':
88        case '1': raiseErrorWho("parseMatrixContents", "invalid format string (constants on the right side)");
89
90        case 'c': classVector = hasClass; break;
91        case 'C': classVector = true; break;
92
93        case 'w': weightVector = (weightID != 0); break;
94        case 'W': weightVector = true; break;
95
96        case 'm': multiclassVector = (egen->domain->classVars->size() != 0); break;
97        case 'M': multiclassVector = true; break;
98        default:
99          raiseErrorWho("parseMatrixContents", "unrecognized character '%c' in format string '%s')", *cp, contents);
100      }
101
102
103  if (classIncluded || classVector) {
104    if (hasClass) {
105      TEnumVariable *eclassVar = egen->domain->classVar.AS(TEnumVariable);
106      classIsDiscrete = eclassVar != NULL;
107      if (classIsDiscrete) {
108        if ((eclassVar->values->size()>2) && (multiTreatment != 1))
109          raiseErrorWho("parseMatrixContents", "multinomial classes are allowed only when explicitly treated as ordinal"); 
110      }
111      else {
112        if (egen->domain->classVar->varType != TValue::FLOATVAR)
113          raiseErrorWho("parseMatrixContents", "unsupported class type");
114      } 
115
116      columns += classIncluded;
117    }
118    else if (classRequested || classVector)
119      raiseErrorWho("parseMatrixContents", "classless domain");
120  }
121
122
123  if (weightIncluded || weightVector) {
124    if (weightID)
125      columns += weightIncluded;
126  }
127
128  if (multiclassIncluded || multiclassVector) {
129      columns += multiclassIncluded * egen->domain->classVars->size();
130  }
131
132  include.clear();
133
134  if (attrsIncluded) {
135    int attrs_in = 0;
136
137    const_PITERATE(TVarList, vi, egen->domain->attributes) {
138      if ((*vi)->varType == TValue::FLOATVAR) {
139        attrs_in++;
140        include.push_back(true);
141      }
142      else if ((*vi)->varType == TValue::INTVAR) {
143        if ((*vi).AS(TEnumVariable)->values->size() == 2) {
144          attrs_in++;
145          include.push_back(true);
146        }
147        else
148          switch (multiTreatment) {
149            case 0:
150              include.push_back(false);
151              break;
152
153            case 1:
154              attrs_in++;
155              include.push_back(true);
156              break;
157
158            default:
159              raiseErrorWho("parseMatrixContents", "attribute '%s' is multinomial", (*vi)->get_name().c_str());
160          }
161      }
162      else {
163        attrs_in++;
164        include.push_back(true);
165        raiseWarning(PyExc_OrangeKernelWarning, "attribute '%s' is of unsupported type", (*vi)->get_name().c_str());
166      }
167    }
168
169    if (attrsRequested && !attrs_in)
170      raiseErrorWho("parseMatrixContents", "the domain has no (useful) attributes");
171
172    columns += attrs_in * attrsIncluded;
173  }
174}
175
Note: See TracBrowser for help on using the repository browser.