source: orange/source/orange/lib_io.cpp @ 10220:b6e694facf69

Revision 10220:b6e694facf69, 10.8 KB checked in by Janez Demšar <janez.demsar@…>, 2 years ago (diff)

Reenabled renaming of Orange classes (ticket #1086), added a script for checking
that all classes are named so that unpickle can reconstruct them.

Line 
1/*
2    This file is part of Orange.
3   
4    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
5    Contact: janez.demsar@fri.uni-lj.si
6
7    Orange is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11
12    Orange is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21
22#ifdef _MSC_VER
23  #pragma warning (disable : 4786 4114 4018 4267 4244)
24#endif
25
26#include "vars.hpp"
27#include "domain.hpp"
28#include "examplegen.hpp"
29#include "table.hpp"
30
31#include "cls_orange.hpp"
32#include "externs.px"
33
34PVarList knownVars(PyObject *keywords); // defined in lib_kernel.cpp
35TMetaVector *knownMetas(PyObject *keywords); // ibid
36PDomain knownDomain(PyObject *keywords); // ibid
37
38PyObject *encodeStatus(const vector<int> &Status);  // in cls_misc.cpp
39PyObject *encodeStatus(const vector<pair<int, int> > &metaStatus);
40
41/* ************ FILE EXAMPLE GENERATORS ************ */
42
43#include "filegen.hpp"
44BASED_ON(FileExampleGenerator, ExampleGenerator)
45
46#include "tabdelim.hpp"
47#include "c45inter.hpp"
48#include "basket.hpp"
49
50
/* Splits `name` at its last '.'.

   Returns true when a dot is found; `before` then receives everything in
   front of that dot and `after` receives the rest of the string, starting
   with the dot itself (e.g. "data.tab" -> "data", ".tab").

   Returns false, leaving `before` and `after` untouched, when `name`
   contains no dot -- including when it is empty.

   The search covers the whole string and does not stop at path separators,
   so a dot inside a directory component is matched as well. */
bool divDot(const std::string &name, std::string &before, std::string &after)
{
  // rfind copes with the empty string; the former hand-written backward scan
  // dereferenced end() in that case.
  const std::string::size_type dotPos = name.rfind('.');
  if (dotPos == std::string::npos)
    return false;

  // Build both parts before assigning, so the outputs are never read
  // half-updated even if a caller passes aliased strings.
  std::string head(name, 0, dotPos);
  std::string tail(name, dotPos);
  before.swap(head);
  after.swap(tail);
  return true;
}
59
60
61NO_PICKLE(BasketExampleGenerator)
62NO_PICKLE(C45ExampleGenerator)
63NO_PICKLE(FileExampleGenerator)
64NO_PICKLE(TabDelimExampleGenerator)
65NO_PICKLE(BasketFeeder)
66
67BASED_ON(BasketFeeder, Orange)
68
69
70
71
72PyObject *TabDelimExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])")
73{ PyTRY
74    char *fileName;
75    int createNewOn = TVariable::Incompatible;
76    if (!PyArg_ParseTuple(args, "s|i:TabDelimExampleGenerator.__new__", &fileName, &createNewOn))
77      return NULL;
78
79    string name(fileName), b, a;
80    if (!divDot(name, b, a))
81      name+=".tab";
82   
83    vector<int> status;
84    vector<pair<int, int> > metaStatus;
85    TExampleGenerator *egen = mlnew TTabDelimExampleGenerator(name, false, false, createNewOn, status, metaStatus);
86    return Py_BuildValue("NNN", WrapNewOrange(egen, type), encodeStatus(status), encodeStatus(metaStatus));
87  PyCATCH
88}
89
90
91PyObject *BasketExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain])")
92{ PyTRY
93    char *fileName;
94    int createNewOn = TVariable::Incompatible;
95    if (!PyArg_ParseTuple(args, "s|i:BasketExampleGenerator.__new__", &fileName, &createNewOn))
96      return NULL;
97
98    string name(fileName), b, a;
99    if (!divDot(name, b, a))
100      name+=".basket";
101
102    vector<int> status;
103    vector<pair<int, int> > metaStatus;
104    TExampleGenerator *egen = mlnew TBasketExampleGenerator(name, PDomain(), createNewOn, status, metaStatus);
105    return Py_BuildValue("NNN", WrapNewOrange(egen, type), encodeStatus(status), encodeStatus(metaStatus));
106  PyCATCH
107}
108
109
110PyObject *BasketFeeder_clearCache(PyObject *, PyObject *) PYARGS(METH_O, "() -> None")
111{ PyTRY
112    TBasketFeeder::clearCache();
113    RETURN_NONE;
114  PyCATCH
115}
116
117
118
119PyObject *C45ExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])")
120{ PyTRY
121    char *stem;
122    int createNewOn = TVariable::Incompatible;
123    if (!PyArg_ParseTuple(args, "s|i:C45ExampleGenerator.__new__", &stem, &createNewOn))
124      return NULL;
125
126    string domain, data;
127    string b, a;
128    if (divDot(stem, b, a))
129      { data=stem; domain=b+".names"; }
130    else
131      { data=string(stem)+".data"; domain=string(stem)+".names"; }
132
133    vector<int> status;
134    vector<pair<int, int> > metaStatus;
135    TExampleGenerator *egen = mlnew TC45ExampleGenerator(data, domain, createNewOn, status, metaStatus);
136    return Py_BuildValue("NNO", WrapNewOrange(egen, type), encodeStatus(status), encodeStatus(metaStatus));
137  PyCATCH
138}
139
140
141
142
143int pt_ExampleGenerator(PyObject *args, void *egen);
144
145void tabDelim_writeDomain(FILE *, PDomain, bool autodetect, char delim = '\t', bool listDiscreteValues = true);
146void tabDelim_writeExamples(FILE *, PExampleGenerator, char delim = '\t', const char *DK = NULL, const char *DC = NULL);
147
148
149FILE *openWReplacedExtension(const char *filename, const char *extension, const char *oldExtension)
150{
151  const char *newname = replaceExtension(filename, extension, oldExtension);
152  FILE *ostr = fopen(newname, "wt");
153  if (!ostr)
154    PyErr_Format(PyExc_SystemError, "cannot open file '%s'", newname);
155  mldelete const_cast<char *>(newname);
156  return ostr;
157}
158
159   
160FILE *openExtended(const char *filename, const char *defaultExtension)
161{
162  const char *extension = getExtension(filename);
163  const char *extended = extension ? filename : replaceExtension(filename, defaultExtension, NULL);
164  FILE *ostr = fopen(extended, "wt");
165  if (!ostr)
166    PyErr_Format(PyExc_SystemError, "cannot open file '%s'", extended);
167  if (!extension)
168    mldelete const_cast<char *>(extended);
169  return ostr;
170}
171
172
173int getStringIfExists(PyObject *keyws, const char *name, char *&res)
174{
175  PyObject *ldv = PyDict_GetItemString(keyws, name);
176  if (ldv) {
177    if (!PyString_Check(ldv)) {
178      PyErr_Format(PyExc_TypeError, "string value expected for '%s'", name);
179      return -1;
180    }
181   
182    res = PyString_AsString(ldv);
183    return 0;
184  }
185
186  return 1;
187}
188
189
190bool readUndefinedSpecs(PyObject *keyws, char *&DK, char *&DC)
191{
192  if (keyws) {
193    int res;
194
195    char *tmp;
196    res = getStringIfExists(keyws, "NA", tmp);
197    if (res == -1)
198      return false;
199    if (!res)
200      DK = DC = tmp;
201
202    res = getStringIfExists(keyws, "DC", DC);
203    if (res == -1)
204      return false;
205
206    res = getStringIfExists(keyws, "DK", DK);
207    if (res == -1)
208      return false;
209  }
210
211  return true;
212}
213
214
215PyObject *tabDelimBasedWrite(PyObject *args, PyObject *keyws, const char *defaultExtension, bool skipAttrTypes, char delim, bool listDiscreteValues = true)
216{ PyTRY
217    char *filename;
218    PExampleGenerator gen;
219
220    if (!PyArg_ParseTuple(args, "sO&", &filename, pt_ExampleGenerator, &gen))
221      PYERROR(PyExc_TypeError, "string and example generator expected", PYNULL);
222
223    if (skipAttrTypes && !gen->domain->classVar) {
224      PyErr_Format(PyExc_TypeError, "Format .%s cannot save classless data sets", defaultExtension);
225      return PYNULL;
226    }
227   
228    char *DK = NULL, *DC = NULL;
229    if (!readUndefinedSpecs(keyws, DK, DC))
230      return PYNULL;
231 
232    FILE *ostr = openExtended(filename, defaultExtension);
233    if (!ostr)
234      return PYNULL;
235
236    tabDelim_writeDomain(ostr, gen->domain, skipAttrTypes, delim, listDiscreteValues);
237    tabDelim_writeExamples(ostr, gen, delim, DK, DC);
238    fclose(ostr);
239
240    RETURN_NONE
241  PyCATCH
242}
243
244
245PyObject *saveTabDelimited(PyObject *, PyObject *args, PyObject *keyws) PYARGS(METH_VARARGS | METH_KEYWORDS, "(filename, examples[, list_discrete_values=1]) -> None")
246{
247  bool listDiscrete = true;
248
249  if (keyws) {
250    PyObject *ldv = PyDict_GetItemString(keyws, "list_discrete_values");
251    if (!ldv) {
252        ldv = PyDict_GetItemString(keyws, "listDiscreteValues");
253    }
254    listDiscrete = !ldv || (PyObject_IsTrue(ldv)!=0);
255  }
256
257  return tabDelimBasedWrite(args, keyws, "tab", false, '\t', listDiscrete);
258}
259
260PyObject *saveTxt(PyObject *, PyObject *args, PyObject *keyws) PYARGS(METH_VARARGS | METH_KEYWORDS, "(filename, examples) -> None")
261{
262  return tabDelimBasedWrite(args, keyws, "txt", true, '\t');
263}
264
265
266PyObject *saveCsv(PyObject *, PyObject *args, PyObject *keyws) PYARGS(METH_VARARGS | METH_KEYWORDS, "(filename, examples) -> None")
267{
268  return tabDelimBasedWrite(args, keyws, "csv", true, ',');
269}
270
271
272void c45_writeDomain(FILE *, PDomain);
273void c45_writeExamples(FILE *, PExampleGenerator);
274
275PyObject *saveC45(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(filename, examples) -> None")
276{ PyTRY
277    char *filename;
278    PExampleGenerator gen;
279
280    if (!PyArg_ParseTuple(args, "sO&", &filename, pt_ExampleGenerator, &gen))
281      PYERROR(PyExc_TypeError, "string and example generator expected", PYNULL)
282 
283    if (!gen->domain->classVar)
284      PYERROR(PyExc_SystemError, "C4.5 file cannot store classless data sets.", PYNULL);
285
286    if (gen->domain->classVar->varType!=TValue::INTVAR)
287      PYERROR(PyExc_SystemError, "Class in C4.5 file must be discrete.", PYNULL);
288
289    const char *oldExtension = getExtension(filename);
290
291    FILE *ostr;
292    ostr = openWReplacedExtension(filename, "names", oldExtension);
293    if (!ostr)
294      return PYNULL;
295    c45_writeDomain(ostr, gen->domain);
296    fclose(ostr);
297
298    ostr = openWReplacedExtension(filename, "data", oldExtension);
299    if (!ostr)
300      return PYNULL;
301    c45_writeExamples(ostr, gen);
302    fclose(ostr);
303
304    RETURN_NONE
305  PyCATCH
306}
307
308
309
310#include "spec_gen.hpp"
311
312
313void basket_writeExamples(FILE *, PExampleGenerator, set<int> &missing);
314void raiseWarning(bool, const char *s);
315
316PyObject *saveBasket(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(filename, examples) -> None")
317{ PyTRY
318    char *filename;
319    PExampleGenerator gen;
320
321    if (!PyArg_ParseTuple(args, "sO&:saveBasket", &filename, pt_ExampleGenerator, &gen))
322      return PYNULL;
323
324    if (gen->domain->variables->size())
325      PYERROR(PyExc_TypeError, ".basket format can only store meta-attribute values", PYNULL);
326
327    FILE *ostr = openExtended(filename, "basket");
328    if (!ostr)
329      return PYNULL;
330
331    set<int> missing;
332
333    try {
334      basket_writeExamples(ostr, gen, missing);
335    }
336    catch (...) {
337      fclose(ostr);
338      remove(filename);
339      throw;
340    }
341
342    fclose(ostr);
343
344    if (!missing.empty()) {
345      if (missing.size() == 1) {
346        char excbuf[512];
347        snprintf(excbuf, 512, "saveBasket: attribute with id %i was not found in Domain and has not been stored", *(missing.begin()));
348        raiseWarning(false, excbuf);
349      }
350
351      else {
352        string misss;
353        bool comma = false;
354        const_ITERATE(set<int>, mi, missing) {
355          if (comma)
356            misss += ", ";
357          else
358            comma = true;
359
360          char ns[20];
361          sprintf(ns, "%i", (*mi));
362          misss += ns;
363        }
364
365        char *excbuf = mlnew char[misss.length() + 128];
366        sprintf(excbuf, "saveBasket: attributes with ids not found in Domain have not been stored (%s)", misss.c_str());
367        try {
368          raiseWarning(false, excbuf);
369        }
370        catch (...) {
371          mldelete excbuf;
372          throw;
373        }
374
375        mldelete excbuf;
376      }
377    }
378
379    RETURN_NONE
380  PyCATCH
381}
382
383
384#include "lib_io.px"
Note: See TracBrowser for help on using the repository browser.