Changeset 4044:c00b2a7c519c in orange


Ignore:
Timestamp:
08/04/07 12:03:19 (7 years ago)
Author:
janezd <janez.demsar@…>
Branch:
default
Convert:
b2eeea68abb761eb2fba7f246fe6b413f009bd1b
Message:
  • removed retis and assistant file formats
  • data loaders now check the compliance of the existing attributes and report it
  • the user can now specify the degree of compliance for the attribute to be reused
  • TVariable now has a field defaultMetaId, which is set when a TMetaDescriptor with this attribute is created, and used by the data loader
  • TVariable now has a method hasValue
  • entire domains are no longer reused
  • sourceVars, sourceDomain, dontCheckStored, dontStore are now obsolete
  • function Orange_setattrDictionary can now be used to store non-builtin attributes into Orange.dict
Location:
source
Files:
20 edited

Legend:

Unmodified
Added
Removed
  • source/orange/Orange.vcproj

    r3584 r4044  
    6464                OutputFile="$(PYTHON)/lib/site-packages/orange/orange.pyd" 
    6565                AdditionalLibraryDirectories="" 
     66                GenerateDebugInformation="true" 
     67                ProgramDatabaseFile="$(IntDir)/$(ProjectName).pdb" 
    6668            /> 
    6769            <Tool 
     
    181183            > 
    182184            <File 
    183                 RelativePath="assistant.cpp" 
    184                 > 
    185             </File> 
    186             <File 
    187185                RelativePath="assoc.cpp" 
    188186                > 
     
    510508            <File 
    511509                RelativePath="redundancy.cpp" 
    512                 > 
    513             </File> 
    514             <File 
    515                 RelativePath="retisinter.cpp" 
    516510                > 
    517511            </File> 
     
    602596            > 
    603597            <File 
    604                 RelativePath="assistant.hpp" 
    605                 > 
    606             </File> 
    607             <File 
    608598                RelativePath="assoc.hpp" 
    609599                > 
     
    899889            <File 
    900890                RelativePath="relief.hpp" 
    901                 > 
    902             </File> 
    903             <File 
    904                 RelativePath="retisinter.hpp" 
    905891                > 
    906892            </File> 
  • source/orange/basket.cpp

    r3997 r4044  
    132132 
    133133 
    134 TBasketExampleGenerator::TBasketExampleGenerator(const string &datafile, PDomain sd, bool dcs, bool ds) 
     134TBasketExampleGenerator::TBasketExampleGenerator(const string &datafile, PDomain sd, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus) 
    135135: TFileExampleGenerator(datafile, mlnew TDomain()), 
    136   basketFeeder(new TBasketFeeder(sd, dcs, ds)) 
     136  basketFeeder(new TBasketFeeder(sd, false, false)) 
    137137{ 
    138138  basketFeeder->domain = domain; 
  • source/orange/basket.hpp

    r2775 r4044  
    5959  PBasketFeeder basketFeeder; 
    6060 
    61   TBasketExampleGenerator(const string &datafile, PDomain sourceDomain, bool dontCheckStored, bool dontStore); 
     61  TBasketExampleGenerator(const string &datafile, PDomain sourceDomain, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus); 
    6262 
    6363  virtual bool readExample(TFileExampleIteratorData &, TExample &); 
  • source/orange/c45inter.cpp

    r3997 r4044  
    3535TDomainDepot TC45ExampleGenerator::domainDepot; 
    3636 
    37 TC45ExampleGenerator::TC45ExampleGenerator(const string &datafile, const string &domainfile, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore) 
     37TC45ExampleGenerator::TC45ExampleGenerator(const string &datafile, const string &domainfile, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus) 
    3838: TFileExampleGenerator(datafile, PDomain()), 
    3939  skip (mlnew TBoolList()) 
    4040{ // domain needs to be initialized after skip! 
    41   domain = readDomain(domainfile, sourceVars, sourceDomain, dontCheckStored, dontStore); 
     41  domain = readDomain(domainfile, createNewOn, status, metaStatus); 
    4242} 
    4343 
     
    7575 
    7676// Reads the .names file. The format allow using different delimiters, not just those specified by the original format 
    77 PDomain TC45ExampleGenerator::readDomain(const string &stem, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore) 
     77PDomain TC45ExampleGenerator::readDomain(const string &stem, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus) 
    7878{ TFileExampleIteratorData fei(stem); 
    7979   
     
    123123  skip->push_back(false); 
    124124 
    125   if (sourceDomain) { 
    126     if (!domainDepot.checkDomain(sourceDomain.AS(TDomain), &attributeDescriptions, true, NULL)) 
    127       raiseError("given domain does not match the file"); 
    128     else 
    129       return sourceDomain; 
    130   } 
    131  
    132   return domainDepot.prepareDomain(&attributeDescriptions, true, NULL, sourceVars, NULL, dontStore, dontCheckStored); 
     125  return domainDepot.prepareDomain(&attributeDescriptions, true, NULL, createNewOn, status, metaStatus); 
    133126} 
    134127 
  • source/orange/c45inter.hpp

    r1337 r4044  
    3636  PBoolList skip;  //P a boolean list, one element per attribute, denoting which attributes to skip 
    3737 
    38   TC45ExampleGenerator(const string &datafile, const string &domainFile, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore); 
     38  TC45ExampleGenerator(const string &datafile, const string &domainFile, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus); 
    3939  TC45ExampleGenerator(const TC45ExampleGenerator &old); 
    4040 
    4141  virtual bool readExample(TFileExampleIteratorData &, TExample &); 
    4242 
    43   PDomain readDomain(const string &stem, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore); 
     43  PDomain readDomain(const string &stem, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus); 
    4444 
    4545private: 
  • source/orange/cls_misc.cpp

    r2700 r4044  
    175175} 
    176176     
    177  
     177     
     178PyObject *encodeStatus(const vector<int> &status) 
     179{ 
     180  PyObject *pystatus = PyList_New(status.size()); 
     181  int i = 0; 
     182  const_ITERATE(vector<int>, si, status) 
     183    PyList_SetItem(pystatus, i++, PyInt_FromLong(*si)); 
     184  return pystatus; 
     185} 
     186 
     187 
     188PyObject *encodeStatus(const vector<pair<int, int> > &metaStatus)   
     189{ 
     190  PyObject *pymetastatus = PyDict_New(); 
     191  for(vector<pair<int, int> >::const_iterator mii(metaStatus.begin()), mie(metaStatus.end()); mii != mie; mii++) { 
     192    PyObject *id = PyInt_FromLong(mii->first); 
     193    PyObject *status = PyInt_FromLong(mii->second); 
     194    PyDict_SetItem(pymetastatus, id, status); 
     195    Py_DECREF(id); 
     196    Py_DECREF(status); 
     197  } 
     198  return pymetastatus; 
     199} 
     200 
     201     
    178202PyObject *DomainDepot_prepareDomain(PyObject *self, PyObject *args, PyObject *) PYARGS(METH_VARARGS, "(list-of-names[, knownVars[, knownMetas[, dont-store[, dont-check-stored]]]])") 
    179203{ 
     
    182206    PVarList knownVars; 
    183207    TMetaVector knownMetas; 
    184     int dontStore = 0, dontCheckStored = 0; 
     208    int createNewOn = TVariable::Incompatible; 
    185209 
    186210    TDomainDepot::TAttributeDescriptions attributeDescriptions, metaDescriptions; 
    187211    bool hasClass; 
    188212 
    189     if (   !PyArg_ParseTuple(args, "O|O&Oii", &pynames, ccn_VarList, &knownVars, &pymetaVector, &dontStore, &dontCheckStored) 
     213    if (   !PyArg_ParseTuple(args, "O|i:DomainDepot.prepareDomain", &pynames, &createNewOn) 
    190214        || !decodeDescriptors(pynames, attributeDescriptions, metaDescriptions, hasClass)) 
    191215      return PYNULL; 
     
    198222    } 
    199223     
    200     bool domainIsNew; 
    201     int *metaIDs = mlnew int[metaDescriptions.size()]; 
    202     PDomain newDomain = ((TPyDomainDepot *)(self))->domainDepot->prepareDomain(&attributeDescriptions, hasClass, &metaDescriptions, knownVars, &knownMetas, dontStore!=0, dontCheckStored!=0, &domainIsNew, metaIDs); 
    203  
    204     return Py_BuildValue("NNi", WrapOrange(newDomain), codeMetaIDs(metaIDs, metaDescriptions.size()), domainIsNew ? 1 : 0); 
     224    vector<int> status; 
     225    vector<pair<int, int> > metaStatus; 
     226    PDomain newDomain = ((TPyDomainDepot *)(self))->domainDepot->prepareDomain(&attributeDescriptions, hasClass, &metaDescriptions, createNewOn, status, metaStatus); 
     227 
     228    return Py_BuildValue("NNN", WrapOrange(newDomain), encodeStatus(status), encodeStatus(metaStatus)); 
    205229  PyCATCH 
    206230} 
  • source/orange/cls_orange.cpp

    r3997 r4044  
    739739    return res; 
    740740  PyCATCH 
     741} 
     742 
     743 
     744int Orange_setattrDictionary(TPyOrange *self, const char *name, PyObject *args, bool warn) 
     745{ 
     746  PyObject *pyname = PyString_FromString(name); 
     747  int res = Orange_setattrDictionary(self, pyname, args, warn); 
     748  Py_DECREF(pyname); 
     749  return res; 
     750} 
     751 
     752int Orange_setattrDictionary(TPyOrange *self, PyObject* pyname, PyObject *args, bool warn) 
     753{ PyTRY 
     754    char *name = PyString_AsString(pyname); 
     755    if (args) { 
     756      /* Issue a warning unless name the name is in 'recognized_list' in some of the ancestors 
     757         or the instance's class only derived from some Orange's class, but is written in Python */ 
     758      if (warn && PyOrange_CheckType(self->ob_type)) { 
     759        char **recognized = NULL; 
     760        for(PyTypeObject *otype = self->ob_type; otype && (!recognized || !*recognized); otype = otype->tp_base) { 
     761          recognized = PyOrange_CheckType(otype) ? ((TOrangeType *)otype)->ot_recognizedattributes : NULL; 
     762          if (recognized) 
     763            for(; *recognized && strcmp(*recognized, name); recognized++); 
     764        } 
     765 
     766        if (!recognized || !*recognized) { 
     767          char sbuf[255]; 
     768          sprintf(sbuf, "'%s' is not a builtin attribute of '%s'", name, self->ob_type->tp_name); 
     769          if (PyErr_Warn(PyExc_OrangeAttributeWarning, sbuf)) 
     770            return -1; 
     771        } 
     772      } 
     773 
     774      if (!self->orange_dict) 
     775        self->orange_dict = PyOrange_DictProxy_New(self); 
     776 
     777      return PyDict_SetItem(self->orange_dict, pyname, args); 
     778    } 
     779    else { 
     780      if (self->orange_dict) 
     781        return PyDict_DelItem(self->orange_dict, pyname); 
     782      else { 
     783        PyErr_Format(PyExc_AttributeError, "instance of '%s' has no attribute '%s'", self->ob_type->tp_name, name); 
     784        return -1; 
     785      } 
     786    } 
     787  PyCATCH_1 
    741788} 
    742789 
     
    769816     
    770817    // Use instance's dictionary 
    771  
    772     char *name = PyString_AsString(pyname); 
    773     if (args) { 
    774       /* Issue a warning unless name the name is in 'recognized_list' in some of the ancestors 
    775          or the instance's class only derived from some Orange's class, but is written in Python */ 
    776       if (warn && PyOrange_CheckType(self->ob_type)) { 
    777         char **recognized = NULL; 
    778         for(PyTypeObject *otype = self->ob_type; otype && (!recognized || !*recognized); otype = otype->tp_base) { 
    779           recognized = PyOrange_CheckType(otype) ? ((TOrangeType *)otype)->ot_recognizedattributes : NULL; 
    780           if (recognized) 
    781             for(; *recognized && strcmp(*recognized, name); recognized++); 
    782         } 
    783  
    784         if (!recognized || !*recognized) { 
    785           char sbuf[255]; 
    786           sprintf(sbuf, "'%s' is not a builtin attribute of '%s'", name, self->ob_type->tp_name); 
    787           if (PyErr_Warn(PyExc_OrangeAttributeWarning, sbuf)) 
    788             return -1; 
    789         } 
    790       } 
    791  
    792       if (!self->orange_dict) 
    793         self->orange_dict = PyOrange_DictProxy_New(self); 
    794  
    795       return PyDict_SetItem(self->orange_dict, pyname, args); 
    796     } 
    797     else { 
    798       if (self->orange_dict) 
    799         return PyDict_DelItem(self->orange_dict, pyname); 
    800       else { 
    801         PyErr_Format(PyExc_AttributeError, "instance of '%s' has no attribute '%s'", self->ob_type->tp_name, name); 
    802         return -1; 
    803       } 
    804     } 
     818    return Orange_setattrDictionary(self, pyname, args, warn); 
     819     
    805820  PyCATCH_1 
    806821} 
  • source/orange/cls_orange.hpp

    r3997 r4044  
    3737ORANGE_API int Orange_setattr1(TPyOrange *self, PyObject *pyname, PyObject *args); 
    3838 
     39int Orange_setattrDictionary(TPyOrange *self, const char *name, PyObject *args, bool warn); 
     40int Orange_setattrDictionary(TPyOrange *self, PyObject *pyname, PyObject *args, bool warn); 
     41 
    3942ORANGE_API PyObject *packOrangeDictionary(PyObject *self); 
    4043/* Never tested!  
  • source/orange/domaindepot.cpp

    r3997 r4044  
    272272 
    273273PDomain TDomainDepot::prepareDomain(TAttributeDescriptions *attributes, bool hasClass, 
    274                                     TAttributeDescriptions *metas, PVarList knownVars, 
    275                                     const TMetaVector *knownMetas, 
    276                                     const bool dontStore, const bool dontCheckStored, 
    277                                     bool *domainIsNew, int *metaIDs) 
     274                                    TAttributeDescriptions *metas, const int createNewOn, 
     275                                    vector<int> &status, vector<pair<int, int> > &metaStatus) 
    278276{  
    279   if (!dontCheckStored) 
    280     ITERATE(list<TDomain *>, kdi, knownDomains) 
    281       if (checkDomain(*kdi, attributes, hasClass, metas, metaIDs)) { 
    282         if (domainIsNew) 
    283           *domainIsNew = false; 
    284         return *kdi; 
    285       } 
    286  
     277  int tStatus; 
     278 
     279  status.clear(); 
    287280  TVarList attrList; 
    288   int foo; 
    289281  PITERATE(TAttributeDescriptions, ai, attributes) { 
    290     PVariable newvar = makeVariable(*ai, foo, knownVars, knownMetas, false, false); 
    291     if ((*ai).ordered) 
    292       newvar->ordered = true; 
    293     attrList.push_back(newvar); 
     282    attrList.push_back(makeVariable(*ai, tStatus, createNewOn)); 
     283    status.push_back(tStatus); 
    294284  } 
    295285 
    296286  PDomain newDomain; 
    297  
    298287  PVariable classVar; 
    299288  if (hasClass) { 
     
    304293  newDomain = mlnew TDomain(classVar, attrList); 
    305294 
     295  metaStatus.clear(); 
    306296  if (metas) 
    307297    PITERATE(TAttributeDescriptions, mi, metas) { 
    308       int id; 
    309       PVariable var = makeVariable(*mi, id, knownVars, knownMetas, false, true); 
     298      PVariable var = makeVariable(*mi, tStatus, createNewOn); 
     299      int id = var->defaultMetaId; 
    310300      if (!id) 
    311301        id = getMetaID(); 
    312302      newDomain->metas.push_back(TMetaDescriptor(id, var)); 
    313       if (metaIDs) 
    314         *(metaIDs++) = id; 
     303      metaStatus.push_back(make_pair(id, tStatus)); 
    315304    } 
    316305     
    317   if (domainIsNew) 
    318     *domainIsNew = true; 
    319  
    320   if (!dontStore) { 
    321     newDomain->destroyNotifiers.push_back(TDomain::TDestroyNotification(&TDomainDepot::destroyNotifier, this)); 
    322     knownDomains.push_front(newDomain.getUnwrappedPtr()); 
    323   } 
    324  
    325306  return newDomain; 
    326307} 
     
    380361} 
    381362 
    382 PVariable TDomainDepot::createVariable(TAttributeDescription &desc) 
    383 { 
    384   TVariable *existing = TVariable::getExisting(desc.name, desc.varType, &desc.fixedOrderValues, &desc.values); 
    385   if (existing) 
    386     return existing; 
    387  
     363PVariable TDomainDepot::makeVariable(TAttributeDescription &desc, int &status, const int &createNewOn) 
     364{ 
     365  PVariable var = TVariable::make(desc.name, desc.varType, &desc.fixedOrderValues, &desc.values, createNewOn, &status); 
    388366   
    389   switch (desc.varType) { 
    390     case TValue::INTVAR: { 
    391       TStringList *values = mlnew TStringList(desc.fixedOrderValues); 
    392       PVariable var = mlnew TEnumVariable(desc.name, values); 
    393       augmentVariableValues(var, desc); 
    394       return var; 
    395     } 
    396  
    397     case TValue::FLOATVAR: 
    398       return mlnew TFloatVariable(desc.name); 
    399  
    400     case STRINGVAR: 
    401       return mlnew TStringVariable(desc.name); 
    402  
    403     case PYTHONVAR: 
    404       return createVariable_Python(desc.typeDeclaration, desc.name); 
    405   } 
    406  
    407   if (desc.varType==-1) 
    408     ::raiseErrorWho("makeVariable", "unknown type for attribute '%s'", desc.name.c_str()); 
    409  
    410   return (TVariable *)NULL; 
    411 } 
    412  
    413  
    414 PVariable TDomainDepot::makeVariable(TAttributeDescription &desc, int &id, PVarList knownVars, const TMetaVector *metas, bool dontCreateNew, bool preferMetas) 
    415 {  
    416   PVariable var; 
     367  if (!var) { 
     368    if (desc.varType == PYTHONVAR) { 
     369      var = createVariable_Python(desc.typeDeclaration, desc.name); 
     370      status = TVariable::NotFound; 
     371    } 
     372 
     373    if (!var) 
     374      ::raiseErrorWho("makeVariable", "unknown type for attribute '%s'", desc.name.c_str()); 
     375  } 
    417376   
    418   if (!preferMetas && knownVars) 
    419     const_PITERATE(TVarList, vi, knownVars) 
    420       if (   ((*vi)->name==desc.name) 
    421           && (    (desc.varType==-1) 
    422                || (desc.varType==STRINGVAR) && (*vi).is_derived_from(TStringVariable) 
    423                || ((*vi)->varType==desc.varType) 
    424              ) 
    425           && ((desc.varType!=PYTHONVAR) || pythonDeclarationMatches(desc.typeDeclaration, *vi)) 
    426           && checkValueOrder(*vi, desc) 
    427          ) { 
    428         id = 0; 
    429         var = *vi; 
    430       } 
    431  
    432   if (!var && metas) 
    433     const_PITERATE(TMetaVector, mi, metas) 
    434       if (   ((*mi).variable->name == desc.name) 
    435           && (    (desc.varType == -1) 
    436                || (desc.varType==STRINGVAR) && (*mi).variable.is_derived_from(TStringVariable) 
    437                || ((*mi).variable->varType==desc.varType) 
    438              ) 
    439           && ((desc.varType!=PYTHONVAR) || pythonDeclarationMatches(desc.typeDeclaration, (*mi).variable)) 
    440           && checkValueOrder((*mi).variable, desc) 
    441          ) { 
    442         id = (*mi).id; 
    443         var = (*mi).variable; 
    444       } 
    445  
    446   if (!var && preferMetas && knownVars) 
    447     const_PITERATE(TVarList, vi, knownVars) 
    448       if (   ((*vi)->name==desc.name) 
    449           && (    (desc.varType==-1) 
    450                || (desc.varType==STRINGVAR) && (*vi).is_derived_from(TStringVariable) 
    451                || ((*vi)->varType==desc.varType) 
    452              ) 
    453           && ((desc.varType!=PYTHONVAR) || pythonDeclarationMatches(desc.typeDeclaration, *vi)) 
    454           && checkValueOrder(*vi, desc) 
    455          ) { 
    456         id = 0; 
    457         var = *vi; 
    458       } 
    459  
    460   if (var) {   
    461     augmentVariableValues(var, desc); 
    462     return var; 
    463   } 
    464    
    465   id = 0; 
    466   return dontCreateNew ? PVariable() : createVariable(desc); 
    467 } 
     377  if (var && desc.ordered) 
     378    var->ordered = true; 
     379     
     380  return var; 
     381} 
     382 
     383 
  • source/orange/domaindepot.hpp

    r3997 r4044  
    7474 
    7575  PDomain prepareDomain(TAttributeDescriptions *attributes, bool hasClass, 
    76                         TAttributeDescriptions *metas, PVarList knownVars, const TMetaVector *knownMetas, 
    77                         const bool dontStore, const bool dontCheckStored, 
    78                         bool *domainIsNew = NULL, int *metaIDs = NULL); 
     76                        TAttributeDescriptions *metas, const int createNewOn, 
     77                        vector<int> &status, vector<pair<int, int> > &metaStatus); 
    7978 
    8079  static void destroyNotifier(TDomain *domain, void *); 
    8180 
    82   /* Creates a variable with given name and type. */ 
    83   static PVariable createVariable(TAttributeDescription &); 
    8481  static PVariable createVariable_Python(const string &typeDeclaration, const string &name); 
    85  
    86   /* Tries to find a variable the given name and type in knownVars or metaVector. 
    87      Any of these (or both) can be omitted. If the variable is found in metaVector, 
    88      the id is set as well; if not, id is set to 0. If the variable is not found, 
    89      a new one is created unless dontCreateNew is set to false. */ 
    90   static PVariable makeVariable(TAttributeDescription &, int &id, PVarList knownVars, const TMetaVector * = NULL, bool dontCreateNew = false, bool preferMetas = false); 
     82  static PVariable makeVariable(TAttributeDescription &desc, int &status, const int &createNewOn = TVariable::Incompatible); 
    9183 
    9284 
  • source/orange/functions.cpp

    r1602 r4044  
    3434#include "tabdelim.hpp" 
    3535#include "c45inter.hpp" 
    36 #include "retisinter.hpp" 
    3736#include "table.hpp" 
    3837#include "filter.hpp" 
  • source/orange/lib_io.cpp

    r3284 r4044  
    3737PDomain knownDomain(PyObject *keywords); // ibid 
    3838 
     39PyObject *encodeStatus(const vector<int> &Status);  // in cls_misc.cpp 
     40PyObject *encodeStatus(const vector<pair<int, int> > &metaStatus); 
     41 
    3942/* ************ FILE EXAMPLE GENERATORS ************ */ 
    4043 
     
    4447#include "tabdelim.hpp" 
    4548#include "c45inter.hpp" 
    46 #include "retisinter.hpp" 
    47 #include "assistant.hpp" 
    4849#include "basket.hpp" 
    4950 
     
    5960 
    6061 
    61 NO_PICKLE(AssistantExampleGenerator) 
    6262NO_PICKLE(BasketExampleGenerator) 
    6363NO_PICKLE(C45ExampleGenerator) 
    6464NO_PICKLE(FileExampleGenerator) 
    65 NO_PICKLE(RetisExampleGenerator) 
    6665NO_PICKLE(TabDelimExampleGenerator) 
    6766NO_PICKLE(BasketFeeder) 
     
    6968BASED_ON(BasketFeeder, Orange) 
    7069 
     70 
     71 
     72 
    7173PyObject *TabDelimExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])") 
    7274{ PyTRY 
    7375    char *fileName; 
    74     if (!PyArg_ParseTuple(args, "s", &fileName)) 
    75       PYERROR(PyExc_TypeError, "TabDelimExampleGenerator expects a string argument", PYNULL) 
     76    int createNewOn = TVariable::Incompatible; 
     77    if (!PyArg_ParseTuple(args, "s|i:TabDelimExampleGenerator.__new__", &fileName, &createNewOn)) 
     78      return NULL; 
    7679 
    7780    string name(fileName), b, a; 
    7881    if (!divDot(name, b, a)) 
    7982      name+=".tab"; 
    80  
    81     return WrapNewOrange(mlnew TTabDelimExampleGenerator(name, false, false, knownVars(keywords), knownMetas(keywords), knownDomain(keywords), false, false), type); 
     83     
     84    vector<int> status; 
     85    vector<pair<int, int> > metaStatus; 
     86    TExampleGenerator *egen = mlnew TTabDelimExampleGenerator(name, false, false, createNewOn, status, metaStatus); 
     87    return Py_BuildValue("NNN", WrapNewOrange(egen, type), encodeStatus(status), encodeStatus(metaStatus)); 
    8288  PyCATCH 
    8389} 
     
    8793{ PyTRY 
    8894    char *fileName; 
    89     if (!PyArg_ParseTuple(args, "s", &fileName)) 
    90       PYERROR(PyExc_TypeError, "BasketExampleGenerator expects a string argument", PYNULL) 
     95    int createNewOn = TVariable::Incompatible; 
     96    if (!PyArg_ParseTuple(args, "s|i:BasketExampleGenerator.__new__", &fileName, &createNewOn)) 
     97      return NULL; 
    9198 
    9299    string name(fileName), b, a; 
     
    94101      name+=".basket"; 
    95102 
    96     return WrapNewOrange(mlnew TBasketExampleGenerator(name, knownDomain(keywords), false, false), type); 
     103    vector<int> status; 
     104    vector<pair<int, int> > metaStatus; 
     105    TExampleGenerator *egen = mlnew TBasketExampleGenerator(name, PDomain(), createNewOn, status, metaStatus); 
     106    return Py_BuildValue("NNN", WrapNewOrange(egen, type), encodeStatus(status), encodeStatus(metaStatus)); 
    97107  PyCATCH 
    98108} 
     
    107117 
    108118 
    109 PyObject *RetisExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])") 
     119 
     120PyObject *C45ExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])") 
    110121{ PyTRY 
    111122    char *stem; 
    112     if (!PyArg_ParseTuple(args, "s", &stem)) 
    113       PYERROR(PyExc_TypeError, "RetisExampleGenerator expects a string argument", PYNULL) 
    114      
    115     string domain, data; 
    116     string b, a; 
    117     if (divDot(stem, b, a)) 
    118       { data=stem; domain=b+".rdo"; } 
    119     else 
    120       { data=string(stem)+".rda"; domain=string(stem)+".rdo"; } 
    121        
    122     return WrapNewOrange(mlnew TRetisExampleGenerator(data, domain, knownVars(keywords), knownDomain(keywords), false, false), type); 
    123   PyCATCH 
    124 } 
    125  
    126  
    127 PyObject *C45ExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])") 
    128 { PyTRY 
    129     char *stem; 
    130     if (!PyArg_ParseTuple(args, "s", &stem)) 
    131       PYERROR(PyExc_TypeError, "C45ExampleGenerator expects a string argument", PYNULL) 
     123    int createNewOn = TVariable::Incompatible; 
     124    if (!PyArg_ParseTuple(args, "s|i:C45ExampleGenerator.__new__", &stem, &createNewOn)) 
     125      return NULL; 
    132126 
    133127    string domain, data; 
     
    138132      { data=string(stem)+".data"; domain=string(stem)+".names"; } 
    139133 
    140     return WrapNewOrange(mlnew TC45ExampleGenerator(data, domain, knownVars(keywords), knownDomain(keywords), false, false), type); 
    141   PyCATCH 
    142 } 
    143  
    144  
    145 PyObject *AssistantExampleGenerator_new(PyTypeObject *type, PyObject *args, PyObject *keywords) BASED_ON(FileExampleGenerator, "(examples[, use=domain|varlist])") 
    146 { PyTRY 
    147     char *stem; 
    148     if (!PyArg_ParseTuple(args, "s", &stem)) 
    149       PYERROR(PyExc_TypeError, "AssistantExampleGenerator expects a string argument", PYNULL) 
    150  
    151     string domain, data; 
    152     if (strlen(stem)<=4) // we guess this is the xxxx part of ASDAxxxx.DAT 
    153       { domain="ASDO"+string(stem)+".DAT"; data="ASDA"+string(stem)+".DAT"; } 
    154     else if (strncmp(stem, "ASDA", 4)==0) 
    155       { domain="ASDO"+string(stem+4)+".DAT"; data=string(stem); } 
    156     else if (strncmp(stem, "ASDO", 4)==0) 
    157       { domain=string(stem); data="ASDA"+string(stem+4)+".DAT"; } 
    158     else // this is a longer name, but starting with ASDA 
    159       { domain="ASDO"+string(stem+4); data=string(stem); } 
    160  
    161     return WrapNewOrange(mlnew TAssistantExampleGenerator(data, domain, knownVars(keywords), knownDomain(keywords), false, false), type); 
    162   PyCATCH 
    163 } 
     134    vector<int> status; 
     135    vector<pair<int, int> > metaStatus; 
     136    TExampleGenerator *egen = mlnew TC45ExampleGenerator(data, domain, createNewOn, status, metaStatus); 
     137    return Py_BuildValue("NNO", WrapNewOrange(egen, type), encodeStatus(status), encodeStatus(metaStatus)); 
     138  PyCATCH 
     139} 
     140 
    164141 
    165142 
     
    328305 
    329306 
    330 void assistant_writeDomain(FILE *, PDomain); 
    331 void assistant_writeExamples(FILE *, PExampleGenerator); 
    332  
    333 PyObject *saveAssistant(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(filename, examples) -> None") 
    334 { PyTRY 
    335     char *filename; 
    336     PExampleGenerator gen; 
    337  
    338     if (!PyArg_ParseTuple(args, "sO&", &filename, pt_ExampleGenerator, &gen)) 
    339       PYERROR(PyExc_TypeError, "string and example generator expected", PYNULL) 
    340    
    341     if (!gen->domain->classVar) 
    342       PYERROR(PyExc_SystemError, "Assistant file cannot store classless data sets.", PYNULL); 
    343  
    344     if (gen->domain->classVar->varType!=TValue::INTVAR) 
    345       PYERROR(PyExc_SystemError, "Class in Assistant format must be discrete.", PYNULL); 
    346  
    347     FILE *ostr = fopen(("asdo" + string(filename)+".dat").c_str(), "wt"); 
    348     if (!ostr) { 
    349       PyErr_Format(PyExc_SystemError, "cannot open file 'asdo%s.dat'", filename); 
    350       return PYNULL; 
    351     } 
    352  
    353     assistant_writeDomain(ostr, gen->domain); 
    354     fclose(ostr); 
    355  
    356  
    357     ostr = fopen(("asda" + string(filename)+".dat").c_str(), "wt"); 
    358     if (!ostr) { 
    359       PyErr_Format(PyExc_SystemError, "cannot open file 'asda%s.dat'", filename); 
    360       return PYNULL; 
    361     } 
    362  
    363     assistant_writeExamples(ostr, gen); 
    364     fclose(ostr); 
    365  
    366     RETURN_NONE 
    367   PyCATCH 
    368 } 
    369  
    370  
    371  
    372 void retis_writeDomain(FILE *, PDomain); 
    373 void retis_writeExamples(FILE *, PExampleGenerator); 
    374307 
    375308#include "spec_gen.hpp" 
    376  
    377 PyObject *saveRetis(PyObject *, PyObject *args) PYARGS(METH_VARARGS, "(filename, examples) -> None") 
    378 { PyTRY 
    379     char *filename; 
    380     PExampleGenerator gen; 
    381  
    382     if (!PyArg_ParseTuple(args, "sO&", &filename, pt_ExampleGenerator, &gen)) 
    383       PYERROR(PyExc_TypeError, "string and example generator expected", PYNULL) 
    384    
    385     if (!gen->domain->classVar) 
    386       PYERROR(PyExc_SystemError, "Retis file cannot store classless data sets.", PYNULL); 
    387  
    388     if (gen->domain->classVar->varType!=TValue::FLOATVAR) 
    389       PYERROR(PyExc_SystemError, "Class in Retis domain must be continuous.", PYNULL); 
    390  
    391     TFilter_hasSpecial tfhs(true); 
    392     PExampleGenerator filtered=mlnew TFilteredGenerator(PFilter(tfhs), gen); 
    393     PExampleGenerator wnounk=mlnew TExampleTable(filtered); 
    394  
    395     FILE *ostr = fopen((string(filename)+".rdo").c_str(), "wt"); 
    396     if (!ostr) { 
    397       PyErr_Format(PyExc_SystemError, "cannot open file '%s.rdo'", filename); 
    398       return PYNULL; 
    399     } 
    400  
    401     retis_writeDomain(ostr, wnounk->domain); 
    402     fclose(ostr); 
    403  
    404     ostr = openExtended(filename, "rda"); 
    405     if (!ostr) 
    406       return PYNULL; 
    407  
    408     c45_writeExamples(ostr, wnounk); 
    409     fclose(ostr); 
    410  
    411     RETURN_NONE 
    412   PyCATCH 
    413 } 
    414309 
    415310 
  • source/orange/lib_kernel.cpp

    r3997 r4044  
    222222C_NAMED(FloatVariable, Variable, "([name=, startValue=, endValue=, stepValue=, distributed=, getValueFrom=])") 
    223223 
     224 
     225PyObject *MakeStatus() 
     226{ PyObject *mt=PyModule_New("MakeStatus"); 
     227  PyModule_AddIntConstant(mt, "OK", (int)TVariable::OK); 
     228  PyModule_AddIntConstant(mt, "MissingValues", (int)TVariable::MissingValues); 
     229  PyModule_AddIntConstant(mt, "NoRecognizedValues", (int)TVariable::NoRecognizedValues); 
     230  PyModule_AddIntConstant(mt, "Incompatible", (int)TVariable::Incompatible); 
     231  PyModule_AddIntConstant(mt, "NotFound", (int)TVariable::NotFound); 
     232  return mt; 
     233} 
     234 
     235PYCLASSCONSTANT(Variable, MakeStatus, MakeStatus()) 
     236 
     237 
    224238PyObject *Variable_getExisting(PyObject *, PyObject *args) PYARGS(METH_VARARGS | METH_STATIC, "(name, type[, fixedOrderValues[, otherValues]]) -> Variable | None") 
    225239{ 
     
    17931807 
    17941808 
    1795 TExampleTable         *readTable(char *filename, PVarList knownVars, TMetaVector *knownMetas, PDomain knownDomain, bool dontCheckStored, bool dontStore, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false); 
    1796 TExampleGenerator *readGenerator(char *filename, PVarList knownVars, TMetaVector *knownMetas, PDomain knownDomain, bool dontCheckStored, bool dontStore, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false); 
     1809TExampleTable         *readTable(char *filename, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false); 
     1810TExampleGenerator *readGenerator(char *filename, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false); 
     1811 
     1812PyObject *encodeStatus(const vector<int> &status); 
     1813PyObject *encodeStatus(const vector<pair<int, int> > &metaStatus); 
     1814 
     1815char *obsoleteFlags[] = {"dontCheckStored", "dontStore", "use", "useMetas", "domain", 0 }; 
    17971816 
    17981817PyObject *loadDataFromFile(PyTypeObject *type, char *filename, PyObject *argstuple, PyObject *keywords, bool generatorOnly = false) 
     
    18101829  PyErr_Clear(); 
    18111830 
    1812   bool dontCheckStored = hasFlag(keywords, "dontCheckStored") ? readBoolFlag(keywords, "dontCheckStored") : hasFlag(keywords, "use"); 
     1831  for(char * const *of = obsoleteFlags; *of; of++) 
     1832    if (hasFlag(keywords, *of)) 
     1833      raiseWarning("flag '%s' is not supported any longer", *of); 
     1834 
     1835  int createNewOn = TVariable::Incompatible; 
     1836  if (hasFlag(keywords, "createNewOn")) 
     1837    convertFromPython(PyDict_GetItemString(keywords, "createNewOn"), createNewOn); 
     1838 
    18131839  char *DK = NULL, *DC = NULL; 
    18141840  if (!readUndefinedSpecs(keywords, DK, DC)) 
     
    18161842 
    18171843  char *errs = NULL; 
     1844  vector<int> status; 
     1845  vector<pair<int, int> > metaStatus; 
    18181846  try { 
    18191847    TExampleGenerator *generator =  
    1820       generatorOnly ? readGenerator(filename, knownVars(keywords), knownMetas(keywords), knownDomain(keywords), dontCheckStored, readBoolFlag(keywords, "dontStore"), DK, DC, false, readBoolFlag(keywords, "noCodedDiscrete"), readBoolFlag(keywords, "noClass")) 
    1821                     : readTable(filename, knownVars(keywords), knownMetas(keywords), knownDomain(keywords), dontCheckStored, readBoolFlag(keywords, "dontStore"), DK, DC, false, readBoolFlag(keywords, "noCodedDiscrete"), readBoolFlag(keywords, "noClass")); 
    1822     if (generator) 
    1823       return WrapNewOrange(generator, type); 
     1848      generatorOnly ? readGenerator(filename, createNewOn, status, metaStatus, DK, DC, false, readBoolFlag(keywords, "noCodedDiscrete"), readBoolFlag(keywords, "noClass")) 
     1849                    : readTable(filename, createNewOn, status, metaStatus, DK, DC, false, readBoolFlag(keywords, "noCodedDiscrete"), readBoolFlag(keywords, "noClass")); 
     1850    if (generator) { 
     1851      PyObject *pygen = WrapNewOrange(generator, type); 
     1852      PyObject *pystatus = encodeStatus(status); 
     1853      PyObject *pymetastatus = encodeStatus(metaStatus); 
     1854      Orange_setattrDictionary((TPyOrange *)pygen, "attributeLoadStatus", pystatus, false); 
     1855      Orange_setattrDictionary((TPyOrange *)pygen, "metaAttributeLoadStatus", pymetastatus, false); 
     1856      Py_DECREF(pystatus); 
     1857      Py_DECREF(pymetastatus); 
     1858      return pygen; 
     1859    } 
    18241860  } 
    18251861  catch (mlexception err) {  
     
    23212357PyObject *saveTxt(PyObject *, PyObject *args, PyObject *keyws); 
    23222358PyObject *saveCsv(PyObject *, PyObject *args, PyObject *keyws); 
    2323 PyObject *saveRetis(PyObject *, PyObject *args); 
    2324 PyObject *saveAssistant(PyObject *, PyObject *args); 
    23252359PyObject *saveBasket(PyObject *, PyObject *args); 
    23262360 
     
    23652399  else if (!strcmp(extension, ".names") || !strcmp(extension, ".data") || !strcmp(extension, ".test")) 
    23662400    res = saveC45(NULL, newargs); 
    2367   else if (!strcmp(extension, ".rda") || !strcmp(extension, ".rdo")) 
    2368     res = saveRetis(NULL, newargs); 
    2369   else if (!strcmp(extension, ".dat")) 
    2370     res = saveAssistant(NULL, newargs); 
    23712401  else if (!strcmp(extension, ".basket")) 
    23722402    res = saveBasket(NULL, newargs); 
     
    26262656 
    26272657 
    2628 CONSTRUCTOR_KEYWORDS(ExampleTable, "domain use useMetas dontCheckStored dontStore filterMetas DC DK NA noClass noCodedDiscrete") 
     2658CONSTRUCTOR_KEYWORDS(ExampleTable, "domain use useMetas dontCheckStored dontStore filterMetas DC DK NA noClass noCodedDiscrete createNewOn") 
    26292659 
    26302660PyObject *ExampleTable_new(PyTypeObject *type, PyObject *argstuple, PyObject *keywords) BASED_ON(ExampleGenerator, "(filename | domain[, examples] | examples)") 
     
    26342664    char *filename = NULL; 
    26352665    if (PyArg_ParseTuple(argstuple, "s", &filename)) 
    2636       return loadDataFromFile(type, filename, argstuple, keywords); 
     2666      return loadDataFromFile(type, filename, argstuple, keywords, false); 
    26372667 
    26382668    PyErr_Clear(); 
  • source/orange/meta.cpp

    r2792 r4044  
    4949  variable(avar), 
    5050  optional(opt) 
    51 {} 
     51{ 
     52  if (!variable->defaultMetaId) 
     53    variable->defaultMetaId = id; 
     54} 
    5255 
    5356 
  • source/orange/readdata.cpp

    r3099 r4044  
    3939#include "tabdelim.hpp" 
    4040#include "c45inter.hpp" 
    41 #include "retisinter.hpp" 
    42 #include "assistant.hpp" 
    4341#include "basket.hpp" 
    4442 
     
    6058 
    6159 
    62 typedef enum {UNKNOWN, TXT, CSV, BASKET, TAB, TSV, C45, RETIS, ASSISTANT, EXCEL} TFileFormats; 
     60typedef enum {UNKNOWN, TXT, CSV, BASKET, TAB, TSV, C45, EXCEL} TFileFormats; 
    6361 
    6462char *fileTypes[][2] = {{"Tab-delimited", "*.tab"}, {"Tab-delimited (simplified)", "*.txt"}, {"Comma-separated", "*.csv"}, 
    65                        {"C45", "*.names"}, {"Retis", "*.rda"}, {"Assistant", "*.dat"}, {"Basket", "*.basket"}, 
     63                       {"C45", "*.names"}, {"Basket", "*.basket"}, 
    6664                       {NULL, NULL}}; 
    6765                        
    68 TExampleGenerator *readGenerator(char *filename, PVarList knownVars, TMetaVector *knownMetas, PDomain knownDomain, bool dontCheckStored, bool dontStore, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false) 
     66TExampleGenerator *readGenerator(char *filename, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false) 
    6967{ char *ext, *hash; 
    7068  if (filename) { 
     
    8886  if (ext) { 
    8987    if (!strcmp(ext, ".txt")) 
    90       return mlnew TTabDelimExampleGenerator(filename, true, false, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC, noCodedDiscrete, noClass); 
     88      return mlnew TTabDelimExampleGenerator(filename, true, false, createNewOn, status, metaStatus, DK, DC, noCodedDiscrete, noClass); 
    9189 
    9290    if (!strcmp(ext, ".csv")) 
    93       return mlnew TTabDelimExampleGenerator(filename, true, true, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC, noCodedDiscrete, noClass); 
     91      return mlnew TTabDelimExampleGenerator(filename, true, true, createNewOn, status, metaStatus, DK, DC, noCodedDiscrete, noClass); 
    9492 
    9593    if (!strcmp(ext, ".tab") || !strcmp(ext, ".tsv")) 
    96       return mlnew TTabDelimExampleGenerator(filename, false, false, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC); 
     94      return mlnew TTabDelimExampleGenerator(filename, false, false, createNewOn, status, metaStatus, DK, DC); 
    9795 
    9896    if (!strcmp(ext, ".basket")) 
    99       return mlnew TBasketExampleGenerator(filename, knownDomain, dontCheckStored, dontStore); 
     97      return mlnew TBasketExampleGenerator(filename, PDomain(), createNewOn, status, metaStatus); 
    10098 
    10199    if (!strcmp(ext, ".data") || !strcmp(ext, ".names") || !strcmp(ext, ".test")) 
    102100      return mlnew TC45ExampleGenerator(strcmp(ext, ".names") ? filename : string(filename, ext) + ".data", 
    103101                                                         string(filename, ext) + ".names", 
    104                                                          knownVars, knownDomain, dontCheckStored, dontStore); 
    105  
    106     if (!strcmp(ext, ".rda") || !strcmp(ext, ".rdo")) 
    107       return mlnew TRetisExampleGenerator(string(filename, ext) + ".rda", 
    108                                                            string(filename, ext) + ".rdo", 
    109                                                            knownVars, knownDomain, dontCheckStored, dontStore); 
    110  
    111     if (!strcmp(ext, ".dat")) { 
    112       char *stem; 
    113       for(stem = ext; (stem!=filename) && (*stem!=':') && (*stem!='\\'); stem--); 
    114       if (stem!=filename) 
    115         stem++; 
    116       if (!strncmp(stem, "asd", 3) || ( (stem[3]!='o') && (stem[4]!='a') )) 
    117         raiseError("invalid assistant filename (it should start with 'asdo' or 'asda')"); 
    118  
    119       stem += 3; 
    120       return mlnew TAssistantExampleGenerator(string(filename, stem) + "a" + string(stem+1, ext),  
    121                                                                string(filename, stem) + "o" + string(stem+1, ext), 
    122                                                                knownVars, knownDomain, dontCheckStored, dontStore); 
    123     } 
    124  
     102                                                         createNewOn, status, metaStatus); 
    125103    #ifdef INCLUDE_EXCEL 
    126104    if ((hash-ext==4) && !strncmp(ext, ".xls", 4)) 
     
    168146  CHECKFF(".tsv", TSV); 
    169147  CHECKFF(".names", C45); 
    170   CHECKFF(".rdo", RETIS); 
    171148 
    172149  #ifdef INCLUDE_EXCEL 
     
    193170 
    194171 
    195   /* Assistant is annoying: if path+stem is given, asd[ao] must be inserted in between */ 
    196   char *stem; 
    197   #ifdef _MSC_VER 
    198   for(stem = filename+strlen(filename); (stem != filename) && (*stem != '\\') && (*stem != ':') && (*stem != '/'); stem--); 
    199   #else 
    200   for(stem = filename+strlen(filename); (stem != filename) && (*stem != '/'); stem--); 
    201   #endif 
    202   if (stem!=filename) 
    203     stem++; 
    204    
    205   if (fileExists(string(filename, stem) + "asdo" + string(stem)+".dat")) 
    206     if (fileFormat != UNKNOWN) 
    207       raiseError("Multiple files with stem '%s' exist; specify the complete file name", filename); 
    208     else 
    209       fileFormat = ASSISTANT; 
    210  
    211  
    212172  string sfilename(filename); 
    213173 
    214174  switch (fileFormat) { 
    215175    case TXT:  
    216       return mlnew TTabDelimExampleGenerator(sfilename+".txt", true, false, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC, noCodedDiscrete, noClass); 
     176      return mlnew TTabDelimExampleGenerator(sfilename+".txt", true, false, createNewOn, status, metaStatus, DK, DC, noCodedDiscrete, noClass); 
    217177 
    218178    case CSV: 
    219       return mlnew TTabDelimExampleGenerator(sfilename+".csv", true, true, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC, noCodedDiscrete, noClass); 
     179      return mlnew TTabDelimExampleGenerator(sfilename+".csv", true, true, createNewOn, status, metaStatus, DK, DC, noCodedDiscrete, noClass); 
    220180 
    221181    case TAB: 
    222       return mlnew TTabDelimExampleGenerator(sfilename+".tab", false, false, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC); 
     182      return mlnew TTabDelimExampleGenerator(sfilename+".tab", false, false, createNewOn, status, metaStatus, DK, DC); 
    223183 
    224184    case TSV: 
    225       return mlnew TTabDelimExampleGenerator(sfilename+".tsv", false, false, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC); 
     185      return mlnew TTabDelimExampleGenerator(sfilename+".tsv", false, false, createNewOn, status, metaStatus, DK, DC); 
    226186 
    227187    case BASKET: 
    228       return mlnew TBasketExampleGenerator(sfilename+".basket", knownDomain, dontCheckStored, dontStore); 
     188      return mlnew TBasketExampleGenerator(sfilename+".basket", PDomain(), createNewOn, status, metaStatus); 
    229189 
    230190    case C45: 
    231       return mlnew TC45ExampleGenerator(sfilename + ".data", sfilename + ".names", knownVars, knownDomain, dontCheckStored, dontStore); 
    232  
    233     case RETIS: 
    234       return mlnew TRetisExampleGenerator(sfilename + ".rda", sfilename + ".rdo", knownVars, knownDomain, dontCheckStored, dontStore); 
    235  
    236     case ASSISTANT: { 
    237       return mlnew TAssistantExampleGenerator(string(filename, stem) + "asda" + string(stem)+".dat", 
    238                                                                string(filename, stem) + "asdo" + string(stem)+".dat", 
    239                                                                knownVars, knownDomain, dontCheckStored, dontStore); 
    240     } 
     191      return mlnew TC45ExampleGenerator(sfilename + ".data", sfilename + ".names", createNewOn, status, metaStatus); 
     192 
    241193 
    242194    #ifdef INCLUDE_EXCEL 
     
    255207 
    256208 
    257 TExampleTable *readTable(char *filename, PVarList knownVars, TMetaVector *knownMetas, PDomain knownDomain, bool dontCheckStored, bool dontStore, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false) 
     209TExampleTable *readTable(char *filename, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, const char *DK, const char *DC, bool noExcOnUnknown = false, bool noCodedDiscrete = false, bool noClass = false) 
    258210{ 
    259   TExampleGenerator *gen = readGenerator(filename, knownVars, knownMetas, knownDomain, dontCheckStored, dontStore, DK, DC, noExcOnUnknown, noCodedDiscrete, noClass); 
     211  TExampleGenerator *gen = readGenerator(filename, createNewOn, status, metaStatus, DK, DC, noExcOnUnknown, noCodedDiscrete, noClass); 
    260212  if (!gen) 
    261213    return NULL; 
  • source/orange/tabdelim.cpp

    r3997 r4044  
    6464 
    6565 
    66 TTabDelimExampleGenerator::TTabDelimExampleGenerator(const string &afname, bool autoDetect, bool acsv, PVarList sourceVars, TMetaVector *sourceMetas, PDomain sourceDomain, bool dontCheckStored, bool dontStore, const char *aDK, const char *aDC, bool noCodedDiscrete, bool noClass) 
     66TTabDelimExampleGenerator::TTabDelimExampleGenerator(const string &afname, bool autoDetect, bool acsv, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, const char *aDK, const char *aDC, bool noCodedDiscrete, bool noClass) 
    6767: TFileExampleGenerator(afname, PDomain()), 
    6868  attributeTypes(mlnew TIntList()), 
     
    7575{  
    7676  // domain needs to be initialized after attributeTypes, DCs, classPos, headerLines 
    77   domain = readDomain(afname, autoDetect, sourceVars, sourceMetas, sourceDomain, dontCheckStored, dontStore, noCodedDiscrete, noClass); 
     77  domain = readDomain(afname, autoDetect, createNewOn, status, metaStatus, noCodedDiscrete, noClass); 
    7878 
    7979  TFileExampleIteratorData fei(afname); 
     
    308308} 
    309309 
    310 PDomain TTabDelimExampleGenerator::readDomain(const string &stem, const bool autoDetect, PVarList sourceVars, TMetaVector *sourceMetas, PDomain sourceDomain, bool dontCheckStored, bool dontStore, bool noCodedDiscrete, bool noClass) 
     310PDomain TTabDelimExampleGenerator::readDomain(const string &stem, const bool autoDetect, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, bool noCodedDiscrete, bool noClass) 
    311311{  
    312312  // non-NULL when this cannot be tab file (reason given as result) 
     
    367367     
    368368  if (basketPos >= 0) 
    369     basketFeeder = mlnew TBasketFeeder(sourceDomain, dontCheckStored, false); 
     369//    basketFeeder = mlnew TBasketFeeder(sourceDomain, createNewOn == TVariable::OK, false); 
     370    basketFeeder = mlnew TBasketFeeder(PDomain(), createNewOn == TVariable::OK, false); 
    370371     
    371   if (sourceDomain) { 
     372/*  if (sourceDomain) { 
    372373    if (!domainDepot_tab.checkDomain(sourceDomain.AS(TDomain), &attributeDescriptions, classPos >= 0, NULL)) 
    373374      raiseError("given domain does not match the file"); 
     
    377378    return sourceDomain; 
    378379  } 
    379  
    380   int *metaIDs = mlnew int[metaDescriptions.size()]; 
    381   PDomain newDomain = domainDepot_tab.prepareDomain(&attributeDescriptions, classPos>-1, &metaDescriptions, sourceVars, sourceMetas, false, dontCheckStored, NULL, metaIDs); 
    382  
    383   int *mid = metaIDs; 
     380*/ 
     381  PDomain newDomain = domainDepot_tab.prepareDomain(&attributeDescriptions, classPos>-1, &metaDescriptions, createNewOn, status, metaStatus); 
     382 
     383  vector<pair<int, int> >::const_iterator mid(metaStatus.begin()); 
    384384  PITERATE(TIntList, ii, attributeTypes) 
    385385    if (*ii == 1) 
    386       *ii = *(mid++); 
    387  
    388   mldelete metaIDs; 
     386      *ii = mid++ ->first; 
    389387 
    390388  if (basketFeeder) 
  • source/orange/tabdelim.hpp

    r3997 r4044  
    8888 
    8989  TTabDelimExampleGenerator::TTabDelimExampleGenerator(const TTabDelimExampleGenerator &old); 
    90   TTabDelimExampleGenerator(const string &, bool autoDetect, bool csv, PVarList sourceVars = PVarList(), 
    91                             TMetaVector *sourceMetas = NULL, PDomain sourceDomain = PDomain(), 
    92                             bool dontCheckStored = false, bool dontStore = false, 
     90  TTabDelimExampleGenerator(const string &, bool autoDetect, bool csv,  
     91                            const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, 
    9392                            const char *aDK = NULL, const char *aDC = NULL, bool noCodedDiscrete = false, bool noClass = false); 
    9493  ~TTabDelimExampleGenerator(); 
     
    9998 
    10099  char *mayBeTabFile(const string &stem); 
    101   PDomain readDomain(const string &stem, const bool autoDetect, PVarList sourceVars, TMetaVector *sourceMetas, PDomain sourceDomain, bool dontCheckStored, bool dontStore, bool noCodedDiscrete, bool noClass); 
     100  PDomain readDomain(const string &stem, const bool autoDetect, const int createNewOn, vector<int> &status, vector<pair<int, int> > &metaStatus, bool noCodedDiscrete, bool noClass); 
    102101  void readTxtHeader(const string &stem, TDomainDepot::TAttributeDescriptions &); 
    103102  void readTabHeader(const string &stem, TDomainDepot::TAttributeDescriptions &); 
  • source/orange/vars.cpp

    r4017 r4044  
    4040#include "random.hpp" 
    4141#include "orvector.hpp" 
     42#include "stringvars.hpp" 
    4243 
    4344 
     
    5960const char *putAtBeginning[] = {"no", "none", "absent", "normal", 0}; 
    6061 
    61 TVariable *TVariable::getExisting(const string &name, const int &varType, TStringList *fixedOrderValues, set<string> *values) 
     62TVariable *TVariable::getExisting(const string &name, const int &varType, TStringList *fixedOrderValues, set<string> *values, 
     63                                  const int failOn, int *status) 
    6264{ 
    6365  if ((fixedOrderValues && fixedOrderValues->size() ) && (varType != TValue::INTVAR)) 
    6466    ::raiseErrorWho("Variable", "cannot specify the value list for non-discrete attributes"); 
    6567     
     68  if (failOn == TVariable::OK) { 
     69    if (status) 
     70      *status = TVariable::OK; 
     71    return NULL; 
     72  } 
     73   
     74  vector<pair<TVariable *, int> > candidates; 
     75  TStringList::const_iterator vvi, vve; 
     76   
     77  ITERATE(list<TVariable *>, vi, TVariable::allVariables) { 
     78    if (((*vi)->varType == varType) && ((*vi)->name == name)) { 
     79      int tempStat = TVariable::OK; 
     80 
     81      // non-discrete attributes are always ok, 
     82      // discrete ones need further checking if they have any defined values 
     83      TEnumVariable *evar = dynamic_cast<TEnumVariable *>(*vi); 
     84      if (evar && evar->values->size()) { 
     85       
     86        if (fixedOrderValues && !evar->checkValuesOrder(*fixedOrderValues)) 
     87          tempStat = TVariable::Incompatible; 
     88           
     89        if ((tempStat == TVariable::OK)  
     90            && (values && values->size() || fixedOrderValues && fixedOrderValues->size())) { 
     91          for(vvi = evar->values->begin(), vve = evar->values->end(); 
     92              (vvi != vve) 
     93               && (!values || (values->find(*vvi) == values->end())) 
     94               && (!fixedOrderValues || (find(fixedOrderValues->begin(), fixedOrderValues->end(), *vvi) == fixedOrderValues->end())); 
     95              vvi++); 
     96          if (vvi == vve) 
     97            tempStat = TVariable::NoRecognizedValues; 
     98         } 
     99          
     100         if ((tempStat == TVariable::OK) && fixedOrderValues) { 
     101           for(vvi = fixedOrderValues->begin(), vve = fixedOrderValues->end(); 
     102               (vvi != vve) && evar->hasValue(*vvi); 
     103               vvi++); 
     104           if (vvi != vve) 
     105             tempStat = TVariable::MissingValues; 
     106         } 
     107           
     108         if ((tempStat == TVariable::OK) && values) { 
     109           set<string>::const_iterator vsi(values->begin()), vse(values->end()); 
     110           for(; (vsi != vse) && evar->hasValue(*vsi); vsi++); 
     111           if (vsi != vse) 
     112             tempStat = TVariable::MissingValues; 
     113         } 
     114       } 
     115     
     116      candidates.push_back(make_pair(*vi, tempStat)); 
     117      if (tempStat == TVariable::OK) 
     118        break; 
     119    } 
     120  } 
     121 
    66122  TVariable *var = NULL; 
     123 
     124  int intStatus; 
     125  if (!status) 
     126    status = &intStatus; 
     127  *status = TVariable::NotFound; 
    67128   
    68   ITERATE(list<TVariable *>, vi, TVariable::allVariables) 
    69     if (((*vi)->varType == varType) && ((*vi)->name == name)) { 
    70       var = *vi; 
    71       break; 
    72     } 
     129  const int actFailOn = failOn > TVariable::Incompatible ? TVariable::Incompatible : failOn; 
     130  for(vector<pair<TVariable *, int> >::const_iterator ci(candidates.begin()), ce(candidates.end()); 
     131      ci != ce; ci++) 
     132    if (ci->second < *status) { 
     133      *status = ci->second; 
     134      if (*status < actFailOn) 
     135        var = ci->first; 
     136    } 
     137 
     138  return var; 
     139} 
     140 
     141 
     142TVariable *TVariable::make(const string &name, const int &varType, TStringList *fixedOrderValues, set<string> *values, 
     143                           const int createNewOn, int *status) 
     144{ 
     145  int intStatus; 
     146  if (!status) 
     147    status = &intStatus; 
     148 
     149  TVariable *var; 
     150  if (createNewOn == TVariable::OK) { 
     151    var = NULL; 
     152    *status = TVariable::OK; 
     153  } 
     154  else 
     155    var = getExisting(name, varType, fixedOrderValues, values, createNewOn, status); 
    73156     
     157  if (!var) { 
     158      switch (varType) { 
     159        case TValue::INTVAR: 
     160          var = mlnew TEnumVariable(name); 
     161          break; 
     162 
     163        case TValue::FLOATVAR: 
     164          var = mlnew TFloatVariable(name); 
     165          break; 
     166 
     167        case STRINGVAR: 
     168          var = mlnew TStringVariable(name); 
     169          break; 
     170     } 
     171  } 
     172 
    74173  TEnumVariable *evar = dynamic_cast<TEnumVariable *>(var); 
    75174  if (evar) {  
    76     if (fixedOrderValues) { 
    77       if (!evar->checkValuesOrder(*fixedOrderValues)) 
    78         ::raiseErrorWho("Variable", "a discrete variable named '%s' already exists, but with different order of values", name.c_str()); 
     175    if (fixedOrderValues) 
    79176      const_PITERATE(TStringList, si, fixedOrderValues) 
    80177        evar->addValue(*si); 
    81     } 
    82        
     178   
    83179    if (values) { 
    84180      vector<string> sorted; 
     
    99195  getValueFromLocked(false), 
    100196  DC_value(varType, valueDC), 
    101   DK_value(varType, valueDK) 
     197  DK_value(varType, valueDK), 
     198  defaultMetaId(0) 
    102199{} 
    103200 
     
    109206  getValueFromLocked(false), 
    110207  DC_value(varType, valueDC), 
    111   DK_value(varType, valueDK) 
     208  DK_value(varType, valueDK), 
     209  defaultMetaId(0) 
    112210{ name = aname; }; 
    113211 
     
    331429 
    332430 
     431bool TEnumVariable::hasValue(const string &s) 
     432{ 
     433  if (!valuesTree.empty()) 
     434    return valuesTree.lower_bound(s) != valuesTree.end(); 
     435     
     436  PITERATE(TStringList, vli, values) 
     437    if (*vli == s) 
     438      return true; 
     439       
     440  return false; 
     441} 
     442 
     443 
    333444/*  Converts a value from string representation to TValue by searching for it in value list. 
    334445    If value is not found, it is added to the list if 'autoValues'==true, else exception is raised. */ 
  • source/orange/vars.hpp

    r3997 r4044  
    6262  bool ordered; //P variable values are ordered 
    6363  bool distributed; //P variable values are distributions 
     64  int defaultMetaId; //P default (proposed, suggested...) meta id for this variable 
    6465 
    6566  PRandomGenerator randomGenerator; //P random generator for random values (initialized when first needed) 
     
    9394  } 
    9495 
    95   static TVariable *getExisting(const string &name, const int &varType, TStringList *fixedOrderValues = NULL, set<string> *values = NULL); 
     96  /* Status codes for getExisting and make. The codes refer to the difference between 
     97     the requested and the existing variable. 
     98     OK                  the new variable contains at least one of the existing values, 
     99                         and no new values; there is no problem with their order 
     100     MissingValues       the new variable contains at least one of the existing values, 
     101                         and some new ones; there is no problem with their order 
     102     NoRecognizedValues  the new variable contains no existing values 
     103     Incompatible        the new variable prescribes an order of values which is 
     104                         incompatible with the existing 
     105     NotFound            the variable with that name and type doesn't exist yet 
     106  */ 
     107  enum { OK, MissingValues, NoRecognizedValues, Incompatible, NotFound }; 
    96108   
     109  /* This will search for an existing variable and return it unless the status (above) 
     110     equals or exceeds the failOn argument, Incompatible or NotFound. 
     111     The status is returned if status!=NULL */ 
     112  static TVariable *getExisting(const string &name, const int &varType, TStringList *fixedOrderValues = NULL, set<string> *values = NULL, 
     113                                const int failOn = Incompatible, int *status = NULL); 
     114                                 
     115  /* Gets an existing variable or makes a new one. A new one is made if there is no 
     116     existing variable by that name or its status (above) equals or exceeds createNewOn. 
     117     The returned status equals the result of the search for an existing variable, 
     118     except if createNewOn==OK, in which case status is always OK.  */ 
     119  static TVariable *make(const string &name, const int &varType, TStringList *fixedOrderValues = NULL, set<string> *value = NULL, 
     120                                const int createNewOn = Incompatible, int *status = NULL); 
     121                                 
    97122  TVariable(const int &avarType = TValue::NONE, const bool &ordered = false); 
    98123  TVariable(const string &aname, const int &avarType=TValue::NONE, const bool &ordered = false); 
     
    144169 
    145170  void addValue(const string &); 
     171  bool hasValue(const string &); 
    146172 
    147173  virtual bool   firstValue(TValue &val) const; 
  • source/pyxtract/pyxtract_macros.hpp

    r2732 r4044  
    3232#define PYCLASSCONSTANT_INT(classname, constname, intconst) 
    3333#define PYCLASSCONSTANT_FLOAT(classname, constname, intconst) 
     34#define PYCLASSCONSTANT(classname, constname, oconst) 
    3435 
    3536#define PYPROPERTIES(x) 
Note: See TracChangeset for help on using the changeset viewer.