Changeset 10559:82ca9a90b38e in orange for Orange/data/io.py


Ignore:
Timestamp:
03/16/12 16:05:22 (2 years ago)
Author:
Ales Erjavec <ales.erjavec@…>
Branch:
default
rebase_source:
46c934e23680f729a691ba08dd998e400f98f433
Message:

More conservative variable type guessing.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • Orange/data/io.py

    r10550 r10559  
    574    574          if n is None:
    575    575              n = len(values) or 1
    576           -     return (float(cont) / n) > cutoff
           576    +     return (float(cont) / n) >= cutoff
    577    577
    578    578
     
    580    580          """ Is variable with ``values`` in column (``n`` rows) a discrete variable.
    581    581          """
    582           -     return not is_variable_cont(values, n)
    583           -
    584           - def is_variable_string(values, n=None, cutuff=0.1):
           582    +     return not is_variable_cont(values, n, cutoff=1.0 - cutoff)
           583    +
           584    + def is_variable_string(values, n=None, cutuff=0.75):
    585    585          """ Is variable with ``values`` in column (``n`` rows) a string variable.
    586    586          """
    587           -     return False
           587    +     if n is None:
           588    +         n = len(values)
           589    +     return float(len(set(values))) / (n or 1.0) > cutoff
    588    590
    589    591      def load_csv(file, create_new_on=MakeStatus.Incompatible,
     
    717    719                  variables[ind] = make(var_def.name, Orange.feature.Type.Discrete, [], values, create_new_on)
    718    720              elif isinstance(var_def, _var_placeholder):
    719           -             if is_variable_cont(values):
           721    +             if is_variable_cont(values, cutoff=1.0):
    720    722                      variables[ind] = make(var_def.name, Orange.feature.Type.Continuous, [], [], create_new_on)
    721           -             elif is_variable_discrete(values):
           723    +             elif is_variable_discrete(values, cutoff=0.0):
    722    724                      variables[ind] = make(var_def.name, Orange.feature.Type.Discrete, [], values, create_new_on)
    723    725                  elif is_variable_string(values):
    724    726                      variables[ind] = make(var_def.name, Orange.feature.Type.String, [], [], create_new_on)
    725    727                  else:
    726           -                 raise ValueError("Strange column in the data")
           728    +                 # Treat it as a string anyway
           729    +                 variables[ind] = make(var_def.name, Orange.feature.Type.String, [], [], create_new_on)
    727    730
    728    731          attribute_load_status = []
Note: See TracChangeset for help on using the changeset viewer.