# source: orange/orange/orngCI.py @ 6538:a5f65d7f0b2c
#
# Revision 6538:a5f65d7f0b2c, 19.6 KB checked in by Mitar <Mitar@…>, 4 years ago (diff)
#
# Made XPM version of the icon 32x32.

import random

import orange
import orngLookup
import orngMisc
3
4
def __learnConstructor(cls, examples, bound, weightID, argkw):
    """Build a feature constructor and, if data is given, apply it at once.

    cls      -- wrapper class to instantiate
    examples -- example table, or a false value to get the bare constructor
    bound    -- bound set of attributes; required when `examples` is given
    weightID -- passed through to the constructor call
    argkw    -- dictionary of keyword arguments for `cls`

    Returns `cls(**argkw)` when no examples are given, otherwise the result
    of calling that object with (examples, bound, weightID).

    Raises TypeError if examples are given without a bound set, or a bound
    set is given without examples.
    """
    fm = cls(**argkw)
    if examples:
        if not bound:
            raise TypeError("bound set not given")
        return fm(examples, bound, weightID)
    if bound:
        raise TypeError("invalid example set")
    return fm
15
16 
######################################################
# Minimal complexity decomposition
19
20
def FeatureByMinComplexity(examples=None, bound=None, weightID=0, **argkw):
    """Return a FeatureByMinComplexityClass built from `argkw`.

    If `examples` is given, the constructor is immediately called with
    (examples, bound, weightID) and that result is returned instead.
    """
    return __learnConstructor(FeatureByMinComplexityClass, examples, bound, weightID, argkw)
23
class FeatureByMinComplexityClass:
    """Lazy wrapper around orange.FeatureByMinComplexity.

    Keyword arguments are stored as attributes. The underlying orange
    object is created on the first call and cached in `self.instance`;
    assigning `colorIG` or `complete` drops the cache so the object is
    rebuilt on the next call.
    """

    NoCompletion = orange.FeatureByMinComplexity.NoCompletion
    CompletionByDefault = orange.FeatureByMinComplexity.CompletionByDefault
    CompletionByBayes = orange.FeatureByMinComplexity.CompletionByBayes

    def __init__(self, **keyw):
        """(colorIG=, complete=)"""
        self.__dict__.update(keyw)
        self.instance = None

    def __setattr__(self, name, value):
        # Changing a setting that createInstance copies invalidates the cache.
        if name in ("colorIG", "complete"):
            self.__dict__["instance"] = None
        self.__dict__[name] = value

    def createInstance(self):
        """Create, configure, cache and return the orange object."""
        instance = orange.FeatureByMinComplexity()
        if hasattr(self, "colorIG"):
            instance.colorIG = self.colorIG
        # NOTE(review): the sibling FeatureByIMClass calls the analogous
        # option `completion`; confirm the orange attribute is `complete`.
        if hasattr(self, "complete"):
            instance.complete = self.complete
        # Cache only once fully configured.
        self.instance = instance
        return instance

    def __call__(self, table, bound, weightID=0):
        """Call the (lazily created) orange object on table and bound set."""
        # Compare with None; do not rely on the orange object's truth value.
        if self.instance is None:
            self.createInstance()
        return self.instance(table, bound, "", weightID)
52
53
######################################################
# Minimal error decomposition (and other IM-based methods)
56
def FeatureByIM(examples=None, bound=None, weightID=0, **argkw):
    """Return a FeatureByIMClass built from `argkw`.

    If `examples` is given, the constructor is immediately called with
    (examples, bound, weightID) and that result is returned instead.
    """
    return __learnConstructor(FeatureByIMClass, examples, bound, weightID, argkw)
59
60
class FeatureByIMClass:
    """Lazy wrapper around orange.FeatureByIM.

    Keyword arguments are stored as attributes and turned into a configured
    orange.FeatureByIM by createInstance(). The object is cached in
    `self.instance` and rebuilt after any setting read by createInstance()
    is re-assigned.
    """

    NoCompletion = orange.FeatureByIM.NoCompletion
    CompletionByDefault = orange.FeatureByIM.CompletionByDefault
    CompletionByBayes = orange.FeatureByIM.CompletionByBayes

    # Every attribute createInstance() reads. The original list lacked
    # "n", "binary" and "minProfitProportion", so changing those after the
    # first call silently had no effect.
    _settings = ("IMconstructor", "completion", "clustersFromIM",
                 "stopCriterion", "columnAssessor", "measure", "m",
                 "n", "binary", "minProfitProportion")

    def __init__(self, **keyw):
        self.__dict__.update(keyw)
        self.instance = None

    def __setattr__(self, name, value):
        if name in self._settings:
            self.__dict__["instance"] = None
        self.__dict__[name] = value

    def createInstance(self):
        """Create, configure, cache and return the orange.FeatureByIM object.

        Raises AttributeError when `measure`, `m`, `minProfitProportion` or
        `n` is set but the chosen column assessor / stop criterion has no
        such attribute.
        """
        fim = orange.FeatureByIM()

        if hasattr(self, "IMconstructor"):
            fim.IMconstructor = self.IMconstructor
        else:
            fim.IMconstructor = orange.IMBySorting()

        # Components of a user-supplied clustersFromIM are left alone unless
        # the user also set them explicitly.
        ownClusters = hasattr(self, "clustersFromIM")
        if ownClusters:
            cfi = self.clustersFromIM
        else:
            cfi = orange.ClustersFromIMByAssessor()
        fim.clustersFromIM = cfi

        if hasattr(self, "columnAssessor"):
            cfi.columnAssessor = self.columnAssessor
        elif not ownClusters:
            if hasattr(self, "measure"):
                cfi.columnAssessor = orange.ColumnAssessor_Measure()
            else:
                cfi.columnAssessor = orange.ColumnAssessor_m()

        if hasattr(self, "measure"):
            if not hasattr(cfi.columnAssessor, "measure"):
                raise AttributeError("invalid combination of columnAssessor arguments (cannot set 'measure')")
            cfi.columnAssessor.measure = self.measure
        elif hasattr(self, "m"):
            if not hasattr(cfi.columnAssessor, "m"):
                raise AttributeError("invalid combination of columnAssessor arguments (cannot set 'm')")
            cfi.columnAssessor.m = self.m

        if hasattr(self, "stopCriterion"):
            cfi.stopCriterion = self.stopCriterion
        elif not ownClusters:
            if hasattr(self, "n"):
                cfi.stopCriterion = orange.StopIMClusteringByAssessor_n()
            # Binary stopping is also chosen when the column assessor is
            # (derived from) orange.ColumnAssessor_Kramer.
            elif getattr(self, "binary", 0) or isinstance(getattr(cfi, "columnAssessor", None), orange.ColumnAssessor_Kramer):
                cfi.stopCriterion = orange.StopIMClusteringByAssessor_binary()
            else:
                cfi.stopCriterion = orange.StopIMClusteringByAssessor_noProfit()

        if hasattr(self, "minProfitProportion"):
            if not hasattr(cfi.stopCriterion, "minProfitProportion"):
                raise AttributeError("invalid combination of stopping criteria (cannot set 'minProfitProportion')")
            cfi.stopCriterion.minProfitProportion = self.minProfitProportion
        elif hasattr(self, "n"):
            if not hasattr(cfi.stopCriterion, "n"):
                raise AttributeError("invalid combination of stopping criteria (cannot set 'n')")
            cfi.stopCriterion.n = self.n

        if hasattr(self, "completion"):
            fim.completion = self.completion

        # Cache only after configuration succeeded, so a failed attempt
        # cannot leave a half-configured object for later calls.
        self.instance = fim
        return fim

    def __call__(self, table, bound, weightID=0):
        """Call the (lazily created) orange object on table and bound set."""
        if self.instance is None:
            self.createInstance()
        return self.instance(table, bound, "", weightID)
131
132
# Alias: FeatureByMinError is the same factory as FeatureByIM.
FeatureByMinError = FeatureByIM
134
######################################################
# Kramer's algorithm (and similar distribution-based methods)
137
138
def FeatureByKramer(examples=None, bound=None, weightID=0, **argkw):
    """Return a FeatureByKramerClass built from `argkw`.

    If `examples` is given, the constructor is immediately called with
    (examples, bound, weightID) and that result is returned instead.
    """
    return __learnConstructor(FeatureByKramerClass, examples, bound, weightID, argkw)
141
142
class FeatureByKramerClass:
    """Lazy wrapper around orange.FeatureByDistributions.

    Keyword arguments are stored as attributes and turned into a configured
    orange object by createInstance(). The object is cached in
    `self.instance` and rebuilt after any setting read by createInstance()
    is re-assigned.
    """

    NoCompletion = orange.FeatureByIM.NoCompletion
    CompletionByDefault = orange.FeatureByIM.CompletionByDefault
    CompletionByBayes = orange.FeatureByIM.CompletionByBayes

    # Every attribute createInstance() reads. The original list lacked "n",
    # "binary" and "classifierFromDistributions".
    _settings = ("clustersFromDistributions", "classifierFromDistributions",
                 "stopCriterion", "distributionAssessor", "measure", "m",
                 "minProfitProportion", "n", "binary")

    def __init__(self, **keyw):
        self.__dict__.update(keyw)
        self.instance = None

    def __setattr__(self, name, value):
        if name in self._settings:
            self.__dict__["instance"] = None
        self.__dict__[name] = value

    def createInstance(self):
        """Create, configure, cache and return the orange object.

        Raises AttributeError for contradictory stopping criteria
        (`binary` with `n`/`minProfitProportion`, or `n` with
        `minProfitProportion`) and when a setting cannot be applied to the
        chosen distribution assessor or stop criterion.
        """
        fbd = orange.FeatureByDistributions()

        if hasattr(self, "clustersFromDistributions"):
            cfd = self.clustersFromDistributions
        else:
            cfd = orange.ClustersFromDistributionsByAssessor()
        fbd.clustersFromDistributions = cfd

        if hasattr(self, "distributionAssessor"):
            cfd.distributionAssessor = self.distributionAssessor
        elif not hasattr(self, "classifierFromDistributions") and not hasattr(self, "clustersFromDistributions"):
            if hasattr(self, "measure"):
                cfd.distributionAssessor = orange.DistributionAssessor_Measure()
            elif hasattr(self, "m"):
                cfd.distributionAssessor = orange.DistributionAssessor_m()
            else:
                cfd.distributionAssessor = orange.DistributionAssessor_Kramer()

        if hasattr(self, "measure"):
            if not hasattr(cfd.distributionAssessor, "measure"):
                raise AttributeError("invalid combination of distributionAssessor arguments (cannot set 'measure')")
            cfd.distributionAssessor.measure = self.measure
        elif hasattr(self, "m"):
            if not hasattr(cfd.distributionAssessor, "m"):
                raise AttributeError("invalid combination of distributionAssessor arguments (cannot set 'm')")
            cfd.distributionAssessor.m = self.m

        if hasattr(self, "stopCriterion"):
            cfd.stopCriterion = self.stopCriterion
        elif getattr(self, "binary", 0):
            if hasattr(self, "n") or hasattr(self, "minProfitProportion"):
                raise AttributeError("invalid combination of stopping criteria")
            cfd.stopCriterion = orange.StopDistributionClustering_binary()
        elif hasattr(self, "n"):
            if hasattr(self, "minProfitProportion"):
                raise AttributeError("invalid combination of stopping criteria")
            cfd.stopCriterion = orange.StopDistributionClustering_n()
        elif hasattr(self, "minProfitProportion") or not isinstance(getattr(cfd, "distributionAssessor", None), orange.DistributionAssessor_Kramer):
            cfd.stopCriterion = orange.StopDistributionClustering_noProfit()
        else:
            # Kramer's assessor with no explicit criterion: binary stopping.
            cfd.stopCriterion = orange.StopDistributionClustering_binary()

        if hasattr(self, "minProfitProportion"):
            if not hasattr(cfd.stopCriterion, "minProfitProportion"):
                raise AttributeError("invalid combination of stopping criteria (cannot set 'minProfitProportion')")
            cfd.stopCriterion.minProfitProportion = self.minProfitProportion
        elif hasattr(self, "n"):
            if not hasattr(cfd.stopCriterion, "n"):
                raise AttributeError("invalid combination of stopping criteria (cannot set 'n')")
            cfd.stopCriterion.n = self.n

        # Cache only after configuration succeeded.
        self.instance = fbd
        return fbd

    def __call__(self, table, bound, weightID=0):
        """Call the (lazily created) orange object on table and bound set."""
        if self.instance is None:
            self.createInstance()
        return self.instance(table, bound, "", weightID)
214
215
######################################################
# Constructive induction by random merge
218
219
def FeatureByRandom(examples=None, bound=None, weightID=0, **argkw):
    """Return a FeatureByRandomClass built from `argkw`.

    If `examples` is given, the constructor is immediately called with
    (examples, bound, weightID) and that result is returned instead.
    """
    return __learnConstructor(FeatureByRandomClass, examples, bound, weightID, argkw)
222
223
class FeatureByRandomClass:
    """Feature constructor that merges bound attribute values at random.

    `n` (default 2) is the number of values of the constructed attribute.
    The object acts as its own instance, so `instance` and
    createInstance() both give `self`.
    """

    def __init__(self, **keyw):
        self.__dict__.update(keyw)
        if not hasattr(self, "n"):
            self.n = 2
        self.instance = self

    def createInstance(self):
        return self

    def __call__(self, table, bound, weight=0):
        """Return (new attribute, random integer between 0 and 100).

        The new attribute is named after the bound attributes joined with
        "-", has values "r0" .. "r<n-1>", and its lookup table is filled
        with random value indices.

        Raises AttributeError if `bound` is empty.
        """
        # Check before building the name: the original reduced over the
        # names first and failed with TypeError on an empty bound set.
        if not len(bound):
            raise AttributeError("no bound attributes")

        bound = [table.domain[a] for a in bound]
        name = "-".join([a.name for a in bound])
        newattr = orange.EnumVariable(name, values=["r%i" % i for i in range(self.n)])

        newattr.getValueFrom = orngLookup.lookupFromBound(newattr, bound)
        newattr.getValueFrom.lookupTable = [random.randint(0, self.n - 1) for i in newattr.getValueFrom.lookupTable]

        return newattr, random.randint(0, 100)
244
245
######################################################
# Constructive induction by Cartesian product
248
def FeatureByCartesianProduct(examples=None, bound=None, weightID=0, **argkw):
    """Return a FeatureByCartesianProductClass built from `argkw`.

    If `examples` is given, the constructor is immediately called with
    (examples, bound, weightID) and that result is returned instead.
    """
    return __learnConstructor(FeatureByCartesianProductClass, examples, bound, weightID, argkw)
251
class FeatureByCartesianProductClass:
    """Feature constructor joining bound attributes into their Cartesian product.

    `measure` (default None) is an optional callable `measure(newVar, table)`
    used to score the constructed attribute. The object acts as its own
    instance, so `instance` and createInstance() both give `self`.
    """

    def __init__(self, **keyw):
        self.__dict__.update(keyw)
        if not hasattr(self, "measure"):
            self.measure = None
        self.instance = self

    def createInstance(self):
        return self

    def __call__(self, table, bound, weightID=0):
        """Return (new attribute, score).

        The new attribute is named after the bound attributes joined with
        "-". With one bound attribute it copies that attribute's values;
        otherwise it gets one value per combination of bound values. The
        score is `self.measure(newVar, table)`, or 0 without a measure.

        Raises AttributeError if `bound` is empty.
        """
        if not len(bound):
            raise AttributeError("no bound attributes")

        bound = [table.domain[a] for a in bound]
        newVar = orange.EnumVariable("-".join([a.name for a in bound]))

        if len(bound) == 1:
            newVar.values = list(bound[0].values)
            clsfr = orange.ClassifierByLookupTable(newVar, bound[0])
        else:
            for vs in orngMisc.LimitedCounter([len(a.values) for a in bound]):
                newVar.values.append("-".join([bound[i].values[v] for i, v in enumerate(vs)]))
            clsfr = orange.ClassifierByLookupTable(newVar, bound)

        # Entry i of the lookup table maps to the i-th value of newVar.
        for i in range(len(newVar.values)):
            clsfr.lookupTable[i] = orange.Value(newVar, i)

        newVar.getValueFrom = clsfr
        # Remembered so getLookupTableIndex() has a classifier to consult.
        self.classifier = clsfr

        if self.measure:
            meas = self.measure(newVar, table)
        else:
            meas = 0
        return newVar, meas

    def getLookupTableIndex(self, valArray, clsfr=None):
        """Return the lookup-table index for a list of value indices.

        `clsfr` is the lookup classifier whose `noOfValues*` attributes give
        the sizes; it defaults to the classifier built by the most recent
        __call__. The original referenced an undefined name `clsfr` and so
        raised NameError for two or more values.

        Returns None for an empty `valArray`. Raises AttributeError when a
        classifier is needed and none is available.
        """
        # NOTE(review): the index formulas are kept as written; confirm they
        # match the lookup-table layout orange actually uses.
        count = len(valArray)
        if count == 0:
            return None
        if count == 1:
            return valArray[0]
        if clsfr is None:
            clsfr = getattr(self, "classifier", None)
        if clsfr is None:
            raise AttributeError("no lookup classifier available")
        if count == 2:
            return clsfr.noOfValues1 * valArray[0] + valArray[1]
        if count == 3:
            return ((clsfr.noOfValues1 * valArray[0]) + valArray[1]) * clsfr.noOfValues2 + valArray[2]
        tmp = 0
        for i in range(len(clsfr.noOfValues) - 1):
            tmp = (tmp + valArray[i]) * clsfr.noOfValues[i]
        return tmp + valArray[-1]
315
######################################################
# Feature construction for removal of redundant values
318
class AttributeRedundanciesRemover:
    """Replace attributes whose values can be merged, dropping constant ones.

    Optional settings (keyword arguments / attributes):
    inducer -- feature constructor called as inducer(data, [attr], weight)
    m       -- value assigned to the inducer's `m`; without an `inducer`
               it selects FeatureByMinError instead of FeatureByMinComplexity
    measure -- attribute measure used to order the attributes
               (default orange.MeasureAttribute_relief(m=5, k=10))
    """

    def __init__(self, **keyw):
        self.__dict__.update(keyw)

    def __call__(self, data, weight):
        """Return `data` with redundant attribute values removed.

        Raises TypeError if both `m` and `inducer` are set but the inducer
        has no `m` attribute.
        """
        if hasattr(self, "inducer"):
            inducer = self.inducer
            if hasattr(self, "m"):
                # The original tested hasattr(inducer, m) with an undefined
                # name `m`, so this path always raised NameError.
                if not hasattr(inducer, "m"):
                    raise TypeError("invalid combination of arguments ('m' is given, but 'inducer' does not need it)")
                inducer.m = self.m
        elif hasattr(self, "m"):
            # A default inducer accepts `m` directly; it has no `m` yet, so
            # the check above would wrongly reject it.
            inducer = FeatureByMinError(m=self.m)
        else:
            inducer = FeatureByMinComplexity()

        import orngEvalAttr
        if hasattr(self, "measure"):
            measure = self.measure
        else:
            measure = orange.MeasureAttribute_relief(m=5, k=10)
        ordered = orngEvalAttr.OrderAttributesByMeasure(measure)(data, weight)

        for attr in ordered:
            newattr = inducer(data, [attr], weight)[0]
            if len(newattr.values) < len(attr.values):
                newset = [a for a in data.domain.attributes if a != attr]
                # A single remaining value carries no information, so the
                # attribute is dropped rather than replaced.
                if len(newattr.values) > 1:
                    newset.append(newattr)
                    newattr.name = attr.name + "'"
                data = data.select(newset + [data.domain.classVar])

        return data
351
######################################################
# Feature generators and structure inducers
354
def FeatureGenerator(examples=None, weightID=0, **argkw):
    """Return a FeatureGeneratorClass built from `argkw`.

    If `examples` is given, the generator is immediately called with
    (examples, weightID) and that result is returned instead.
    """
    fm = FeatureGeneratorClass(**argkw)
    if examples:
        fm = fm(examples, weightID)
    return fm
360
class FeatureGeneratorClass:
    """Apply a feature inducer to every bound set a subsets generator yields.

    Settings (keyword arguments / attributes):
    featureInducer   -- required; called as featureInducer(data, bound, weightID)
    subsetsGenerator -- optional; defaults to orange.SubsetsGenerator_constSize(2)
    """

    def __init__(self, **keyw):
        self.__dict__.update(keyw)

    def __call__(self, data, weightID=0):
        """Return the list of featureInducer results, one per bound set.

        Returns [] if the subsets generator's reset() reports failure.
        Raises AttributeError if `featureInducer` is not set.
        """
        if not hasattr(self, "featureInducer"):
            raise AttributeError("'featureInducer' not set")

        # Default of None: the original getattr had no default, so the
        # fallback below was unreachable when the attribute was missing.
        ssgen = getattr(self, "subsetsGenerator", None)
        if ssgen is None:
            ssgen = orange.SubsetsGenerator_constSize(2)
        if not ssgen.reset(data.domain.attributes):
            return []

        return [self.featureInducer(data, bound, weightID) for bound in ssgen]
376
377
def StructureInducer(examples=None, weightID=0, **argkw):
    """Return a StructureInducerClass built from `argkw`.

    If `examples` is given, the inducer is immediately called with
    (examples, weightID) and that result is returned instead.
    """
    fm = StructureInducerClass(**argkw)
    if examples:
        fm = fm(examples, weightID)
    return fm
383
384 
class StructureInducerClass:
    """Repeatedly replace attributes by induced features, then build a lookup classifier.

    Settings (keyword arguments / attributes):
    featureInducer     -- required; passed on to FeatureGenerator
    subsetsGenerator   -- optional; passed on to FeatureGenerator
    redundancyRemover  -- optional; called as redundancyRemover(data, weight)
    learnerForUnknown  -- optional; defaults to orange.BayesLearner()
    alternativeMeasure -- not implemented; any true value raises SystemError
    keepDuplicates     -- optional; if false (default) duplicates are removed
    weight             -- optional fallback weight id (backward compatibility)
    """

    def __init__(self, **keyw):
        self.__dict__.update(keyw)

        for i in ["redundancyRemover", "alternativeMeasure", "learnerForUnknown", "subsetsGenerator"]:
            if not hasattr(self, i):
                setattr(self, i, None)

    def __call__(self, data, weight=0):
        """Induce the structure on `data`.

        Returns the result of orngLookup.lookupFromExamples on the
        transformed data.

        Raises SystemError if `alternativeMeasure` is set and AttributeError
        if `featureInducer` is not set.
        """
        if self.alternativeMeasure:
            raise SystemError("alternativeMeasure not implemented yet")
        # Fail early and clearly instead of after the data was copied.
        if not hasattr(self, "featureInducer"):
            raise AttributeError("'featureInducer' not set")

        keepDuplicates = getattr(self, "keepDuplicates", 0)

        data = orange.ExampleTable(data)
        if not weight:
            # This is here for backward compatibility
            if hasattr(self, "weight"):
                weight = self.weight
            else:
                weight = orange.newmetaid()
                data.addMetaAttribute(weight)

        if self.redundancyRemover:
            data = self.redundancyRemover(data, weight)
        if not keepDuplicates:
            data.removeDuplicates(weight)

        induced = 0
        featureGenerator = FeatureGenerator(featureInducer=self.featureInducer, subsetsGenerator=self.subsetsGenerator)

        while True:
            newFeatures = featureGenerator(data, weight)
            if not newFeatures:
                break

            best = orngMisc.selectBest(newFeatures, orngMisc.compare2_lastBigger)[0]
            # Stop once the best feature is bound to all remaining attributes.
            if len(best.getValueFrom.boundset()) == len(data.domain.attributes):
                break

            induced += 1
            best.name = "c%d" % induced

            data = replaceWithInduced(best, data)
            if not keepDuplicates:
                data.removeDuplicates(weight)

        if self.learnerForUnknown:
            learnerForUnknown = self.learnerForUnknown
        else:
            learnerForUnknown = orange.BayesLearner()

        return orngLookup.lookupFromExamples(data, weight, learnerForUnknown)
440
441
######################################################
# HINT: the original algorithm for inducing a structure, in both its
# variants (minimal complexity and minimal error)
444
def HINT(examples=None, weightID=0, **argkw):
    """Return a HINTClass built from `argkw`.

    If `examples` is given, the object is immediately called with
    (examples, weightID) and that result is returned instead.
    """
    fm = HINTClass(**argkw)
    if examples:
        fm = fm(examples, weightID)
    return fm
450
class HINTClass:
    """Structure induction by minimal complexity or minimal error.

    Settings (keyword arguments / attributes):
    type      -- "complexity", "error" or "auto" (default). "auto" picks
                 "error" when orange.IMBySorting(data, []).fuzzy() is true
                 and "complexity" otherwise
    boundsize -- an int (constant bound-set size) or a (min, max) pair;
                 defaults to bound sets of size 2
    m         -- candidate values of m tuned in "error" mode
    """

    def __init__(self, **keyw):
        self.__dict__.update(keyw)

    def __call__(self, data, weight=0):
        """Induce a structure on `data` and return the resulting classifier.

        Raises ValueError for an unknown `type`.
        """
        import orngWrap

        # Local name avoids shadowing the builtin: the original bound the
        # string to `type` and then called `type(self)`.
        hintType = getattr(self, "type", "auto")

        if hasattr(self, "boundsize"):
            if isinstance(self.boundsize, int):
                subgen = orange.SubsetsGenerator_constSize(B=self.boundsize)
            else:
                subgen = orange.SubsetsGenerator_minMaxSize(min=self.boundsize[0], max=self.boundsize[1])
        else:
            subgen = orange.SubsetsGenerator_constSize(B=2)

        if hintType == "auto":
            im = orange.IMBySorting(data, [])
            if im.fuzzy():
                hintType = "error"
            else:
                hintType = "complexity"

        # subsetsGenerator is passed on so that `boundsize` takes effect;
        # the original built the generator and never used it.
        inducer = StructureInducer(removeDuplicates=1,
                                   redundancyRemover=AttributeRedundanciesRemover(),
                                   learnerForUnknown=orange.MajorityLearner(),
                                   subsetsGenerator=subgen)

        if hintType == "complexity":
            inducer.featureInducer = FeatureByMinComplexity()
            return inducer(data, weight)

        if hintType == "error":
            ms = getattr(self, "m", None)
            if ms is None:
                ms = orange.frange(0.1) + orange.frange(1.2, 3.0, 0.2) + orange.frange(4.0, 10.0, 1.0)

            inducer.redundancyRemover.inducer = inducer.featureInducer = FeatureByMinError()

            # it's the same object for redundancy remover and the real
            # inducer, so we can tune just one
            return orngWrap.Tune1Parameter(
                parameter="featureInducer.m",
                values=ms,
                object=inducer,
                returnWhat=orngWrap.Tune1Parameter.returnClassifier
            )(data, weight)

        raise ValueError("unknown HINT type: %r" % (hintType,))
500
501
502
def replaceWithInduced(attr, table):
    """Return `table` with attr's bound attributes replaced by `attr`.

    The new selection keeps every attribute not in
    attr.getValueFrom.boundset(), followed by `attr` and the class variable.
    """
    bound = list(attr.getValueFrom.boundset())
    # A list comprehension keeps the result a real list, so the
    # concatenation below does not depend on filter() returning one.
    kept = [a for a in table.domain.attributes if a not in bound]
    return table.select(kept + [attr, table.domain.classVar])
505
506
def addAnAttribute(attr, table):
    """Return `table` with `attr` inserted just before the class variable."""
    # list() makes the concatenation independent of the attribute
    # container's own type.
    return table.select(list(table.domain.attributes) + [attr, table.domain.classVar])
509
510
###########################
512
def printHierarchy(cblt):
    """Print the attribute hierarchy below a variable or lookup classifier.

    Each line has the form "name/number-of-values values".
    """
    if isinstance(cblt, orange.Variable):
        printHierarchy1(0, cblt)
        return

    # cblt does not necessarily equal cblt.classVar.getValueFrom,
    # hence a special case
    me = cblt.classVar
    print("%s/%i %s" % (me.name, len(me.values), me.values))
    # Only the bound-set lookup is best-effort; errors raised while
    # printing the children are no longer swallowed.
    try:
        bound = cblt.boundset()
    except Exception:
        return
    for i in bound:
        printHierarchy1(1, i)
527
def printHierarchy1(dep, me):
    """Print variable `me` at depth `dep`, then recursively its bound set.

    A line is indented by six spaces per level. Variables without a usable
    getValueFrom.boundset() are treated as leaves.
    """
    print('  ' * dep * 3 + ("%s/%i %s" % (me.name, len(me.values), me.values)))
    try:
        bound = me.getValueFrom.boundset()
    except Exception:
        # No bound set: a leaf of the hierarchy.
        return
    for i in bound:
        printHierarchy1(dep + 1, i)
536
537
def dotHierarchy(file, cblt):
    """Write the hierarchy below `cblt` as a "digraph G" in dot format.

    file -- a file name (opened for writing and closed here) or an object
            with a write() method (left open)
    cblt -- an orange.Variable, or a classifier with classVar and boundset()
    """
    fopened = isinstance(file, str)
    if fopened:
        file = open(file, "wt")

    try:
        file.write('digraph G {\n')

        if isinstance(cblt, orange.Variable):
            dotHierarchy1(file, "a", cblt)
        else:
            # cblt does not necessarily equal cblt.classVar.getValueFrom,
            # hence a special case
            myname = "a"
            me = cblt.classVar
            bound = cblt.boundset()
            file.write('  ' * len(myname) + ' %s [label="%s/%d", shape=plaintext]\n' % (myname, me.name, len(me.values)))
            for i, sub in enumerate(bound):
                subname = "%s%i" % (myname, i)
                file.write('   %s -> %s \n' % (myname, subname))
                dotHierarchy1(file, subname, sub)

        file.write("}\n")
    finally:
        # Close a file opened here even if writing failed.
        if fopened:
            file.close()
564
def dotHierarchy1(file, myname, me):
    """Write the dot node for `me` and, recursively, its bound set.

    `myname` is the node identifier; children are named by appending their
    index to it. Indentation grows with the length of `myname`. Variables
    without a usable getValueFrom.boundset() are written as leaves.
    """
    indent = '  ' * len(myname)
    file.write(indent + ' %s [label="%s/%d", shape=plaintext]\n' % (myname, me.name, len(me.values)))
    try:
        bound = me.getValueFrom.boundset()
    except Exception:
        # No bound set: a leaf of the hierarchy.
        return
    for i, sub in enumerate(bound):
        subname = "%s%i" % (myname, i)
        file.write(indent + (' %s -> %s \n' % (myname, subname)))
        dotHierarchy1(file, subname, sub)
# Note: See TracBrowser for help on using the repository browser.