Changeset 3671:2695d18bf2e0 in orange
 Timestamp:
 05/18/07 18:24:20 (7 years ago)
 Branch:
 default
 Convert:
 8b2dbdaef01d6b521ecebdef5bcf899f45c5636c
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/orngEvalAttr.py
### Janez 030214: Added weights
### Inform Blaz and remove this comment
import orange


class OrderAttributesByMeasure:
    """Order the attributes of a data set by an attribute quality measure.

    measure -- callable invoked as measure(attr, data, None, weight). When it
               is None (or otherwise false), a new
               orange.MeasureAttribute_relief(m=5, k=10) is created on each call.
    """
    def __init__(self, measure=None):
        self.measure=measure

    def __call__(self, data, weight):
        """Return data.domain.attributes sorted by ascending measure score.

        data   -- example table whose domain attributes are scored
        weight -- passed through unchanged as the measure's fourth argument
        """
        if self.measure:
            measure=self.measure
        else:
            measure=orange.MeasureAttribute_relief(m=5,k=10)

        measured=[(attr, measure(attr, data, None, weight)) for attr in data.domain.attributes]
        # Key-based sort gives the same stable ascending order as the former
        # cmp-based call and does not rely on the removed cmp() builtin.
        measured.sort(key=lambda x: x[1])
        return [x[0] for x in measured]


def MeasureAttribute_Distance(attr = None, data = None):
    """Score attr on data with the distance measure, or return the measure.

    When both attr and data are given, return the score computed by a
    MeasureAttribute_DistanceClass instance; otherwise return that instance.
    """
    m = MeasureAttribute_DistanceClass()
    if attr is not None and data is not None:
        return m(attr, data)
    else:
        return m


# measure 1-D as described in Strojno ucenje (Kononenko)
class MeasureAttribute_DistanceClass(orange.MeasureAttribute):
    def __call__(self, attr, data, aprioriDist = None, weightID = None):
        """Return info gain of attr divided by the entropy of the flattened
        attribute/class contingency counts, or 0 if that entropy is not positive.

        aprioriDist and weightID are accepted for signature compatibility but
        are not used by this implementation.
        """
        import numpy
        from orngContingency import Entropy
        if attr in data.domain:  # if we receive attr as string we have to convert to variable
            attr = data.domain[attr]
        attrClassCont = orange.ContingencyAttrClass(attr, data)
        # Flatten the per-attribute-value class distributions into one list.
        dist = []
        for vals in attrClassCont.values():
            dist += list(vals)
        classAttrEntropy = Entropy(numpy.array(dist))
        infoGain = orange.MeasureAttribute_info(attr, data)
        if classAttrEntropy > 0:
            return float(infoGain) / classAttrEntropy
        else:
            return 0


# attribute quality measure based on the minimum description length principle
def MeasureAttribute_MDL(attr = None, data = None):
    """Score attr on data with the MDL measure, or return the measure.

    When both attr and data are given, return the score computed by a
    MeasureAttribute_MDLClass instance; otherwise return that instance.
    """
    m = MeasureAttribute_MDLClass()
    if attr is not None and data is not None:
        return m(attr, data)
    else:
        return m


class MeasureAttribute_MDLClass(orange.MeasureAttribute):
    def __call__(self, attr, data, aprioriDist = None, weightID = None):
        """Return (prior MDL - posterior MDL) / max(1, number of examples).

        The prior term is computed from the class distribution of data; the
        posterior terms are computed per attribute value from the
        attribute/class contingency. aprioriDist and weightID are not used.
        """
        attrClassCont = orange.ContingencyAttrClass(attr, data)
        classDist = orange.Distribution(data.domain.classVar, data).values()
        nCls = len(classDist)
        nEx = len(data)
        priorMDL = logMultipleCombs(nEx, classDist) + logMultipleCombs(nEx+nCls-1, [nEx, nCls-1])
        postPart1 = [logMultipleCombs(sum(attrClassCont[key]), attrClassCont[key].values()) for key in attrClassCont.keys()]
        postPart2 = [logMultipleCombs(sum(attrClassCont[key])+nCls-1, [sum(attrClassCont[key]), nCls-1]) for key in attrClassCont.keys()]
        # Subtract every posterior term from the prior; plain assignment here
        # would discard the prior and all but the last term.
        ret = priorMDL
        for val in postPart1 + postPart2:
            ret -= val
        return ret / max(1, nEx)


# compute n! / k1! * k2! * k3! * ... kc!
# ks = [k1, k2, ...]
def logMultipleCombs(n, ks):
    """Return the natural logarithm of n! / (k1! * k2! * ... * kc!).

    n  -- numerator argument (truncated with int() when used as a range bound)
    ks -- sequence [k1, k2, ...]; it is not modified. An empty sequence
          yields log(n!).
    """
    import math
    ks = list(ks)  # copy, so removing the maximum does not alter the caller's list
    if ks:
        # The largest k is cancelled against n! directly: n!/m! = (m+1)*...*n.
        m = max(ks)
        ks.remove(m)
    else:
        m = 0
    resArray = []
    for (start, end) in [(m+1, n+1)] + [(1, k+1) for k in ks]:
        ret = 0
        curr = 1
        for val in range(int(start), int(end)):
            curr *= val
            # Fold the running product into the log sum before it grows large.
            if curr > 1e40:
                ret += math.log(curr)
                curr = 1
        ret += math.log(curr)
        resArray.append(ret)
    # First entry is the log numerator; the rest are log denominators.
    ret = resArray[0]
    for val in resArray[1:]:
        ret -= val
    return ret



##if __name__=="__main__":
##    data = orange.ExampleTable(r"E:\Development\Orange Datasets\UCI\zoo.tab")
##    #newFeature, quality = FeatureByCartesianProduct(data, ["sex", "age"])
##    #MeasureAttribute_Distance()(newFeature, data)
##    #print logMultipleCombs(200, [70,30,100])
##    #import orngCI
##    #newFeature, quality = orngCI.FeatureByIM(data, ["milk", "airborne"], binary = 0, measure = MeasureAttribute_MDL())
Note: See TracChangeset
for help on using the changeset viewer.