Changeset 7905:b4e10affc40f in orange


Ignore:
Timestamp:
05/16/11 12:31:11 (3 years ago)
Author:
matija <matija.polajnar@…>
Branch:
default
Convert:
ef88c0c0e1b15a8f41732de3d9bde881e8601ea7
Message:

orngLinProj refactoring, closes ticket #750.

Location:
orange
Files:
2 added
5 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/__init__.py

    r7834 r7905  
    4747 
    4848_import("projection") 
     49_import("projection.linear") 
    4950_import("projection.mds") 
    5051_import("projection.som") 
  • orange/doc/Orange/rst/Orange.projection.rst

    r7765 r7905  
    66   :maxdepth: 1 
    77 
     8   Orange.projection.linear 
    89   Orange.projection.mds 
    910   orange.projection.som 
  • orange/fixes/fix_changed_names.py

    r7895 r7905  
    457457           "orange.MakeRandomIndices2": "Orange.data.sample.SubsetIndices2", 
    458458 
     459           "orngLinProj.FAST_IMPLEMENTATION": "Orange.projection.linear.FAST_IMPLEMENTATION", 
     460           "orngLinProj.SLOW_IMPLEMENTATION": "Orange.projection.linear.SLOW_IMPLEMENTATION", 
     461           "orngLinProj.LDA_IMPLEMENTATION": "Orange.projection.linear.LDA_IMPLEMENTATION", 
     462           "orngLinProj.LAW_LINEAR": "Orange.projection.linear.LAW_LINEAR", 
     463           "orngLinProj.LAW_SQUARE": "Orange.projection.linear.LAW_SQUARE", 
     464           "orngLinProj.LAW_GAUSSIAN": "Orange.projection.linear.LAW_GAUSSIAN", 
     465           "orngLinProj.LAW_KNN": "Orange.projection.linear.LAW_KNN", 
     466           "orngLinProj.LAW_LINEAR_PLUS": "Orange.projection.linear.LAW_LINEAR_PLUS", 
     467           "orngLinProj.DR_PCA": "Orange.projection.linear.DR_PCA", 
     468           "orngLinProj.DR_SPCA": "Orange.projection.linear.DR_SPCA", 
     469           "orngLinProj.DR_PLS": "Orange.projection.linear.DR_PLS", 
     470           "orngLinProj.normalize": "Orange.projection.linear.normalize", 
     471           "orngLinProj.center": "Orange.projection.linear.center", 
     472           "orngLinProj.FreeViz": "Orange.projection.linear.FreeViz", 
     473           "orngLinProj.createPLSProjection": "Orange.projection.linear.create_pls_projection", 
     474           "orngLinProj.createPCAProjection": "Orange.projection.linear.create_pca_projection", 
     475           "orngLinProj.FreeVizClassifier": "Orange.projection.linear.FreeVizClassifier", 
     476           "orngLinProj.FreeVizLearner": "Orange.projection.linear.FreeVizLearner", 
     477           "orngLinProj.S2NHeuristicLearner": "Orange.projection.linear.S2NHeuristicLearner", 
    459478           } 
    460479 
  • orange/fixes/fix_orange_imports.py

    r7761 r7905  
    5050           "orngClustering": "Orange.clustering", 
    5151           "orngLookup": "Orange.classification.lookup", 
     52           "orngLinProj": "Orange.projection.linear", 
    5253           } 
    5354 
  • orange/orngLinProj.py

    r6538 r7905  
    1 import orangeom, orange 
    2 import math, random, numpy 
    3 from numpy.linalg import inv, pinv, eig      # matrix inverse and eigenvectors 
    4 from orngScaleLinProjData import orngScaleLinProjData 
    5 import orngVisFuncts 
    6 try: 
    7     import numpy.ma as MA 
    8 except: 
    9     import numpy.core.ma as MA 
    10  
# Values of FreeViz.implementation: they choose which optimize_*_Separation
# method FreeViz.optimizeSeparation dispatches to.
FAST_IMPLEMENTATION = 0
SLOW_IMPLEMENTATION = 1
LDA_IMPLEMENTATION = 2

# Values of FreeViz.law: passed through unchanged as the ``law`` argument of
# the orangeom anchor optimizers used by FreeViz.optimize_FAST_Separation.
LAW_LINEAR = 0
LAW_SQUARE = 1
LAW_GAUSSIAN = 2
LAW_KNN = 3
LAW_LINEAR_PLUS = 4

# Values of the ``method`` argument of FreeViz.findProjection
# (PCA, supervised PCA, or PLS).
DR_PCA = 0
DR_SPCA = 1
DR_PLS = 2
    25  
    26 def normalize(x): 
    27     return x / numpy.linalg.norm(x) 
    28  
    29 def center(matrix): 
    30     '''centers all variables, i.e. subtracts averages in colomns 
    31     and divides them by their standard deviations''' 
    32     n,m = numpy.shape(matrix) 
    33     return (matrix - numpy.multiply(matrix.mean(axis = 0), numpy.ones((n,m))))/numpy.std(matrix, axis = 0) 
    34  
    35  
    36 class FreeViz: 
    37     def __init__(self, graph = None): 
    38         if not graph: 
    39             graph = orngScaleLinProjData() 
    40         self.graph = graph 
    41  
    42         self.implementation = 0 
    43         self.attractG = 1.0 
    44         self.repelG = 1.0 
    45         self.law = LAW_LINEAR 
    46         self.restrain = 0 
    47         self.forceBalancing = 0 
    48         self.forceSigma = 1.0 
    49         self.mirrorSymmetry = 1 
    50         self.useGeneralizedEigenvectors = 1 
    51  
    52         # s2n heuristics parameters 
    53         self.stepsBeforeUpdate = 10 
    54         self.s2nSpread = 5 
    55         self.s2nPlaceAttributes = 50 
    56         self.s2nMixData = None 
    57         self.autoSetParameters = 1 
    58         self.classPermutationList = None 
    59         self.attrsNum = [5, 10, 20, 30, 50, 70, 100, 150, 200, 300, 500, 750, 1000] 
    60         #attrsNum = [5, 10, 20, 30, 50, 70, 100, 150, 200, 300, 500, 750, 1000, 2000, 3000, 5000, 10000, 50000] 
    61  
    62     def clearData(self): 
    63         self.s2nMixData = None 
    64         self.classPermutationList = None 
    65  
    66     def setStatusBarText(self, *args): 
    67         pass 
    68  
    69     def showAllAttributes(self): 
    70         self.graph.anchorData = [(0,0, a.name) for a in self.graph.dataDomain.attributes] 
    71         self.radialAnchors() 
    72  
    73     def getShownAttributeList(self): 
    74         return [anchor[2] for anchor in self.graph.anchorData] 
    75  
    76     def radialAnchors(self): 
    77         attrList = self.getShownAttributeList() 
    78         if not attrList: return 
    79         phi = 2*math.pi/len(attrList) 
    80         self.graph.anchorData = [(math.cos(i*phi), math.sin(i*phi), a) for i, a in enumerate(attrList)] 
    81  
    82  
    83     def randomAnchors(self): 
    84         if not self.graph.haveData: return 
    85         attrList = self.getShownAttributeList() 
    86         if not attrList: return 
    87  
    88         if self.restrain == 0: 
    89             def ranch(i, label): 
    90                 r = 0.3+0.7*random.random() 
    91                 phi = 2*math.pi*random.random() 
    92                 return (r*math.cos(phi), r*math.sin(phi), label) 
    93  
    94         elif self.restrain == 1: 
    95             def ranch(i, label): 
    96                 phi = 2*math.pi*random.random() 
    97                 return (math.cos(phi), math.sin(phi), label) 
    98  
    99         else: 
    100             def ranch(i, label): 
    101                 r = 0.3+0.7*random.random() 
    102                 phi = 2*math.pi * i / max(1, len(attrList)) 
    103                 return (r*math.cos(phi), r*math.sin(phi), label) 
    104  
    105         anchors = [ranch(*a) for a in enumerate(attrList)] 
    106  
    107         if not self.restrain == 1: 
    108             maxdist = math.sqrt(max([x[0]**2+x[1]**2 for x in anchors])) 
    109             anchors = [(x[0]/maxdist, x[1]/maxdist, x[2]) for x in anchors] 
    110  
    111         if not self.restrain == 2 and self.mirrorSymmetry: 
    112             #### Need to rotate and mirror here 
    113             pass 
    114  
    115         self.graph.anchorData = anchors 
    116  
    117     def optimizeSeparation(self, steps = 10, singleStep = False, distances=None): 
    118         # check if we have data and a discrete class 
    119         if not self.graph.haveData or len(self.graph.rawData) == 0 or not (self.graph.dataHasClass or distances): 
    120             return 
    121         ai = self.graph.attributeNameIndex 
    122         attrIndices = [ai[label] for label in self.getShownAttributeList()] 
    123         if not attrIndices: return 
    124  
    125         if self.implementation == FAST_IMPLEMENTATION: 
    126             return self.optimize_FAST_Separation(steps, singleStep, distances) 
    127  
    128         if self.__class__ != FreeViz: from PyQt4.QtGui import qApp 
    129         if singleStep: steps = 1 
    130         if self.implementation == SLOW_IMPLEMENTATION:  impl = self.optimize_SLOW_Separation 
    131         elif self.implementation == LDA_IMPLEMENTATION: impl = self.optimize_LDA_Separation 
    132         XAnchors = None; YAnchors = None 
    133  
    134         for c in range((singleStep and 1) or 50): 
    135             for i in range(steps): 
    136                 if self.__class__ != FreeViz and self.cancelOptimization == 1: return 
    137                 self.graph.anchorData, (XAnchors, YAnchors) = impl(attrIndices, self.graph.anchorData, XAnchors, YAnchors) 
    138             if self.__class__ != FreeViz: qApp.processEvents() 
    139             if hasattr(self.graph, "updateGraph"): self.graph.updateData() 
    140             #self.recomputeEnergy() 
    141  
    142     def optimize_FAST_Separation(self, steps = 10, singleStep = False, distances=None): 
    143         optimizer = [orangeom.optimizeAnchors, orangeom.optimizeAnchorsRadial, orangeom.optimizeAnchorsR][self.restrain] 
    144         ai = self.graph.attributeNameIndex 
    145         attrIndices = [ai[label] for label in self.getShownAttributeList()] 
    146         if not attrIndices: return 
    147  
    148         # repeat until less than 1% energy decrease in 5 consecutive iterations*steps steps 
    149         positions = [numpy.array([x[:2] for x in self.graph.anchorData])] 
    150         neededSteps = 0 
    151  
    152         validData = self.graph.getValidList(attrIndices) 
    153         nValid = sum(validData)  
    154         if not nValid: 
    155             return 0 
    156  
    157         data = numpy.compress(validData, self.graph.noJitteringScaledData, axis=1) 
    158         data = numpy.transpose(data).tolist() 
    159         if self.__class__ != FreeViz: from PyQt4.QtGui import qApp 
    160  
    161         if distances: 
    162             if nValid != len(validData): 
    163                 classes = orange.SymMatrix(nValid) 
    164                 r = 0 
    165                 for ro, vr in enumerate(validData): 
    166                     if not vr: 
    167                         continue 
    168                     c = 0 
    169                     for co, vr in enumerate(validData): 
    170                         if vr: 
    171                             classes[r, c] = distances[ro, co] 
    172                             c += 1 
    173                     r += 1   
    174             else: 
    175                 classes = distances 
    176         else: 
    177             classes = numpy.compress(validData, self.graph.originalData[self.graph.dataClassIndex]).tolist() 
    178         while 1: 
    179             self.graph.anchorData = optimizer(data, classes, self.graph.anchorData, attrIndices, 
    180                                               attractG = self.attractG, repelG = self.repelG, law = self.law, 
    181                                               sigma2 = self.forceSigma, dynamicBalancing = self.forceBalancing, steps = steps, 
    182                                               normalizeExamples = self.graph.normalizeExamples, 
    183                                               contClass = 2 if distances else self.graph.dataHasContinuousClass, 
    184                                               mirrorSymmetry = self.mirrorSymmetry) 
    185             neededSteps += steps 
    186  
    187             if self.__class__ != FreeViz: 
    188                 qApp.processEvents() 
    189  
    190             if hasattr(self.graph, "updateData"): 
    191                 self.graph.potentialsBmp = None 
    192                 self.graph.updateData() 
    193  
    194             positions = positions[-49:]+[numpy.array([x[:2] for x in self.graph.anchorData])] 
    195             if len(positions)==50: 
    196                 m = max(numpy.sum((positions[0]-positions[49])**2), 0) 
    197                 if m < 1e-3: break 
    198             if singleStep or (self.__class__ != FreeViz and self.cancelOptimization): 
    199                 break 
    200         return neededSteps 
    201  
    202     def optimize_LDA_Separation(self, attrIndices, anchorData, XAnchors = None, YAnchors = None): 
    203         if not self.graph.haveData or len(self.graph.rawData) == 0 or not self.graph.dataHasDiscreteClass:  
    204             return anchorData, (XAnchors, YAnchors) 
    205         classCount = len(self.graph.dataDomain.classVar.values) 
    206         validData = self.graph.getValidList(attrIndices) 
    207         selectedData = numpy.compress(validData, numpy.take(self.graph.noJitteringScaledData, attrIndices, axis = 0), axis = 1) 
    208  
    209         if XAnchors == None: 
    210             XAnchors = numpy.array([a[0] for a in anchorData], numpy.float) 
    211         if YAnchors == None: 
    212             YAnchors = numpy.array([a[1] for a in anchorData], numpy.float) 
    213  
    214         transProjData = self.graph.createProjectionAsNumericArray(attrIndices, validData = validData, XAnchors = XAnchors, YAnchors = YAnchors, scaleFactor = self.graph.scaleFactor, normalize = self.graph.normalizeExamples, useAnchorData = 1) 
    215         if transProjData == None: 
    216             return anchorData, (XAnchors, YAnchors) 
    217  
    218         projData = numpy.transpose(transProjData) 
    219         x_positions, y_positions, classData = projData[0], projData[1], projData[2] 
    220  
    221         averages = [] 
    222         for i in range(classCount): 
    223             ind = classData == i 
    224             xpos = numpy.compress(ind, x_positions);  ypos = numpy.compress(ind, y_positions) 
    225             xave = numpy.sum(xpos)/len(xpos);         yave = numpy.sum(ypos)/len(ypos) 
    226             averages.append((xave, yave)) 
    227  
    228         # compute the positions of all the points. we will try to move all points so that the center will be in the (0,0) 
    229         xCenterVector = -numpy.sum(x_positions) / len(x_positions) 
    230         yCenterVector = -numpy.sum(y_positions) / len(y_positions) 
    231         centerVectorLength = math.sqrt(xCenterVector*xCenterVector + yCenterVector*yCenterVector) 
    232  
    233         meanDestinationVectors = [] 
    234  
    235         for i in range(classCount): 
    236             xDir = 0.0; yDir = 0.0; rs = 0.0 
    237             for j in range(classCount): 
    238                 if i==j: continue 
    239                 r = math.sqrt((averages[i][0] - averages[j][0])**2 + (averages[i][1] - averages[j][1])**2) 
    240                 if r == 0.0: 
    241                     xDir += math.cos((i/float(classCount))*2*math.pi) 
    242                     yDir += math.sin((i/float(classCount))*2*math.pi) 
    243                     r = 0.0001 
    244                 else: 
    245                     xDir += (1/r**3) * ((averages[i][0] - averages[j][0])) 
    246                     yDir += (1/r**3) * ((averages[i][1] - averages[j][1])) 
    247                 #rs += 1/r 
    248             #actualDirAmpl = math.sqrt(xDir**2 + yDir**2) 
    249             #s = abs(xDir)+abs(yDir) 
    250             #xDir = rs * (xDir/s) 
    251             #yDir = rs * (yDir/s) 
    252             meanDestinationVectors.append((xDir, yDir)) 
    253  
    254  
    255         maxLength = math.sqrt(max([x**2 + y**2 for (x,y) in meanDestinationVectors])) 
    256         meanDestinationVectors = [(x/(2*maxLength), y/(2*maxLength)) for (x,y) in meanDestinationVectors]     # normalize destination vectors to some normal values 
    257         meanDestinationVectors = [(meanDestinationVectors[i][0]+averages[i][0], meanDestinationVectors[i][1]+averages[i][1]) for i in range(len(meanDestinationVectors))]    # add destination vectors to the class averages 
    258         #meanDestinationVectors = [(x + xCenterVector/5, y + yCenterVector/5) for (x,y) in meanDestinationVectors]   # center mean values 
    259         meanDestinationVectors = [(x + xCenterVector, y + yCenterVector) for (x,y) in meanDestinationVectors]   # center mean values 
    260  
    261         FXs = numpy.zeros(len(x_positions), numpy.float)        # forces 
    262         FYs = numpy.zeros(len(x_positions), numpy.float) 
    263  
    264         for c in range(classCount): 
    265             ind = (classData == c) 
    266             numpy.putmask(FXs, ind, meanDestinationVectors[c][0] - x_positions) 
    267             numpy.putmask(FYs, ind, meanDestinationVectors[c][1] - y_positions) 
    268  
    269         # compute gradient for all anchors 
    270         GXs = numpy.array([sum(FXs * selectedData[i]) for i in range(len(anchorData))], numpy.float) 
    271         GYs = numpy.array([sum(FYs * selectedData[i]) for i in range(len(anchorData))], numpy.float) 
    272  
    273         m = max(max(abs(GXs)), max(abs(GYs))) 
    274         GXs /= (20*m); GYs /= (20*m) 
    275  
    276         newXAnchors = XAnchors + GXs 
    277         newYAnchors = YAnchors + GYs 
    278  
    279         # normalize so that the anchor most far away will lie on the circle 
    280         m = math.sqrt(max(newXAnchors**2 + newYAnchors**2)) 
    281         newXAnchors /= m 
    282         newYAnchors /= m 
    283  
    284         #self.parentWidget.updateGraph() 
    285  
    286         """ 
    287         for a in range(len(anchorData)): 
    288             x = anchorData[a][0]; y = anchorData[a][1]; 
    289             self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [x, x+GXs[a]], yData = [y, y+GYs[a]], forceFilledSymbols = 1, lineWidth=3) 
    290  
    291         for i in range(classCount): 
    292             self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [averages[i][0], meanDestinationVectors[i][0]], yData = [averages[i][1], meanDestinationVectors[i][1]], forceFilledSymbols = 1, lineWidth=3) 
    293             self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, xData = [averages[i][0], averages[i][0]], yData = [averages[i][1], averages[i][1]], forceFilledSymbols = 1, lineWidth=5) 
    294         """ 
    295         #self.parentWidget.graph.repaint() 
    296         #self.graph.anchorData = [(newXAnchors[i], newYAnchors[i], anchorData[i][2]) for i in range(len(anchorData))] 
    297         #self.graph.updateData(attrs, 0) 
    298         return [(newXAnchors[i], newYAnchors[i], anchorData[i][2]) for i in range(len(anchorData))], (newXAnchors, newYAnchors) 
    299  
    300  
    301     def optimize_SLOW_Separation(self, attrIndices, anchorData, XAnchors = None, YAnchors = None): 
    302         if not self.graph.haveData or len(self.graph.rawData) == 0 or not self.graph.dataHasDiscreteClass:  
    303             return anchorData, (XAnchors, YAnchors) 
    304         validData = self.graph.getValidList(attrIndices) 
    305         selectedData = numpy.compress(validData, numpy.take(self.graph.noJitteringScaledData, attrIndices, axis = 0), axis = 1) 
    306  
    307         if XAnchors == None: 
    308             XAnchors = numpy.array([a[0] for a in anchorData], numpy.float) 
    309         if YAnchors == None: 
    310             YAnchors = numpy.array([a[1] for a in anchorData], numpy.float) 
    311  
    312         transProjData = self.graph.createProjectionAsNumericArray(attrIndices, validData = validData, XAnchors = XAnchors, YAnchors = YAnchors, scaleFactor = self.graph.scaleFactor, normalize = self.graph.normalizeExamples, useAnchorData = 1) 
    313         if transProjData == None: 
    314             return anchorData, (XAnchors, YAnchors) 
    315  
    316         projData = numpy.transpose(transProjData) 
    317         x_positions = projData[0]; x_positions2 = numpy.array(x_positions) 
    318         y_positions = projData[1]; y_positions2 = numpy.array(y_positions) 
    319         classData = projData[2]  ; classData2 = numpy.array(classData) 
    320  
    321         FXs = numpy.zeros(len(x_positions), numpy.float)        # forces 
    322         FYs = numpy.zeros(len(x_positions), numpy.float) 
    323         GXs = numpy.zeros(len(anchorData), numpy.float)        # gradients 
    324         GYs = numpy.zeros(len(anchorData), numpy.float) 
    325  
    326         rotateArray = range(len(x_positions)); rotateArray = rotateArray[1:] + [0] 
    327         for i in range(len(x_positions)-1): 
    328             x_positions2 = numpy.take(x_positions2, rotateArray) 
    329             y_positions2 = numpy.take(y_positions2, rotateArray) 
    330             classData2 = numpy.take(classData2, rotateArray) 
    331             dx = x_positions2 - x_positions 
    332             dy = y_positions2 - y_positions 
    333             rs2 = dx**2 + dy**2 
    334             rs2 += numpy.where(rs2 == 0.0, 0.0001, 0.0)    # replace zeros to avoid divisions by zero 
    335             rs = numpy.sqrt(rs2) 
    336  
    337             F = numpy.zeros(len(x_positions), numpy.float) 
    338             classDiff = numpy.where(classData == classData2, 1, 0) 
    339             numpy.putmask(F, classDiff, 150*self.attractG*rs2) 
    340             numpy.putmask(F, 1-classDiff, -self.repelG/rs2) 
    341             FXs += F * dx / rs 
    342             FYs += F * dy / rs 
    343  
    344         # compute gradient for all anchors 
    345         GXs = numpy.array([sum(FXs * selectedData[i]) for i in range(len(anchorData))], numpy.float) 
    346         GYs = numpy.array([sum(FYs * selectedData[i]) for i in range(len(anchorData))], numpy.float) 
    347  
    348         m = max(max(abs(GXs)), max(abs(GYs))) 
    349         GXs /= (20*m); GYs /= (20*m) 
    350  
    351         newXAnchors = XAnchors + GXs 
    352         newYAnchors = YAnchors + GYs 
    353  
    354         # normalize so that the anchor most far away will lie on the circle 
    355         m = math.sqrt(max(newXAnchors**2 + newYAnchors**2)) 
    356         newXAnchors /= m 
    357         newYAnchors /= m 
    358         return [(newXAnchors[i], newYAnchors[i], anchorData[i][2]) for i in range(len(anchorData))], (newXAnchors, newYAnchors) 
    359  
    360  
    361     # ############################################################### 
    362     # S2N HEURISTIC FUNCTIONS 
    363     # ############################################################### 
    364  
    365  
    366  
    367     # place a subset of attributes around the circle. this subset must contain "good" attributes for each of the class values 
    368     def s2nMixAnchors(self, setAttributeListInRadviz = 1): 
    369         # check if we have data and a discrete class 
    370         if not self.graph.haveData or len(self.graph.rawData) == 0 or not self.graph.dataHasDiscreteClass:  
    371             self.setStatusBarText("S2N only works on data with a discrete class value") 
    372             return 
    373  
    374         # compute the quality of attributes only once 
    375         if self.s2nMixData == None: 
    376             rankedAttrs, rankedAttrsByClass = orngVisFuncts.findAttributeGroupsForRadviz(self.graph.rawData, orngVisFuncts.S2NMeasureMix()) 
    377             self.s2nMixData = (rankedAttrs, rankedAttrsByClass) 
    378             classCount = len(rankedAttrsByClass) 
    379             attrs = rankedAttrs[:(self.s2nPlaceAttributes/classCount)*classCount]    # select appropriate number of attributes 
    380         else: 
    381             classCount = len(self.s2nMixData[1]) 
    382             attrs = self.s2nMixData[0][:(self.s2nPlaceAttributes/classCount)*classCount] 
    383  
    384         if len(attrs) == 0: 
    385             self.setStatusBarText("No discrete attributes found") 
    386             return 0 
    387  
    388         arr = [0]       # array that will tell where to put the next attribute 
    389         for i in range(1,len(attrs)/2): arr += [i,-i] 
    390  
    391         phi = (2*math.pi*self.s2nSpread)/(len(attrs)*10.0) 
    392         anchorData = []; start = [] 
    393         arr2 = arr[:(len(attrs)/classCount)+1] 
    394         for cls in range(classCount): 
    395             startPos = (2*math.pi*cls)/classCount 
    396             if self.classPermutationList: cls = self.classPermutationList[cls] 
    397             attrsCls = attrs[cls::classCount] 
    398             tempData = [(arr2[i], math.cos(startPos + arr2[i]*phi), math.sin(startPos + arr2[i]*phi), attrsCls[i]) for i in range(min(len(arr2), len(attrsCls)))] 
    399             start.append(len(anchorData) + len(arr2)/2) # starting indices for each class value 
    400             tempData.sort() 
    401             anchorData += [(x, y, name) for (i, x, y, name) in tempData] 
    402  
    403         anchorData = anchorData[(len(attrs)/(2*classCount)):] + anchorData[:(len(attrs)/(2*classCount))] 
    404         self.graph.anchorData = anchorData 
    405         attrNames = [anchor[2] for anchor in anchorData] 
    406  
    407         if self.__class__ != FreeViz: 
    408             if setAttributeListInRadviz: 
    409                 self.parentWidget.setShownAttributeList(attrNames) 
    410             self.graph.updateData(attrNames) 
    411             self.graph.repaint() 
    412         return 1 
    413  
    414     # find interesting linear projection using PCA, SPCA, or PLS 
    415     def findProjection(self, method, attrIndices = None, setAnchors = 0, percentDataUsed = 100): 
    416         if not self.graph.haveData: return 
    417         ai = self.graph.attributeNameIndex 
    418         if attrIndices == None: 
    419             attributes = self.getShownAttributeList() 
    420             attrIndices = [ai[label] for label in attributes] 
    421         if len(attrIndices) == 0: return None 
    422  
    423         validData = self.graph.getValidList(attrIndices) 
    424         if sum(validData) == 0: return None 
    425  
    426         dataMatrix = numpy.compress(validData, numpy.take(self.graph.noJitteringScaledData, attrIndices, axis = 0), axis = 1) 
    427         if self.graph.dataHasClass: 
    428             classArray = numpy.compress(validData, self.graph.noJitteringScaledData[self.graph.dataClassIndex]) 
    429  
    430         if percentDataUsed != 100: 
    431             indices = orange.MakeRandomIndices2(self.graph.rawData, 1.0-(float(percentDataUsed)/100.0)) 
    432             try: 
    433                 dataMatrix = numpy.compress(indices, dataMatrix, axis = 1) 
    434             except: 
    435                 pass 
    436             if self.graph.dataHasClass: 
    437                 classArray = numpy.compress(indices, classArray) 
    438  
    439         vectors = None 
    440         if method == DR_PCA: 
    441             vals, vectors = createPCAProjection(dataMatrix, NComps = 2, useGeneralizedEigenvectors = self.useGeneralizedEigenvectors) 
    442         elif method == DR_SPCA and self.graph.dataHasClass: 
    443             vals, vectors = createPCAProjection(dataMatrix, classArray, NComps = 2, useGeneralizedEigenvectors = self.useGeneralizedEigenvectors) 
    444         elif method == DR_PLS and self.graph.dataHasClass: 
    445             dataMatrix = dataMatrix.transpose() 
    446             classMatrix = numpy.transpose(numpy.matrix(classArray)) 
    447             vectors = createPLSProjection(dataMatrix, classMatrix, 2) 
    448             vectors = vectors.T 
    449  
    450         # test if all values are 0, if there is an invalid number in the array and if there are complex numbers in the array 
    451         if vectors == None or not vectors.any() or False in numpy.isfinite(vectors) or False in numpy.isreal(vectors): 
    452             self.setStatusBarText("Unable to compute anchor positions for the selected attributes")   
    453             return None 
    454  
    455         xAnchors = vectors[0] 
    456         yAnchors = vectors[1] 
    457  
    458         m = math.sqrt(max(xAnchors**2 + yAnchors**2)) 
    459  
    460         xAnchors /= m 
    461         yAnchors /= m 
    462         names = self.graph.attributeNames 
    463         attributes = [names[attrIndices[i]] for i in range(len(attrIndices))] 
    464  
    465         if setAnchors: 
    466             self.graph.setAnchors(list(xAnchors), list(yAnchors), attributes) 
    467             self.graph.updateData() 
    468             self.graph.repaint() 
    469         return xAnchors, yAnchors, (attributes, attrIndices) 
    470  
    471  
    472  
    473 def createPLSProjection(X,Y, Ncomp = 2): 
    474     '''Predict Y from X using first Ncomp principal components''' 
    475  
    476     # data dimensions 
    477     n, mx = numpy.shape(X) 
    478     my = numpy.shape(Y)[1] 
    479  
    480     # Z-scores of original matrices 
    481     YMean = Y.mean() 
    482     X,Y = center(X), center(Y) 
    483  
    484     P = numpy.empty((mx,Ncomp)) 
    485     W = numpy.empty((mx,Ncomp)) 
    486     C = numpy.empty((my,Ncomp)) 
    487     T = numpy.empty((n,Ncomp)) 
    488     U = numpy.empty((n,Ncomp)) 
    489     B = numpy.zeros((Ncomp,Ncomp)) 
    490  
    491     E,F = X,Y 
    492  
    493     # main algorithm 
    494     for i in range(Ncomp): 
    495  
    496         u = numpy.random.random_sample((n,1)) 
    497         w = normalize(numpy.dot(E.T,u)) 
    498         t = normalize(numpy.dot(E,w)) 
    499         c = normalize(numpy.dot(F.T,t)) 
    500  
    501         dif = t 
    502         # iterations for loading vector t 
    503         while numpy.linalg.norm(dif) > 10e-16: 
    504             c = normalize(numpy.dot(F.T,t)) 
    505             u = numpy.dot(F,c) 
    506             w = normalize(numpy.dot(E.T,u)) 
    507             t0 = normalize(numpy.dot(E,w)) 
    508             dif = t - t0 
    509             t = t0 
    510  
    511         T[:,i] = t.T 
    512         U[:,i] = u.T 
    513         C[:,i] = c.T 
    514         W[:,i] = w.T 
    515  
    516         b = numpy.dot(t.T,u)[0,0] 
    517         B[i][i] = b 
    518         p = numpy.dot(E.T,t) 
    519         P[:,i] = p.T 
    520         E = E - numpy.dot(t,p.T) 
    521         xx = b * numpy.dot(t,c.T) 
    522         F = F - xx 
    523  
    524     # esimated Y 
    525     #YE = numpy.dot(numpy.dot(T,B),C.T)*numpy.std(Y, axis = 0) + YMean 
    526     #Y = Y*numpy.std(Y, axis = 0)+ YMean 
    527     #BPls = numpy.dot(numpy.dot(numpy.linalg.pinv(P.T),B),C.T) 
    528  
    529     return W 
    530  
    531 # if no class data is provided we create PCA projection 
    532 # if there is class data then create SPCA projection 
    533 def createPCAProjection(dataMatrix, classArray = None, NComps = -1, useGeneralizedEigenvectors = 1): 
    534     if type(dataMatrix) == numpy.ma.core.MaskedArray: 
    535         dataMatrix = numpy.array(dataMatrix) 
    536     if classArray != None and type(classArray) == numpy.ma.core.MaskedArray: 
    537         classArray = numpy.array(classArray) 
    538          
    539     dataMatrix = numpy.transpose(dataMatrix) 
    540  
    541     s = numpy.sum(dataMatrix, axis=0)/float(len(dataMatrix)) 
    542     dataMatrix -= s       # substract average value to get zero mean 
    543  
    544     if classArray != None and useGeneralizedEigenvectors: 
    545         covarMatrix = numpy.dot(numpy.transpose(dataMatrix), dataMatrix) 
    546         try: 
    547             matrix = inv(covarMatrix) 
    548         except: 
    549             return None, None 
    550         matrix = numpy.dot(matrix, numpy.transpose(dataMatrix)) 
    551     else: 
    552         matrix = numpy.transpose(dataMatrix) 
    553  
    554     # compute dataMatrixT * L * dataMatrix 
    555     if classArray != None: 
    556         # define the Laplacian matrix 
    557         L = numpy.zeros((len(dataMatrix), len(dataMatrix))) 
    558         for i in range(len(dataMatrix)): 
    559             for j in range(i+1, len(dataMatrix)): 
    560                 L[i,j] = -int(classArray[i] != classArray[j]) 
    561                 L[j,i] = -int(classArray[i] != classArray[j]) 
    562  
    563         s = numpy.sum(L, axis=0)      # doesn't matter which axis since the matrix L is symmetrical 
    564         for i in range(len(dataMatrix)): 
    565             L[i,i] = -s[i] 
    566  
    567         matrix = numpy.dot(matrix, L) 
    568  
    569     matrix = numpy.dot(matrix, dataMatrix) 
    570  
    571     vals, vectors = eig(matrix) 
    572     if vals.dtype.kind == "c":       # if eigenvalues are complex numbers then do nothing 
    573          return None, None 
    574     vals = list(vals) 
    575      
    576     if NComps == -1: 
    577         NComps = len(vals) 
    578     NComps = min(NComps, len(vals)) 
    579      
    580     retVals = [] 
    581     retIndices = [] 
    582     for i in range(NComps): 
    583         retVals.append(max(vals)) 
    584         bestInd = vals.index(max(vals)) 
    585         retIndices.append(bestInd) 
    586         vals[bestInd] = -1 
    587      
    588     return retVals, numpy.take(vectors.T, retIndices, axis = 0)         # i-th eigenvector is the i-th column in vectors so we have to transpose the array 
    589  
    590  
    591  
    592 # ############################################################################# 
    593 # class that represents FreeViz classifier 
    594 class FreeVizClassifier(orange.Classifier): 
    595     def __init__(self, data, freeviz): 
    596         self.FreeViz = freeviz 
    597  
    598         if self.FreeViz.__class__ != FreeViz: 
    599             self.FreeViz.parentWidget.setData(data) 
    600             self.FreeViz.parentWidget.showAllAttributes = 1 
    601         else: 
    602             self.FreeViz.graph.setData(data) 
    603             self.FreeViz.showAllAttributes() 
    604  
    605         #self.FreeViz.randomAnchors() 
    606         self.FreeViz.radialAnchors() 
    607         self.FreeViz.optimizeSeparation() 
    608  
    609         graph = self.FreeViz.graph 
    610         ai = graph.attributeNameIndex 
    611         labels = [a[2] for a in graph.anchorData] 
    612         indices = [ai[label] for label in labels] 
    613  
    614         validData = graph.getValidList(indices) 
    615         domain = orange.Domain([graph.dataDomain[i].name for i in indices]+[graph.dataDomain.classVar.name], graph.dataDomain) 
    616         offsets = [graph.attrValues[graph.attributeNames[i]][0] for i in indices] 
    617         normalizers = [graph.getMinMaxVal(i) for i in indices] 
    618         selectedData = numpy.take(graph.originalData, indices, axis = 0) 
    619         averages = numpy.average(numpy.compress(validData, selectedData, axis=1), 1) 
    620         classData = numpy.compress(validData, graph.originalData[graph.dataClassIndex])         
    621  
    622         graph.createProjectionAsNumericArray(indices, useAnchorData = 1, removeMissingData = 0, validData = validData, jitterSize = -1) 
    623         self.classifier = orange.P2NN(domain, numpy.transpose(numpy.array([numpy.compress(validData, graph.unscaled_x_positions), numpy.compress(validData, graph.unscaled_y_positions), classData])), graph.anchorData, offsets, normalizers, averages, graph.normalizeExamples, law=1)         
    624  
    625     # for a given example run argumentation and find out to which class it most often fall 
    626     def __call__(self, example, returnType=orange.Classifier.GetValue): 
    627         #example.setclass(0) 
    628         return self.classifier(example, returnType) 
    629  
    630  
    631 class FreeVizLearner(orange.Learner): 
    632     def __init__(self, freeviz = None): 
    633         if not freeviz: 
    634             freeviz = FreeViz() 
    635         self.FreeViz = freeviz 
    636         self.name = "FreeViz Learner" 
    637  
    638     def __call__(self, examples, weightID = 0): 
    639         return FreeVizClassifier(examples, self.FreeViz) 
    640  
    641  
    642  
    643 class S2NHeuristicLearner(orange.Learner): 
    644     def __init__(self, freeviz = None): 
    645         if not freeviz: 
    646             freeviz = FreeViz() 
    647         self.FreeViz = freeviz 
    648         self.name = "S2N Feature Selection Learner" 
    649  
    650     def __call__(self, examples, weightID = 0): 
    651         return S2NHeuristicClassifier(examples, self.FreeViz) 
    652  
     1from Orange.projection.linear import * 
Note: See TracChangeset for help on using the changeset viewer.