source: orange/orange/orngVizRank.py @ 8822:9b0090cabacc

Revision 8822:9b0090cabacc, 91.2 KB checked in by matejd <matejd@…>, 3 years ago (diff)

Modified scaling and panning factors; fixed a bug where data was not shown at startup because initializeGL was called multiple times

Line 
1import orange, sys, random, statc
2import orngVisFuncts, orngTest, orngStat
3from math import sqrt
4import os, operator
5from math import sqrt
6import numpy, time
7from copy import copy, deepcopy
8from orngLinProj import FreeViz
9from orngScaleData import getVariableValuesSorted
10
# used for outlier detection
VIZRANK_POINT = 0
CLUSTER_POINT = 1
VIZRANK_MOSAIC = 2

# quality measure (values of VizRank.qualityMeasure)
CLASS_ACCURACY = 0
AVERAGE_CORRECT = 1
BRIER_SCORE = 2
AUC = 3
# human readable names of the quality measures above
measuresDict = {CLASS_ACCURACY: "Classification accuracy", AVERAGE_CORRECT: "Average probability of correct classification",
                BRIER_SCORE: "Brier score", AUC: "Area under curve (AUC)"}

# testing method (values of VizRank.testingMethod; also indices into the testingMethods list below)
LEAVE_ONE_OUT = 0
TEN_FOLD_CROSS_VALIDATION = 1
TEST_ON_LEARNING_SET = 2

# results in the list - positions of the fields inside each tuple stored in VizRank.results
# (VizRank.insertItem stores the tuples in exactly this order)
ACCURACY = 0
OTHER_RESULTS = 1
LEN_TABLE = 2
ATTR_LIST = 3
TRY_INDEX = 4
GENERAL_DICT = 5

# presumably positions inside the OTHER_RESULTS tuple, which the evaluation functions
# build as (accuracy, predictions, class distribution) - TODO confirm against the users of these names
OTHER_ACCURACY = 0
OTHER_PREDICTIONS = 1
OTHER_DISTRIBUTION = 2

# evaluation algorithm (values of VizRank.evaluationAlgorithm)
ALGORITHM_KNN = 0
ALGORITHM_HEURISTIC = 1

NUMBER_OF_INTERVALS = 6  # number of intervals to use when discretizing. used when using the very fast heuristic

# attrCont - measure for continuous attributes (index into contMeasuresDiscClass)
CONT_MEAS_NONE = 0
CONT_MEAS_RELIEFF = 1
CONT_MEAS_S2N = 2
CONT_MEAS_S2NMIX = 3

# attrDisc - measure for discrete attributes (index into discMeasuresDiscClass)
DISC_MEAS_NONE = 0
DISC_MEAS_RELIEFF = 1
DISC_MEAS_GAIN = 2
DISC_MEAS_GINI = 3

# how attribute subsets are selected (values of VizRank.attrSubsetSelection)
DETERMINISTIC_ALL = 0
GAMMA_ALL = 1
GAMMA_SINGLE = 2

# projection optimization method (values of VizRank.projOptimizationMethod)
PROJOPT_NONE = 0
PROJOPT_SPCA = 1
PROJOPT_PLS = 2

# (name, measure) pairs for data with a discrete class; indexed by the CONT_MEAS_* / DISC_MEAS_* constants
contMeasuresDiscClass = [("None", None), ("ReliefF", orange.MeasureAttribute_relief(k=10, m=50)),
                ("Signal to Noise Ratio", orngVisFuncts.S2NMeasure()), ("Signal to Noise OVA", orngVisFuncts.S2NMeasureMix())]

discMeasuresDiscClass = [("None", None), ("ReliefF", orange.MeasureAttribute_relief(k=10, m=50)),
                ("Gain ratio", orange.MeasureAttribute_gainRatio()), ("Gini index", orange.MeasureAttribute_gini())]

# (name, measure) pairs for data without a class; only "None" is available
contMeasuresNoClass = [("None", None)]
discMeasuresNoClass = [("None", None)]

# (name, measure) pairs for data with a continuous class; only "None" is available
contMeasuresContClass = [("None", None)]
discMeasuresContClass = [("None", None)]


# array of testing methods. the function to call is picked by using self.testingMethod as the index
testingMethods = [orngTest.leaveOneOut, orngTest.crossValidation, orngTest.learnAndTestOnLearnData]

# visualization methods (first argument of VizRank.__init__)
SCATTERPLOT = 1
RADVIZ = 2
LINEAR_PROJECTION = 3
POLYVIZ = 4
SCATTERPLOT3D = 5
SPHEREVIZ3D = 6
LINEAR_PROJECTION3D = 7
KNN_IN_ORIGINAL_SPACE = 10

# optimization type (values of VizRank.optimizationType)
EXACT_NUMBER_OF_ATTRS = 0
MAXIMUM_NUMBER_OF_ATTRS = 1
96
97class VizRank:
98    def __init__(self, visualizationMethod, graph = None):
99        if not graph:
100            if visualizationMethod == SCATTERPLOT:
101                import orngScaleScatterPlotData
102                graph = orngScaleScatterPlotData.orngScaleScatterPlotData()
103            elif visualizationMethod == RADVIZ:
104                import orngScaleLinProjData
105                graph = orngScaleLinProjData.orngScaleLinProjData()
106                graph.normalize_examples = 1
107            elif visualizationMethod in [LINEAR_PROJECTION, KNN_IN_ORIGINAL_SPACE]:
108                import orngScaleLinProjData
109                graph = orngScaleLinProjData.orngScaleLinProjData()
110                graph.normalize_examples = 0
111            elif visualizationMethod == POLYVIZ:
112                import orngScalePolyvizData
113                graph = orngScalePolyvizData.orngScalePolyvizData()
114                graph.normalize_examples = 1
115            elif visualizationMethod == SCATTERPLOT3D:
116                from Orange.preprocess.scaling import ScaleScatterPlotData3D
117                graph = ScaleScatterPlotData3D()
118            elif visualizationMethod == SPHEREVIZ3D:
119                from Orange.preprocess.scaling import ScaleLinProjData3D
120                graph = ScaleLinProjData3D()
121                graph.normalize_examples = 1
122            elif visualizationMethod == LINEAR_PROJECTION3D:
123                from Orange.preprocess.scaling import ScaleLinProjData3D
124                graph = ScaleLinProjData3D()
125                graph.normalize_examples = 0
126            else:
127                print "an invalid visualization method was specified. VizRank can not run."
128                return
129
130        random.seed(0)      # always use the same seed to make results repeatable
131        self.graph = graph
132        self.freeviz = FreeViz(graph)
133        self.visualizationMethod = visualizationMethod
134
135        self.results = []
136        self.arguments = []                                 # a list of arguments
137
138        self.kValue = 10
139        self.percentDataUsed = 100
140        self.qualityMeasure = AVERAGE_CORRECT
141        self.qualityMeasureCluster = 0      ### TO DO: fix it
142        self.qualityMeasureContClass = 0    ### TO DO: fix it
143        self.testingMethod = TEN_FOLD_CROSS_VALIDATION
144        self.optimizationType = MAXIMUM_NUMBER_OF_ATTRS
145        self.attributeCount = 4
146        self.evaluationAlgorithm = ALGORITHM_KNN
147        self.attrCont = CONT_MEAS_RELIEFF
148        self.attrDisc = DISC_MEAS_RELIEFF
149        self.attrContNoClass = 0
150        self.attrDiscNoClass = 0
151        self.attrDiscContClass = 0
152        self.attrContContClass = 0
153       
154        self.attrSubsetSelection = GAMMA_ALL                # how do we find attribute subsets to evaluate - deterministic according to attribute ranking score or using gamma distribution - if using gamma, do we want to evaluate all possible permutations of attributes or only one
155        self.projOptimizationMethod = PROJOPT_NONE          # None, supervisedPCA, partial least square
156        self.useExampleWeighting = 0                        # weight examples, so that the class that has a low number of examples will have higher weights
157        self.evaluationData = {}
158        self.evaluationData["triedCombinations"] = {}
159
160        self.externalLearner = None                         # do we use knn or some external learner
161        self.selectedClasses = []                           # which classes are we trying to separate
162        self.learnerName = "VizRank Learner"
163        #self.onlyOnePerSubset = 1                           # save only the best placement of attributes in radviz
164        self.maxResultListLen = 100000                      # number of projections to store in a list
165        self.abortCurrentOperation = 0
166        self.minNumOfExamples = 0                           # if a dataset has less than this number of examples we don't consider that projection
167
168        # when to stop evaluation. when first criterion holds, evaluation stops
169        self.timeLimit = 0              # if greater than 0 then this is the number of minutes that VizRank will use to evaluate projections
170        self.projectionLimit = 0        # if greater than 0 then this is the number of projections that will be evaluated with VizRank
171        self.evaluatedProjectionsCount = 0
172
173        # when to stop local optimization?
174        self.optimizeTimeLimit = 0
175        self.optimizeProjectionLimit = 0
176        self.optimizedProjectionsCount = 0
177
178        if visualizationMethod == SCATTERPLOT: self.parentName = "Scatterplot"
179        elif visualizationMethod == RADVIZ:    self.parentName = "Radviz"
180        elif visualizationMethod == LINEAR_PROJECTION:  self.parentName = "Linear Projection"
181        elif visualizationMethod == POLYVIZ:            self.parentName = "Polyviz"
182
183        self.argumentCount = 1              # number of arguments used when classifying
184        #self.argumentValueFormula = 1       # how to compute argument value
185
186        self.locOptOptimizeProjectionByPermutingAttributes = 1      # try to improve projection by switching pairs of attributes in a projection
187        self.locOptAllowAddingAttributes = 0                        # do we allow increasing the number of visualized attributes
188        self.locOptMaxAttrsInProj = 20                              # if self.locOptAllowAddingAttributes == 1 then what is the maximum number of attributes in a projection
189        self.locOptAttrsToTry = 50                                 # number of best ranked attributes to try
190        self.locOptProjCount = 20                                   # try to locally optimize this number of best ranked projections
191
192        self.rankArgumentsByStrength = 0  # how do you want to compute arguments. if 0 then we go through the top ranked projection and classify. If 1 we rerank projections to projections with strong class prediction and use them for classification
193        self.storeEachPermutation = 0       # do we want to save information for each fold when evaluating projection - used to compute VizRank's accuracy
194
195        # 0 - set to sqrt(N)
196        # 1 - set to N / c
197        self.kValueFormula = 1
198        self.autoSetTheKValue = 1       # automatically set the value k
199       
200        self.saveEvaluationResults = 0
201        self.evaluationResults = {}
202
203
204    def clearResults(self):
205        self.results = []
206        self.evaluationResults = {}
207        self.evaluationData = {}    # clear all previous data about tested permutations and stuff
208        self.evaluationData["triedCombinations"] = {}
209
210    def clearArguments(self):
211        self.arguments = []
212
213    def removeTooSimilarProjections(self, allowedPercentOfEqualAttributes = 70):
214        i=0
215        while i < len(self.results):
216            if self.results[i][TRY_INDEX] != -1 and self.existsABetterSimilarProjection(i, allowedPercentOfEqualAttributes):
217                self.results.pop(i)
218            else:
219                i += 1
220
221    # test if one of the projections in self.results[0:index] are similar to the self.results[index] projection
222    def existsABetterSimilarProjection(self, index, allowedPercentOfEqualAttributes = 70):
223        testAttrs = self.results[index][ATTR_LIST]
224        for i in range(index):
225            attrs = self.results[i][ATTR_LIST]
226            equalAttrs = [attr in attrs for attr in testAttrs]
227            if 100*sum(equalAttrs) > allowedPercentOfEqualAttributes * float(len(testAttrs)):
228                return 1
229        return 0
230
231    def getkValue(self, kValueFormula = -1):
232        if not self.graph.have_data: return 1
233        if kValueFormula == -1:
234            kValueFormula = self.kValueFormula
235        if kValueFormula == 0 or not self.graph.data_has_discrete_class or self.graph.data_has_continuous_class:
236            kValue = int(sqrt(len(self.graph.raw_data)))
237        else:
238            kValue = int(len(self.graph.raw_data) / max(1, len(self.graph.data_domain.classVar.values)))    # k = N / c (c = # of class values)
239        return kValue
240
241    def createkNNLearner(self, k = -1, kValueFormula = -1):
242        if k == -1:
243            if kValueFormula == -1 or not self.graph.have_data or len(self.graph.raw_data) == 0:
244                kValue = self.kValue
245            else:
246                kValue = self.getkValue(kValueFormula)
247
248            if self.percentDataUsed != 100:
249                kValue = int(kValue * self.percentDataUsed / 100.0)
250        else:
251            kValue = k
252
253        return orange.kNNLearner(k = kValue, rankWeight = 0, distanceConstructor = orange.ExamplesDistanceConstructor_Euclidean(normalize=0))
254
255
256    def setData(self, data):
257        self.clearResults()
258        self.selectedClasses = []
259        if self.__class__ == VizRank:
260            self.graph.setData(data, self.graph.raw_subset_data)
261
262        if not self.graph.data_has_discrete_class:
263            return
264
265        self.selectedClasses = range(len(self.graph.data_domain.classVar.values))
266
267        if self.autoSetTheKValue:
268            self.kValue = self.getkValue(self.kValueFormula)
269
270        self.correctSettingsIfNecessary()
271
272    # save subsetdata. first example from this dataset can be used with argumentation - it can find arguments for classifying the example to the possible class values
273    def setSubsetData(self, subData):
274        if self.__class__ == VizRank:
275            self.graph.setData(self.graph.raw_data, subData)
276        self.clearArguments()
277
278    def getEvaluatedAttributes(self):       
279        if self.graph.data_has_discrete_class:
280            return orngVisFuncts.evaluateAttributesDiscClass(self.graph.raw_data, contMeasuresDiscClass[self.attrCont][1], discMeasuresDiscClass[self.attrDisc][1])
281        elif self.graph.data_has_continuous_class:
282            return orngVisFuncts.evaluateAttributesContClass(self.graph.raw_data, contMeasuresContClass[self.attrContContClass][1], discMeasuresContClass[self.attrDiscContClass][1])
283        else:
284            return orngVisFuncts.evaluateAttributesNoClass(self.graph.raw_data, contMeasuresNoClass[self.attrContNoClass][1], discMeasuresNoClass[self.attrDiscNoClass][1])
285       
286
287    # return a function that is appropriate to find the best projection in a list in respect to the selected quality measure
288    def getMaxFunct(self):
289        if self.graph.data_has_discrete_class and self.qualityMeasure == BRIER_SCORE: return min
290        else: return max
291
292    def addResult(self, accuracy, other_results, lenTable, attrList, tryIndex, generalDict = {}, results=None):
293        self.insertItem(self.findTargetIndex(accuracy), accuracy, other_results, lenTable, attrList, tryIndex, generalDict)
294
295    # use bisection to find correct index
296    def findTargetIndex(self, accuracy):
297        funct = self.getMaxFunct()
298        top = 0; bottom = len(self.results)
299
300        while (bottom-top) > 1:
301            mid  = (bottom + top)/2
302            if funct(accuracy, self.results[mid][ACCURACY]) == accuracy: bottom = mid
303            else: top = mid
304
305        if len(self.results) == 0: return 0
306        if funct(accuracy, self.results[top][ACCURACY]) == accuracy:
307            return top
308        else:
309            return bottom
310
311    # insert new result - give parameters: accuracy of projection, number of examples in projection and list of attributes.
312    def insertItem(self, index, accuracy, other_results, lenTable, attrList, tryIndex, generalDict = {}, updateStatusBar = 0):
313        if index < self.maxResultListLen:
314            self.results.insert(index, (accuracy, other_results, lenTable, attrList, tryIndex, generalDict))
315
316
317    # kNNClassifyData - compute classification error for every example in table
318    def kNNClassifyData(self, table):
319        if len(table) == 0:
320            return [], []
321
322        # check if we have a discrete class
323        if not table.domain.classVar or not table.domain.classVar.varType == orange.VarTypes.Discrete:
324            return [], []
325
326        if self.externalLearner: learner = self.externalLearner
327        else:                    learner = self.createkNNLearner()
328        results = apply(testingMethods[self.testingMethod], [[learner], table])
329
330        returnTable = []
331
332        if table.domain.classVar.varType == orange.VarTypes.Discrete:
333            probabilities = numpy.zeros((len(table), len(table.domain.classVar.values)), numpy.float)
334            lenClassValues = len(list(table.domain.classVar.values))
335            if self.qualityMeasure in [AVERAGE_CORRECT, AUC]:       # for AUC we have no way of computing the prediction accuracy for each example
336                for i in range(len(results.results)):
337                    res = results.results[i]
338                    returnTable.append(res.probabilities[0][res.actualClass])
339                    probabilities[i] = res.probabilities[0]
340            elif self.qualityMeasure == BRIER_SCORE:
341                for i in range(len(results.results)):
342                    res = results.results[i]
343                    s = sum([val*val for val in res.probabilities[0]])
344                    returnTable.append((s + 1 - 2*res.probabilities[0][res.actualClass])/float(lenClassValues))
345                    probabilities[i] = res.probabilities[0]
346            elif self.qualityMeasure == CLASS_ACCURACY:
347                for i in range(len(results.results)):
348                    res = results.results[i]
349                    returnTable.append(res.probabilities[0][res.actualClass] == max(res.probabilities[0]))
350                    probabilities[i] = res.probabilities[0]
351            else:
352                print "unknown quality measure for kNNClassifyData"
353        else:
354            probabilities = None
355            # for continuous class we can't compute brier score and classification accuracy
356            for res in results.results:
357                if not res.probabilities[0]: returnTable.append(0)
358                else:                        returnTable.append(res.probabilities[0].density(res.actualClass))
359
360        return returnTable, probabilities
361
362    # kNNEvaluate - evaluate class separation in the given projection using a heuristic or k-NN method
363    def kNNComputeAccuracy(self, table):
364        # select a subset of the data if necessary
365        if self.percentDataUsed != 100:
366            indices = orange.MakeRandomIndices2(table, 1.0-float(self.percentDataUsed)/100.0)
367            testTable = table.select(indices)
368        else:
369            testTable = table
370
371        if len(testTable) == 0: return 0, 0
372
373        if self.evaluationAlgorithm == ALGORITHM_KNN or self.externalLearner:
374            if self.externalLearner: learner = self.externalLearner
375            else:                    learner = self.createkNNLearner(); weight = 0
376
377            if self.useExampleWeighting and testTable.domain.classVar and testTable.domain.classVar.varType == orange.VarTypes.Discrete:
378                testTable, weightID = orange.Preprocessor_addClassWeight(testTable, equalize=1)
379                results = apply(testingMethods[self.testingMethod], [[learner], (testTable, weightID)])
380            else:
381                results = apply(testingMethods[self.testingMethod], [[learner], testTable])
382
383            if self.saveEvaluationResults:
384                self.evaluationResults = results
385                #self.classifier =
386
387            # compute classification success using selected measure
388            if testTable.domain.classVar.varType == orange.VarTypes.Discrete:
389                return self.computeAccuracyFromResults(testTable, results)
390
391            # for continuous class we can't compute brier score and classification accuracy
392            else:
393                val = 0.0
394                if not results.results or not results.results[0].probabilities[0]: return 0, 0
395                for res in results.results:  val += res.probabilities[0].density(res.actualClass)
396                if len(results.results) > 0: val/= float(len(results.results))
397                return 100.0*val, (100.0*val)
398
399        # ###############################
400        # do we want to use very fast heuristic
401        # ###############################
402        elif self.evaluationAlgorithm == ALGORITHM_HEURISTIC:
403            # if input attributes are continuous (may be discrete for evaluating scatterplots, where we dicretize the whole domain...)
404            if testTable.domain[0].varType == orange.VarTypes.Continuous and testTable.domain[1].varType == orange.VarTypes.Continuous:
405                discX = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
406                discY = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
407                testTable = testTable.select([discX, discY, testTable.domain.classVar])
408
409            currentClassDistribution = [int(v) for v in orange.Distribution(testTable.domain.classVar, testTable)]
410            prediction = [0.0 for i in range(len(testTable.domain.classVar.values))]
411
412            # create a new attribute that is a cartesian product of the two visualized attributes
413            nattr = orange.EnumVariable(values=[str(i) for i in range(NUMBER_OF_INTERVALS*NUMBER_OF_INTERVALS)])
414            nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1])
415            for i in range(len(nattr.getValueFrom.lookupTable)): nattr.getValueFrom.lookupTable[i] = i
416
417            for dist in orange.ContingencyAttrClass(nattr, testTable):
418                dist = list(dist)
419                if sum(dist) == 0: continue
420                m = max(dist)
421                prediction[dist.index(m)] += m * m / float(sum(dist))
422
423            prediction = [val*100.0 for val in prediction]             # turn prediction array into percents
424            acc = sum(prediction) / float(max(1, len(testTable)))               # compute accuracy for all classes
425            val = 0.0; s = 0.0
426            for index in self.selectedClasses:                          # compute accuracy for selected classes
427                val += prediction[index]
428                s += currentClassDistribution[index]
429            for i in range(len(prediction)):
430                prediction[i] /= float(max(1, currentClassDistribution[i]))    # turn to probabilities
431            return val/float(max(1,s)), (acc, prediction, currentClassDistribution)
432        else:
433            return 0, 0     # in case of an invalid value
434
435
436    def computeAccuracyFromResults(self, table, results):
437        prediction = [0.0 for i in range(len(table.domain.classVar.values))]
438        countsByFold =  [0 for i in range(results.numberOfIterations)]
439
440        if self.qualityMeasure == AVERAGE_CORRECT:
441            for res in results.results:
442                if not res.probabilities[0]: continue
443                prediction[res.actualClass] += res.probabilities[0][res.actualClass]
444                countsByFold[res.iterationNumber] += 1
445            prediction = [val*100.0 for val in prediction]
446
447        elif self.qualityMeasure == BRIER_SCORE:
448            #return orngStat.BrierScore(results)[0], results
449            for res in results.results:
450                if not res.probabilities[0]: continue
451                prediction[res.actualClass] += sum([prob*prob for prob in res.probabilities[0]]) - 2*res.probabilities[0][res.actualClass] + 1
452                countsByFold[res.iterationNumber] += 1
453
454        elif self.qualityMeasure == CLASS_ACCURACY:
455            #return 100*orngStat.CA(results)[0], results
456            for res in results.results:
457                prediction[res.actualClass] += res.classes[0]==res.actualClass
458                countsByFold[res.iterationNumber] += 1
459            prediction = [val*100.0 for val in prediction]
460        elif self.qualityMeasure == AUC:
461            aucResult = orngStat.AUC(results)
462            if aucResult:
463                return aucResult[0], None
464            else:
465                return 0, None
466
467        # compute accuracy only for classes that are selected as interesting. other class values do not participate in projection evaluation
468        acc = sum(prediction) / float(max(1, len(results.results)))                 # accuracy over all class values
469        classes = self.selectedClasses or range(len(self.graph.data_domain.classVar.values))
470        val = sum([prediction[index] for index in classes])    # accuracy over all selected classes
471
472        currentClassDistribution = [int(v) for v in orange.Distribution(table.domain.classVar, table)]
473        s = sum([currentClassDistribution[index] for index in classes])
474
475        prediction = [prediction[i] / float(max(1, currentClassDistribution[i])) for i in range(len(prediction))] # turn to probabilities
476       
477        return val/max(1, float(s)), (acc, prediction, list(currentClassDistribution))
478
479
480    # Argumentation functions
481    def findArguments(self, example):
482        self.clearArguments()
483        if not self.graph.have_data or not self.graph.data_has_class or len(self.results) == 0:
484            if len(self.results) == 0: print 'To classify an example using VizRank you first have to evaluate some projections.'
485            return orange.MajorityLearner(self.graph.raw_data)(example, orange.GetBoth)
486
487        self.arguments = [[] for i in range(len(self.graph.data_domain.classVar.values))]
488        vals = [0.0 for i in range(len(self.arguments))]
489
490        if self.rankArgumentsByStrength == 1:
491            for index in range(min(len(self.results), self.argumentCount + 50)):
492                classValue, dist = self.computeClassificationForExample(index, example, kValue = len(self.graph.raw_data))
493                if classValue and dist:
494                    for i in range(len(self.arguments)):
495                        self.arguments[i].insert(self.getArgumentIndex(dist[i], i), (dist[i], dist, self.results[index][ATTR_LIST], index))
496
497            for i in range(len(self.arguments)):
498                arr = self.arguments[i]
499                arr.sort()
500                arr.reverse()
501                arr = arr[:self.argumentCount]
502                self.arguments[i] = arr
503                vals[i] = sum([arg[0] for arg in arr])
504        else:
505            usedArguments = 0; index = 0
506            while usedArguments < self.argumentCount and index < len(self.results):
507                classValue, dist = self.computeClassificationForExample(index, example, kValue = self.getkValue(kValueFormula = 0))
508                if classValue and dist:
509                    for i in range(len(self.arguments)):
510                        self.arguments[i].insert(self.getArgumentIndex(dist[i], i), (dist[i], dist, self.results[index][ATTR_LIST], index))
511                        vals[i] += dist[i]
512                    usedArguments += 1
513                index += 1
514
515        suma = sum(vals)
516        if suma == 0:
517            dist = orange.Distribution(self.graph.data_domain.classVar.name, self.graph.raw_data)
518            vals = [dist[i] for i in range(len(dist))]; suma = sum(vals)
519
520        classValue = example.domain.classVar[vals.index(max(vals))]
521        dist = orange.DiscDistribution([val/float(suma) for val in vals])
522        dist.variable = self.graph.data_domain.classVar
523        return classValue, dist
524
525
526    def computeClassificationForExample(self, projectionIndex, example, kValue = -1):
527        (accuracy, other_results, lenTable, attrList, tryIndex, generalDict) = self.results[projectionIndex]
528
529        if 1 in [example[attr].isSpecial() for attr in attrList]: return None, None
530
531        attrIndices = [self.graph.attribute_name_index[attr] for attr in attrList]
532        attrVals = [self.graph.scale_example_value(example, ind) for ind in attrIndices]
533
534        table = self.graph.create_projection_as_example_table(attrIndices, settingsDict = generalDict)
535        [xTest, yTest] = self.graph.get_projected_point_position(attrIndices, attrVals, settingsDict = generalDict)
536
537        learner = self.externalLearner or self.createkNNLearner(k = kValue)
538        if self.useExampleWeighting: table, weightID = orange.Preprocessor_addClassWeight(table, equalize=1)
539        else: weightID = 0
540
541        classifier = learner(table, weightID)
542        classVal, dist = classifier(orange.Example(table.domain, [xTest, yTest, "?"]), orange.GetBoth)
543        return classVal, dist
544
545
546    def getArgumentIndex(self, value, classValue):
547        top = 0; bottom = len(self.arguments[classValue])
548        while (bottom-top) > 1:
549            mid  = (bottom + top)/2
550            if max(value, self.arguments[classValue][mid][0]) == value: bottom = mid
551            else: top = mid
552
553        if len(self.arguments[classValue]) == 0: return 0
554        if max(value, self.arguments[classValue][top][0]) == value:  return top
555        else:                                                        return bottom
556
557    def correctSettingsIfNecessary(self):
558        if not self.graph.have_data: return
559        # check if we have discrete attributes. if yes, then make sure we are not using s2nMix measure and GAMMA_SINGLE
560        if orange.VarTypes.Discrete in [attr.varType for attr in self.graph.data_domain.attributes]:
561            if self.attrCont == CONT_MEAS_S2NMIX:           self.attrCont = CONT_MEAS_S2N
562            if self.attrSubsetSelection == GAMMA_SINGLE:    self.attrSubsetSelection = GAMMA_ALL
563
564    def isEvaluationCanceled(self):
565        stop = 0
566        if self.timeLimit > 0: stop = (time.time() - self.startTime) / 60 >= self.timeLimit
567        if self.projectionLimit > 0: stop = stop or self.evaluatedProjectionsCount >= self.projectionLimit
568        return stop
569
570    def isOptimizationCanceled(self):
571        stop = 0
572        if self.optimizeTimeLimit > 0: stop = (time.time() - self.startTime) / 60 >= self.optimizeTimeLimit
573        if self.optimizeProjectionLimit > 0: stop = stop or self.optimizedProjectionsCount >= self.optimizeProjectionLimit
574        return stop
575
576
577    # get a new subset of attributes. if attributes are not evaluated yet then evaluate them and save info to evaluationData dict.
578    def selectNextAttributeSubset(self, minLength, maxLength):
579        z = self.evaluationData.get("z", minLength-1)
580        u = self.evaluationData.get("u", minLength-1)
581        self.evaluationData["combinations"] = []
582        self.evaluationData["index"] = 0
583
584        # if we use heuristic to find attribute orders
585        if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE:
586            if not self.evaluationData.has_key("attrs"):
587                attributes, attrsByClass = orngVisFuncts.findAttributeGroupsForRadviz(self.graph.raw_data, orngVisFuncts.S2NMeasureMix())
588                attributes = [self.graph.attribute_name_index[name] for name in attributes]
589                attrsByClass = [[self.graph.attribute_name_index[name] for name in arr] for arr in attrsByClass]
590                self.evaluationData["attrs"] = (attributes, attrsByClass)
591            else:
592                attributes, attrsByClass = self.evaluationData["attrs"]
593
594            if z >= len(attributes): return None      # did we already try all the attributes
595            numClasses = len(self.graph.data_domain.classVar.values)
596            if self.attrSubsetSelection in [GAMMA_ALL, GAMMA_SINGLE]:
597                combinations = self.getAttributeSubsetUsingGammaDistribution(u+1)
598            else:
599                combinations = orngVisFuncts.combinations(range(z), u)
600                for i in range(len(combinations))[::-1]:
601                    comb = combinations[i] + [z]
602                    counts = [0] * numClasses
603                    for ind in comb: counts[ind%numClasses] += 1
604                    if max(counts) - min(counts) > 1:
605                        combinations.pop(i)     # ignore combinations that don't have approximately the same number of attributes for each class value
606                        continue
607                    attrList = [[] for c in range(numClasses)]
608                    for ind in comb: attrList[ind % numClasses].append(attributes[ind])
609                    combinations[i] = attrList
610
611        # no heuristic. try all combinations of a group of attributes
612        else:
613            if not self.evaluationData.has_key("attrs"):
614                # evaluate attributes
615                evaluatedAttributes = self.getEvaluatedAttributes()
616                attributes = [self.graph.attribute_name_index[name] for name in evaluatedAttributes]
617                self.evaluationData["attrs"] = attributes
618                self.totalPossibilities = 0
619
620                # build list of indices for permutations of different number of attributes
621                permutationIndices = {}
622                for i in range(minLength, maxLength+1):
623                    if i > len(attributes): continue        # if we don't have enough attributes
624                    if self.projOptimizationMethod != 0 or self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
625                        permutationIndices[i] = [range(i)]
626                    else:
627                        permutationIndices[i] = orngVisFuncts.generateDifferentPermutations(range(i))
628                    self.totalPossibilities += orngVisFuncts.combinationsCount(i, len(attributes)) * len(permutationIndices[i])
629##                sys.stderr.write("selectNextAttributeSubset " + str(permutationIndices.keys()) + "\n")
630                self.evaluationData["permutationIndices"] = permutationIndices
631            else:
632                attributes = self.evaluationData["attrs"]
633
634            # do we have enough attributes at all?
635            if len(attributes) < u+1:
636                combinations = []
637            else:
638                # if we don't want to use any heuristic
639                if self.attrCont == CONT_MEAS_NONE and self.attrDisc == DISC_MEAS_NONE:
640                    combination = []
641                    while len(combination) < u+1:
642                        v = random.randint(0, len(self.graph.data_domain.attributes)-1)
643                        if v not in combination: combination.append(v)
644                    combinations = [combination]
645                elif self.attrSubsetSelection == DETERMINISTIC_ALL:
646                    if z >= len(attributes): return None      # did we already try all the attributes
647                    combinations = orngVisFuncts.combinations(attributes[:z], u)
648                    map(list.append, combinations, [attributes[z]] * len(combinations))     # append the z-th attribute to all combinations in the list
649                elif self.attrSubsetSelection in [GAMMA_ALL, GAMMA_SINGLE]:
650                    combinations = self.getAttributeSubsetUsingGammaDistribution(u+1)
651
652        # update values for the number of attributes
653        u += 1
654        self.evaluationData["u"] = (u >= maxLength and minLength-1) or u
655        if self.attrSubsetSelection == DETERMINISTIC_ALL:
656            self.evaluationData["z"] = (u >= maxLength and z+1) or z
657
658        self.evaluationData["combinations"] = combinations
659        return combinations
660
661    # use gamma distribution to select a subset of attrCount attributes. if we want to use heuristic to find attribute order then
662    # apply gamma distribution on attribute lists for each class value.
663    # before returning a subset of attributes also test if this subset was already tested. if yes, then try to generate a new subset (repeat this at most maxTries = 100 times)
664    def getAttributeSubsetUsingGammaDistribution(self, attrCount):
665        maxTries = 100
666        triedDict = self.evaluationData.get("triedCombinations", {})
667        projCountWidth = len(triedDict.keys()) / 1000
668
669        if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE:
670            numClasses = len(self.graph.data_domain.classVar.values)
671            attributes, attrsByClass = self.evaluationData["attrs"]
672
673            for i in range(maxTries):
674                attrList = [[] for c in range(numClasses)]; attrs = []
675                tried = 0
676                while len(attrs) < min(attrCount, len(self.graph.data_domain.attributes)):
677                    ind = tried%numClasses
678                    #ind = random.randint(0, numClasses-1)       # warning: this can generate uneven groups for each class value!!!
679                    attr = attrsByClass[ind][int(random.gammavariate(1, 5 + i/10 + projCountWidth))%len(attrsByClass[ind])]
680                    if attr not in attrList[ind]:
681                        attrList[ind].append(attr)
682                        attrs.append(attr)
683                    tried += 1
684                attrs.sort()
685                if not triedDict.has_key(tuple(attrs)) and len(attrs) == attrCount:
686                    self.evaluationData["triedCombinations"][tuple(attrs)] = 1     # this is not the best, since we don't want to save used combinations since we only test one permutation
687                    #return [filter(None, attrList)]        # problem: using filter removes value 0 from the array, which means that the attribute ranked as best wont be in the projections
688                    return [attrList]
689        else:
690            attributes = self.evaluationData["attrs"]
691            for i in range(maxTries):
692                attrList = []
693                while len(attrList) < min(attrCount, len(attributes)):
694                    attr = attributes[int(random.gammavariate(1,5 + (len(attributes)/1000) + projCountWidth))%len(attributes)]
695                    if attr not in attrList:
696                        attrList.append(attr)
697                attrList.sort()
698                if not triedDict.has_key(tuple(attrList)):
699                    triedDict[tuple(attrList)] = 1
700                    #return [filter(None, attrList)]        # problem: using filter removes value 0 from the array, which means that the attribute ranked as best wont be in the projections
701                    return [attrList]
702        return None
703
704    # generate possible permutations of the current attribute subset. use evaluationData dict to find which attribute subset to use.
705    def getNextPermutations(self):
706        combinations = self.evaluationData["combinations"]
707        index  = self.evaluationData["index"]
708        if not combinations or index >= len(combinations):
709            return None     # did we test all the projections
710
711        combination = combinations[index]
712        permutations = []
713
714        if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE:
715            # if we don't want to test all placements then we only create a permutation of groups and attributes in each group
716            if self.attrSubsetSelection == GAMMA_SINGLE:
717                permutations = [reduce(operator.add, combination)]
718                usedPerms = {tuple(permutations[0]):1}
719                for c in range(10):
720                    combination = [[group.pop(random.randint(0, len(group)-1)) for num in range(len(group))] for group in [combination.pop(random.randint(0, len(combination)-1)) for i in range(len(combination))]]
721                    comb = reduce(operator.add, combination)
722                    if not usedPerms.has_key(tuple(comb)):
723                        usedPerms[tuple(comb)] = 1
724                        permutations.append(comb)
725
726            # create only one permutation, because its all we need
727            elif self.projOptimizationMethod != 0 or self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
728                permutations.append(reduce(operator.add, combination))
729            else:
730                for proj in orngVisFuncts.createProjections(len(self.graph.data_domain.classVar.values), sum([len(group) for group in combination])):
731                    try: permutations.append([combination[i][j] for (i,j) in proj])
732                    except: pass
733        else:
734            permutationIndices = self.evaluationData["permutationIndices"]
735##            sys.stderr.write("getNextPermutations " + str(permutationIndices.keys()) + "\n")
736            permutations = [[combination[val] for val in ind] for ind in permutationIndices[len(combination)]]
737
738        self.evaluationData["index"] = index + 1
739        return permutations
740
741    def computeTotalHeight(self, node):
742        if node.branches: 
743            return node.height * (node.last - node.first) + sum([self.computeTotalHeight(n) for n in node.branches])
744        else:
745            return node.height
746
747    def evaluateProjection(self, data):
748        if self.graph.data_has_discrete_class:
749            return self.kNNComputeAccuracy(data)
750        elif self.graph.data_has_continuous_class:
751            return 0
752        else:
753            matrix = orange.SymMatrix(len(data))
754            matrix.setattr('items', data)
755            dist = orange.ExamplesDistanceConstructor_Euclidean(data)
756            for i in range(len(data)):
757                for j in range(i+1):
758                    matrix[i, j] = dist(data[i], data[j])
759            root = orange.HierarchicalClustering(matrix, linkage = orange.HierarchicalClustering.Ward, overwriteMatrix = 0)
760            val = self.computeTotalHeight(root)
761            return val, (val)
762           
763
764    # ##########################################################################
765    # MAIN FUNCTION FOR EVALUATING PROJECTIONS
766    # ##########################################################################
767    def evaluateProjections(self, clearPreviousProjections = 1):
768        random.seed(0)      # always use the same seed to make results repeatable
769        if not self.graph.have_data: return 0
770       
771        # TO DO: remove the following line when we add support for cont class
772        if not self.graph.data_has_discrete_class: return 0
773        self.correctSettingsIfNecessary()
774        if self.timeLimit == self.projectionLimit == 0 and self.__class__.__name__ == "VizRank":
775            print "Evaluation of projections was started without any time or projection restrictions. To prevent an indefinite projection evaluation a time limit of 2 hours was set."
776            self.timeLimit = 2 * 60
777
778        self.startTime = time.time()
779
780        if clearPreviousProjections:
781            self.evaluatedProjectionsCount = 0
782            self.optimizedProjectionsCount = 0
783            self.evaluationData = {}            # clear all previous data about tested permutations and stuff
784            self.evaluationData["triedCombinations"] = {}
785            self.clearResults()
786
787        self.clearArguments()
788        maxFunct = self.getMaxFunct()
789       
790        if self.__class__ != VizRank:
791            from PyQt4.QtGui import qApp
792
793#        if not self.graph.data_has_discrete_class:
794#            print "Projections can be evaluated only for data with a discrete class."
795#            return 0
796
797        if self.visualizationMethod == SCATTERPLOT:
798            evaluatedAttributes = self.getEvaluatedAttributes()
799            contVars = [orange.FloatVariable(attr.name) for attr in self.graph.data_domain.attributes]
800            attrCount = len(self.graph.data_domain.attributes)
801
802            count = len(evaluatedAttributes)*(len(evaluatedAttributes)-1)/2
803            strCount = orngVisFuncts.createStringFromNumber(count)
804           
805            for i in range(len(evaluatedAttributes)):
806                attr1 = self.graph.attribute_name_index[evaluatedAttributes[i]]
807                for j in range(i):
808                    attr2 = self.graph.attribute_name_index[evaluatedAttributes[j]]
809                    self.evaluatedProjectionsCount += 1
810                    if self.isEvaluationCanceled():
811                        return self.evaluatedProjectionsCount
812
813                    table = self.graph.create_projection_as_example_table([attr1, attr2])
814                    if len(table) < self.minNumOfExamples: continue
815                    accuracy, other_results = self.evaluateProjection(table)
816                    generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
817                    self.addResult(accuracy, other_results, len(table), [self.graph.data_domain[attr1].name, self.graph.data_domain[attr2].name], self.evaluatedProjectionsCount, generalDict=generalDict)
818
819                    if self.__class__ != VizRank:
820                        self.setStatusBarText("Evaluated %s/%s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount), strCount))
821                        self.parentWidget.progressBarSet(100.0*self.evaluatedProjectionsCount/max(1,float(count)))
822
823        # #################### RADVIZ, LINEAR_PROJECTION  ################################
824        elif self.visualizationMethod in (RADVIZ, LINEAR_PROJECTION, POLYVIZ, KNN_IN_ORIGINAL_SPACE):
825            if self.projOptimizationMethod != 0:
826                self.freeviz.useGeneralizedEigenvectors = 1
827                self.graph.normalize_examples = 0
828
829            # variables and domain for the table
830            domain = orange.Domain([orange.FloatVariable("xVar"), orange.FloatVariable("yVar"), orange.EnumVariable(self.graph.data_domain.classVar.name, values = getVariableValuesSorted(self.graph.data_domain.classVar))])
831            minLength = (self.optimizationType == EXACT_NUMBER_OF_ATTRS and self.attributeCount) or 3
832            maxLength = self.attributeCount
833            classListFull = self.graph.original_data[self.graph.data_class_index]
834
835            # each call to selectNextAttributeSubset gets a new combination of attributes in a range from minLength to maxLength. if we return None for a given number of attributes this
836            # doesn't mean yet that there are no more possible combinations. it may be just that we wanted a combination of 6 attributes in a domain with 4 attributes. therefore we have
837            # to try maxLength-minLength+1 times and if we fail every time then there are no more valid projections
838
839            newProjectionsExist = 1
840            while newProjectionsExist:
841                for experiment in range(maxLength-minLength+1):
842                    if self.selectNextAttributeSubset(minLength, maxLength): break
843                    newProjectionsExist = 0
844                permutations = self.getNextPermutations()
845                while permutations:
846                    attrIndices = permutations[0]
847
848                    # if we use SPCA, PLS or KNN_IN_ORIGINAL_SPACE
849                    if self.projOptimizationMethod != 0 or self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
850                        if self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
851                            table = self.graph.raw_data.select([self.graph.data_domain[attr] for attr in attrIndices] + [self.graph.data_domain.classVar] )
852                            xanchors, yanchors = self.graph.create_xanchors(len(attrIndices)), self.graph.create_yanchors(len(attrIndices))
853                            attrNames = [self.graph.data_domain[attr].name for attr in attrIndices]
854                        else:
855                            projections = self.freeviz.findProjection(self.projOptimizationMethod, attrIndices, set_anchors = 0, percentDataUsed = self.percentDataUsed)
856                            if projections != None:
857                                xanchors, yanchors, (attrNames, newIndices) = projections
858                                table = self.graph.create_projection_as_example_table(newIndices, domain = domain, XAnchors = xanchors, YAnchors = yanchors)
859                        if len(table) < self.minNumOfExamples: continue
860                        self.evaluatedProjectionsCount += 1
861                        accuracy, other_results = self.evaluateProjection(table)
862                        generalDict = {"XAnchors": list(xanchors), "YAnchors": list(yanchors), "Results": self.evaluationResults} if self.saveEvaluationResults else {"XAnchors": list(xanchors), "YAnchors": list(yanchors)}
863                        self.addResult(accuracy, other_results, len(table), attrNames, self.evaluatedProjectionsCount, generalDict = generalDict)
864                        if self.isEvaluationCanceled(): return self.evaluatedProjectionsCount
865                        if self.__class__ != VizRank:
866                            self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount)))
867                    else:
868                        XAnchors = self.graph.create_xanchors(len(attrIndices))
869                        YAnchors = self.graph.create_yanchors(len(attrIndices))
870                        validData = self.graph.get_valid_list(attrIndices)
871                        if numpy.sum(validData) >= self.minNumOfExamples:
872                            classList = numpy.compress(validData, classListFull)
873                            selectedData = numpy.compress(validData, numpy.take(self.graph.no_jittering_scaled_data, attrIndices, axis = 0), axis = 1)
874                            sum_i = self.graph._getSum_i(selectedData)
875
876                            tempList = []
877
878                            # for every permutation compute how good it separates different classes
879                            for permutation in permutations:
880                                if self.evaluatedProjectionsCount % 10 == 0 and self.isEvaluationCanceled():
881                                    continue
882
883                                table = self.graph.create_projection_as_example_table(permutation, validData = validData, classList = classList, sum_i = sum_i, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain)
884                                accuracy, other_results = self.evaluateProjection(table)
885
886                                # save the permutation
887                                if self.storeEachPermutation:
888                                    generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
889                                    self.addResult(accuracy, other_results, len(table), [self.graph.attribute_names[i] for i in permutation], self.evaluatedProjectionsCount, generalDict)
890                                else:
891                                    tempList.append((accuracy, other_results, len(table), [self.graph.attribute_names[i] for i in permutation]))
892
893                                self.evaluatedProjectionsCount += 1
894                                if self.__class__ != VizRank:
895                                    self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount)))
896                                    qApp.processEvents()        # allow processing of other events
897
898                            if not self.storeEachPermutation and len(tempList) > 0:   # return only the best attribute placements
899                                (acc, other_results, lenTable, attrList) = maxFunct(tempList)
900                                generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
901                                self.addResult(acc, other_results, lenTable, attrList, self.evaluatedProjectionsCount, generalDict=generalDict)
902
903                        if self.isEvaluationCanceled():
904                            return self.evaluatedProjectionsCount
905
906                    permutations = self.getNextPermutations()
907
908        elif self.visualizationMethod in (SPHEREVIZ3D, LINEAR_PROJECTION3D):
909            if self.projOptimizationMethod != 0:
910                self.freeviz.useGeneralizedEigenvectors = 1
911                self.graph.normalize_examples = 0
912
913            # variables and domain for the table
914            domain = orange.Domain([orange.FloatVariable("xVar"),
915                                    orange.FloatVariable("yVar"),
916                                    orange.FloatVariable("zVar"),
917                                    orange.EnumVariable(self.graph.data_domain.classVar.name, values = getVariableValuesSorted(self.graph.data_domain.classVar))])
918            minLength = (self.optimizationType == EXACT_NUMBER_OF_ATTRS and self.attributeCount) or 3
919            maxLength = self.attributeCount
920            classListFull = self.graph.original_data[self.graph.data_class_index]
921
922            # each call to selectNextAttributeSubset gets a new combination of attributes in a range from minLength to maxLength. if we return None for a given number of attributes this
923            # doesn't mean yet that there are no more possible combinations. it may be just that we wanted a combination of 6 attributes in a domain with 4 attributes. therefore we have
924            # to try maxLength-minLength+1 times and if we fail every time then there are no more valid projections
925
926            newProjectionsExist = 1
927            while newProjectionsExist:
928                for experiment in range(maxLength-minLength+1):
929                    if self.selectNextAttributeSubset(minLength, maxLength): break
930                    newProjectionsExist = 0
931                permutations = self.getNextPermutations()
932                while permutations:
933                    attrIndices = permutations[0]
934
935                    # if we use SPCA, PLS
936                    if self.projOptimizationMethod != 0:
937                        projections = self.freeviz.findProjection(self.projOptimizationMethod, attrIndices, set_anchors = 0, percentDataUsed = self.percentDataUsed)
938                        if projections != None:
939                            xanchors, yanchors, zanchors, (attrNames, newIndices) = projections
940                            table = self.graph.create_projection_as_example_table(newIndices,
941                                                                                  domain = domain,
942                                                                                  XAnchors = xanchors,
943                                                                                  YAnchors = yanchors,
944                                                                                  ZAnchors = zanchors)
945                        if len(table) < self.minNumOfExamples: continue
946                        self.evaluatedProjectionsCount += 1
947                        accuracy, other_results = self.evaluateProjection(table)
948                        generalDict = {"XAnchors": list(xanchors),
949                                       "YAnchors": list(yanchors),
950                                       "ZAnchors": list(zanchors),
951                                       "Results": self.evaluationResults} if self.saveEvaluationResults else {"XAnchors": list(xanchors),
952                                                                                                              "YAnchors": list(yanchors),
953                                                                                                              "ZAnchors": list(zanchors)}
954                        self.addResult(accuracy, other_results, len(table), attrNames, self.evaluatedProjectionsCount, generalDict = generalDict)
955                        if self.isEvaluationCanceled(): return self.evaluatedProjectionsCount
956                        if self.__class__ != VizRank:
957                            self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount)))
958                    else:
959                        XAnchors = self.graph.create_xanchors(len(attrIndices))
960                        YAnchors = self.graph.create_yanchors(len(attrIndices))
961                        ZAnchors = self.graph.create_zanchors(len(attrIndices))
962                        validData = self.graph.get_valid_list(attrIndices)
963                        if numpy.sum(validData) >= self.minNumOfExamples:
964                            classList = numpy.compress(validData, classListFull)
965                            selectedData = numpy.compress(validData, numpy.take(self.graph.no_jittering_scaled_data, attrIndices, axis = 0), axis = 1)
966                            sum_i = self.graph._getSum_i(selectedData)
967
968                            tempList = []
969
970                            # for every permutation compute how good it separates different classes
971                            for permutation in permutations:
972                                if self.evaluatedProjectionsCount % 10 == 0 and self.isEvaluationCanceled():
973                                    continue
974
975                                table = self.graph.create_projection_as_example_table(permutation,
976                                                                                      validData = validData,
977                                                                                      classList = classList,
978                                                                                      sum_i = sum_i,
979                                                                                      XAnchors = XAnchors,
980                                                                                      YAnchors = YAnchors,
981                                                                                      ZAnchors = ZAnchors,
982                                                                                      domain = domain)
983                                accuracy, other_results = self.evaluateProjection(table)
984
985                                # save the permutation
986                                if self.storeEachPermutation:
987                                    generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
988                                    self.addResult(accuracy, other_results, len(table), [self.graph.attribute_names[i] for i in permutation], self.evaluatedProjectionsCount, generalDict)
989                                else:
990                                    tempList.append((accuracy, other_results, len(table), [self.graph.attribute_names[i] for i in permutation]))
991
992                                self.evaluatedProjectionsCount += 1
993                                if self.__class__ != VizRank:
994                                    self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount)))
995                                    qApp.processEvents()        # allow processing of other events
996
997                            if not self.storeEachPermutation and len(tempList) > 0:   # return only the best attribute placements
998                                (acc, other_results, lenTable, attrList) = maxFunct(tempList)
999                                generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
1000                                self.addResult(acc, other_results, lenTable, attrList, self.evaluatedProjectionsCount, generalDict=generalDict)
1001
1002                        if self.isEvaluationCanceled():
1003                            return self.evaluatedProjectionsCount
1004
1005                    permutations = self.getNextPermutations()
1006
1007        elif self.visualizationMethod == SCATTERPLOT3D:
1008            evaluatedAttributes = self.getEvaluatedAttributes()
1009            contVars = [orange.FloatVariable(attr.name) for attr in self.graph.data_domain.attributes]
1010            attrCount = len(self.graph.data_domain.attributes)
1011
1012            leva = len(evaluatedAttributes)
1013            count = leva*(leva-1)*(leva-2) / 6
1014            strCount = orngVisFuncts.createStringFromNumber(count)
1015           
1016            for i in range(len(evaluatedAttributes)):
1017                attr1 = self.graph.attribute_name_index[evaluatedAttributes[i]]
1018                for j in range(i):
1019                    attr2 = self.graph.attribute_name_index[evaluatedAttributes[j]]
1020                    for k in range(j):
1021                        attr3 = self.graph.attribute_name_index[evaluatedAttributes[k]]
1022                        self.evaluatedProjectionsCount += 1
1023                        if self.isEvaluationCanceled():
1024                            return self.evaluatedProjectionsCount
1025
1026                        table = self.graph.create_projection_as_example_table_3D([attr1, attr2, attr3])
1027                        if len(table) < self.minNumOfExamples: continue
1028                        accuracy, other_results = self.evaluateProjection(table)
1029                        generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
1030                        self.addResult(accuracy, other_results, len(table),
1031                            [self.graph.data_domain[attr1].name, self.graph.data_domain[attr2].name, self.graph.data_domain[attr3].name],
1032                            self.evaluatedProjectionsCount, generalDict=generalDict)
1033
1034                        if self.__class__ != VizRank:
1035                            self.setStatusBarText("Evaluated %s/%s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount), strCount))
1036                            self.parentWidget.progressBarSet(100.0*self.evaluatedProjectionsCount/max(1,float(count)))
1037
1038        else:
1039            print "unknown visualization method"
1040
1041        return self.evaluatedProjectionsCount
1042
1043    def getProjectionQuality(self, attrList, useAnchorData = 0):
1044        if not self.graph.have_data: return 0.0, None
1045        table = self.graph.create_projection_as_example_table([self.graph.attribute_name_index[attr] for attr in attrList], useAnchorData = useAnchorData)
1046        return self.evaluateProjection(table)
1047
1048
1049    def insertTempProjection(self, projections, acc, attrList):
1050        if len(projections) == 0: return [(acc, attrList)]
1051
1052        top = 0; bottom = len(projections)
1053        while (bottom-top) > 1:
1054            mid  = (bottom + top)/2
1055            if max(acc, projections[mid][0]) == acc: bottom = mid
1056            else: top = mid
1057
1058        if max(acc, projections[top][0]) == acc: projections.insert(top, (acc, attrList))
1059        else:                                    projections.insert(bottom, (acc, attrList))
1060
1061    # ##########################################################################
1062    # FUNCTION FOR OPTIMIZING BEST PROJECTIONS
1063    # ##########################################################################
1064    def optimizeBestProjections(self, restartWhenImproved = 1):
1065        random.seed(0)      # always use the same seed to make results repeatable
1066        count = min(len(self.results), self.locOptProjCount)
1067        if not count: return
1068        self.correctSettingsIfNecessary()
1069        self.optimizedProjectionsCount = 0
1070        """
1071        if self.optimizeTimeLimit == self.optimizeProjectionLimit == 0:
1072            print "Optimization of projections was started without any time or projection restrictions. To prevent an indefinite projection optimization a time limit of 2 hours was set."
1073            self.optimizeProjectionLimit = 2 * 60
1074        """
1075
1076        if self.__class__ != VizRank:
1077            from PyQt4.QtGui import qApp
1078
1079        attrs = [self.results[i][ATTR_LIST] for i in range(count)]                                   # create a list of attributes that are in the top projections
1080        attrs = [[self.graph.attribute_name_index[name] for name in projection] for projection in attrs]    # find indices from the attribute names
1081        accuracys = [self.getProjectionQuality(self.results[i][ATTR_LIST])[0] for i in range(count)]
1082        projections = [(accuracys[i], attrs[i]) for i in range(len(accuracys))]
1083
1084        domain = orange.Domain([orange.FloatVariable("xVar"), orange.FloatVariable("yVar"), orange.EnumVariable(self.graph.data_domain.classVar.name, values = getVariableValuesSorted(self.graph.data_domain.classVar))])
1085        attributes = [self.graph.attribute_name_index[name] for name in self.getEvaluatedAttributes()[:self.locOptAttrsToTry]]
1086        self.startTime = time.time()
1087        lenOfAttributes = len(attributes)
1088        maxFunct = self.getMaxFunct()
1089
1090        if self.visualizationMethod == SCATTERPLOT:
1091            classListFull = self.graph.original_data[self.graph.data_class_index]
1092
1093            tempDict = {}
1094            projIndex = 0
1095            while len(projections) > 0:
1096                (accuracy, projection) = projections.pop(0)
1097                projIndex -= 1
1098
1099                significantImprovement = 0
1100                strTotalAtts = orngVisFuncts.createStringFromNumber(lenOfAttributes)
1101                for (attrIndex, attr) in enumerate(attributes):
1102                    if attr in projection: continue
1103                    testProjections = []
1104                    if not tempDict.has_key((projection[0], attr)) and not tempDict.has_key((attr, projection[0])): testProjections.append([projection[0], attr])
1105                    if not tempDict.has_key((projection[1], attr)) and not tempDict.has_key((attr, projection[1])): testProjections.append([attr, projection[1]])
1106
1107                    for testProj in testProjections:
1108                        table = self.graph.create_projection_as_example_table(testProj, domain = domain)
1109                        if len(table) < self.minNumOfExamples: continue
1110                        acc, other_results = self.evaluateProjection(table)
1111                        if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0:
1112                            self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1113                        if acc > accuracy:
1114                            self.addResult(acc, other_results, len(table), [self.graph.attribute_names[i] for i in testProj], projIndex)
1115                            self.insertTempProjection(projections, acc, testProj)
1116                            tempDict[tuple(testProj)] = 1
1117                            if min(acc, accuracy) != 0 and max(acc, accuracy) > 1.005 *min(acc, accuracy):  significantImprovement = 1
1118
1119                        self.optimizedProjectionsCount += 1
1120                        if self.__class__ != VizRank:
1121                            qApp.processEvents()        # allow processing of other events
1122                        if self.optimizedProjectionsCount % 10 == 0 and self.isOptimizationCanceled():
1123                            return self.optimizedProjectionsCount
1124                    if significantImprovement: break
1125
1126        elif self.visualizationMethod == SCATTERPLOT3D:
1127            classListFull = self.graph.original_data[self.graph.data_class_index]
1128
1129            tempDict = {}
1130            projIndex = 0
1131            while len(projections) > 0:
1132                (accuracy, projection) = projections.pop(0)
1133                projIndex -= 1
1134
1135                significantImprovement = 0
1136                strTotalAtts = orngVisFuncts.createStringFromNumber(lenOfAttributes)
1137                for (attrIndex, attr) in enumerate(attributes):
1138                    if attr in projection: continue
1139                    testProjections = []
1140                    if not tempDict.has_key((projection[0], attr)) and not tempDict.has_key((attr, projection[0])): testProjections.append([projection[0], attr])
1141                    if not tempDict.has_key((projection[1], attr)) and not tempDict.has_key((attr, projection[1])): testProjections.append([attr, projection[1]])
1142
1143                    for testProj in testProjections:
1144                        table = self.graph.create_projection_as_example_table_3D(testProj, domain = domain)
1145                        if len(table) < self.minNumOfExamples: continue
1146                        acc, other_results = self.evaluateProjection(table)
1147                        if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0:
1148                            self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1149                        if acc > accuracy:
1150                            self.addResult(acc, other_results, len(table), [self.graph.attribute_names[i] for i in testProj], projIndex)
1151                            self.insertTempProjection(projections, acc, testProj)
1152                            tempDict[tuple(testProj)] = 1
1153                            if min(acc, accuracy) != 0 and max(acc, accuracy) > 1.005 *min(acc, accuracy):  significantImprovement = 1
1154
1155                        self.optimizedProjectionsCount += 1
1156                        if self.__class__ != VizRank:
1157                            qApp.processEvents()        # allow processing of other events
1158                        if self.optimizedProjectionsCount % 10 == 0 and self.isOptimizationCanceled():
1159                            return self.optimizedProjectionsCount
1160                    if significantImprovement: break
1161
1162        # #################### RADVIZ, LINEAR_PROJECTION  ################################
1163        elif self.visualizationMethod in (RADVIZ, LINEAR_PROJECTION, POLYVIZ):
1164            numClasses = len(self.graph.data_domain.classVar.values)
1165
1166            classListFull = self.graph.original_data[self.graph.data_class_index]
1167            newProjDict = {}
1168            projIndex = 0
1169
1170            while len(projections) > 0:
1171                (accuracy, projection) = projections.pop(0)
1172                projIndex -= 1
1173
1174                # first try to use the attributes in the projection and evaluate only different permutations of these attributes
1175                if self.locOptOptimizeProjectionByPermutingAttributes == 1 and self.projOptimizationMethod == 0:
1176                    bestProjection = projection; tempProjection = projection
1177                    bestAccuracy = accuracy; tempAccuracy = accuracy
1178                    triedPermutationsDict = {}
1179                    failedConsecutiveTries = 0
1180                    tries = 0
1181                    XAnchors = self.graph.create_xanchors(len(projection))
1182                    YAnchors = self.graph.create_yanchors(len(projection))
1183                    validData = self.graph.get_valid_list(projection)
1184                    classList = numpy.compress(validData, classListFull)
1185                    while failedConsecutiveTries < 5 and tries < 50:
1186                        #newProj = orngVisFuncts.switchTwoElements(tempProjection, nrOfTimes = 3)
1187                        newProj = orngVisFuncts.switchTwoElementsInGroups(tempProjection, numClasses, 3)
1188                        tries += 1
1189                        if triedPermutationsDict.has_key(str(newProj)):
1190                            failedConsecutiveTries += 1
1191                        else:
1192                            failedConsecutiveTries = 0
1193                            triedPermutationsDict[str(newProj)] = 1
1194
1195                            table = self.graph.create_projection_as_example_table(newProj, validData = validData, classList = classList, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain)
1196                            if len(table) < self.minNumOfExamples: continue
1197                            acc, other_results = self.evaluateProjection(table)
1198                            self.optimizedProjectionsCount += 1
1199                            if self.__class__ != VizRank:
1200                                qApp.processEvents()        # allow processing of other events
1201                            if self.isOptimizationCanceled(): return self.optimizedProjectionsCount
1202                            if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0:
1203                                self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1204                            if acc > bestAccuracy:
1205                                bestAccuracy = acc
1206                                bestProjection = newProj
1207                                #self.addResult(acc, other_results, len(table), [self.graph.attribute_names[i] for i in newProj], -1, {})
1208                            if acc > tempAccuracy or acc > 0.99 * tempAccuracy:
1209                                tempProjection = newProj
1210                                tempAccuracy = acc
1211                    projection = bestProjection
1212                    accuracy = bestAccuracy
1213
1214                # take best projection and try to replace one of the attributes with a new attribute
1215                # when you can't further improve projections this way try adding a new attribute to the projection
1216                # in the first step try to find a better projection by substituting an existent attribute with a new one
1217                # in the second step try to find a better projection by adding a new attribute to the circle
1218                significantImprovement = 0
1219                for iteration in range(2):
1220                    if iteration == 1 and not self.locOptAllowAddingAttributes: continue    # if we are not allowed to increase the number of visualized attributes
1221                    if (len(projection) + iteration > self.locOptMaxAttrsInProj): continue
1222                    strTotalAtts = orngVisFuncts.createStringFromNumber(lenOfAttributes)
1223                    for (attrIndex, attr) in enumerate(attributes):
1224                        if attr in projection: continue
1225                        if significantImprovement and restartWhenImproved: break        # if we found a projection that is significantly better than the currently best projection then restart the search with this projection
1226                        tempList = []
1227
1228                        # SPCA, PLS
1229                        if self.projOptimizationMethod != 0:
1230                            if iteration == 0:  # replace one attribute in each projection with attribute attr
1231                                testProjections = [copy(projection) for i in range(len(projection))]
1232                                for i in range(len(testProjections)): testProjections[i][len(projection)-1-i] = attr
1233                            elif iteration == 1: testProjections = [projection + [attr]]
1234
1235                            for proj in testProjections:
1236                                proj.sort()
1237                                if newProjDict.has_key(str(proj)): continue
1238                                newProjDict[str(proj)] = 1
1239                                xanchors, yanchors, (attrNames, newIndices) = self.freeviz.findProjection(self.projOptimizationMethod, proj, set_anchors = 0, percentDataUsed = self.percentDataUsed)
1240                                table = self.graph.create_projection_as_example_table(newIndices, domain = domain, XAnchors = xanchors, YAnchors = yanchors)
1241                                if len(table) < self.minNumOfExamples: continue
1242                                self.optimizedProjectionsCount += 1
1243                                acc, other_results = self.evaluateProjection(table)
1244
1245                                tempList.append((acc, other_results, len(table), newIndices, {"XAnchors": xanchors, "YAnchors": yanchors}))
1246                                if self.storeEachPermutation:
1247                                    self.addResult(acc, other_results, len(table), attrNames, projIndex, generalDict = {"XAnchors": xanchors, "YAnchors": yanchors})
1248
1249                                if self.__class__ != VizRank:
1250                                    qApp.processEvents()        # allow processing of other events
1251                                if self.isOptimizationCanceled(): return self.optimizedProjectionsCount
1252
1253                        # ordinary radviz projections
1254                        else:
1255                            testProjections = [copy(projection) for i in range(len(projection))]
1256                            if iteration == 0:  # replace one attribute in each projection with attribute attr
1257                                count = len(projection)
1258                                for i in range(count): testProjections[i][i] = attr
1259                            elif iteration == 1:
1260                                count = len(projection) + 1
1261                                for i in range(count-1): testProjections[i].insert(i, attr)
1262
1263                            XAnchors = self.graph.create_xanchors(count)
1264                            YAnchors = self.graph.create_yanchors(count)
1265                            validData = self.graph.get_valid_list(testProjections[0])
1266                            classList = numpy.compress(validData, classListFull)
1267
1268                            for testProj in testProjections:
1269                                if newProjDict.has_key(str(testProj)): continue
1270                                newProjDict[str(testProj)] = 1
1271
1272                                table = self.graph.create_projection_as_example_table(testProj, validData = validData, classList = classList, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain)
1273                                if len(table) < self.minNumOfExamples: continue
1274                                acc, other_results = self.evaluateProjection(table)
1275                                if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0: self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1276                                if acc > accuracy:
1277                                    tempList.append((acc, other_results, len(table), testProj, {}))
1278                                if self.storeEachPermutation:
1279                                    self.addResult(acc, other_results, len(table), [self.graph.attribute_names[i] for i in testProj], projIndex, {})
1280
1281                                self.optimizedProjectionsCount += 1
1282                                if self.__class__ != VizRank:
1283                                    qApp.processEvents()        # allow processing of other events
1284                                if self.isOptimizationCanceled(): return self.optimizedProjectionsCount
1285
1286                        # return only the best attribute placements
1287                        if len(tempList) == 0: continue     # can happen if the newProjDict already had all the projections that we tried
1288                        (acc, other_results, lenTable, attrList, generalDict) = maxFunct(tempList)
1289                        if acc > 1.005*accuracy:
1290                            self.insertTempProjection(projections, acc, attrList)
1291                            self.addResult(acc, other_results, lenTable, [self.graph.attribute_names[i] for i in attrList], projIndex , generalDict)
1292                            if hasattr(self, "setStatusBarText"): self.setStatusBarText("Found a better projection with accuracy: %2.2f%%" % (acc))
1293                        if accuracy != 0 and acc > 1.01 * accuracy:  significantImprovement = 1
1294
1295        else:
1296            print "unknown visualization method"
1297
1298        return self.optimizedProjectionsCount
1299
1300    # ##############################################################
1301    # Loading and saving projection files
1302    # ##############################################################
1303
1304    # save the list into a file - filename can be set if you want to call this function without showing the dialog
1305    def save(self, name, results = None, count = 1000):
1306        # take care of extension
1307        if os.path.splitext(name)[1].lower() != ".proj": name = name + ".proj"
1308
1309        if not results: results = self.results
1310        self.abortCurrentOperation = 0
1311
1312        dirName, shortFileName = os.path.split(name)
1313        self.lastSaveDirName = dirName
1314
1315        # open, write and save file
1316        file = open(name, "wt")
1317
1318        attrs = ["kValue", "percentDataUsed", "qualityMeasure", "testingMethod", "parentName", "evaluationAlgorithm", "useExampleWeighting", "projOptimizationMethod", "attrSubsetSelection", "optimizationType", "attributeCount", "attrDisc", "attrCont", "timeLimit", "projectionLimit"]
1319        dict = {}
1320        for attr in attrs: dict[attr] = self.__dict__.get(attr)
1321        dict["dataCheckSum"] = self.graph.raw_data.checksum()
1322        dict["totalProjectionsEvaluated"] = self.evaluatedProjectionsCount + self.optimizedProjectionsCount  # let's also save the total number of projections that we evaluated in order to get this list
1323
1324        file.write("%s\n%s\n" % (str(dict), str(self.selectedClasses)))
1325
1326        i=0
1327        for i in range(len(results)):
1328            if i >= count: break
1329
1330            (acc, other_results, lenTable, attrList, tryIndex, generalDict) = results[i]
1331
1332            s = "(%.3f, (" % (acc)
1333            for val in other_results:
1334                if type(val) == float: s += "%.3f ," % val
1335                elif type(val) == list:
1336                    s += "["
1337                    for el in val:
1338                        if type(el) == float: s += "%.3f, " % (el)
1339                        elif type(el) == int: s += "%d, " % (el)
1340                        else: s += "%s, " % str(el)
1341                    if s[-2] == ",": s = s[:-2]
1342                    s += "], "
1343            if s[-2] == ",": s = s[:-2]
1344            s += "), %d, %s, %d, %s)" % (lenTable, str(attrList), tryIndex, str(generalDict).replace("\n     ", "")) # be sure to remove \n in XAnchors and YAnchors otherwise load doesn't work
1345            file.write(s + "\n")
1346
1347            if self.abortCurrentOperation: break
1348            if hasattr(self, "setStatusBarText"):
1349                self.setStatusBarText("Saved %s projections" % (orngVisFuncts.createStringFromNumber(i)))
1350
1351        file.flush()
1352        file.close()
1353        self.abortCurrentOperation = 0
1354        return i
1355
1356    # load projections from a file
1357    def load(self, name, ignoreCheckSum = 1, maxCount = -1):
1358        self.clearResults()
1359        self.clearArguments()
1360        self.abortCurrentOperation = 0
1361
1362        file = open(name, "rt")
1363        settings = eval(file.readline()[:-1])
1364        if settings.get("parentName", "").lower() != self.parentName.lower():
1365            if self.__class__ != VizRank:
1366                QMessageBox.critical( self, "Optimization Dialog", 'Unable to load projection file. It was saved for %s method'%(settings["parentName"]), QMessageBox.Ok)
1367            else:
1368                print 'Unable to load projection file. It was saved for %s method' % (settings["parentName"])
1369            file.close()
1370            return [], 0
1371
1372        if settings.has_key("dataCheckSum") and settings["dataCheckSum"] != self.graph.raw_data.checksum():
1373            if not ignoreCheckSum and self.__class__.__name__ == "OWVizRank":
1374                if QMessageBox.information(self, 'VizRank', 'The current data set has a different checksum than the data set that was used to evaluate projections in this file.\nDo you want to continue loading anyway, or cancel?','Continue','Cancel', '', 0,1):
1375                    file.close()
1376                    return [], 0
1377            else:
1378                print "The data set has a different checksum than the data set that was used in projection evaluation. Projection might be invalid but the file will be loaded anyway..."
1379
1380        for key in settings.keys():
1381            setattr(self, key, settings[key])
1382
1383        # find if it was computed for specific class values
1384        selectedClasses = eval(file.readline()[:-1])
1385
1386        if self.__class__ != VizRank:
1387            from PyQt4.QtGui import qApp
1388
1389        count = 0
1390        for line in file.xreadlines():
1391            (acc, other_results, lenTable, attrList, tryIndex, generalDict) = eval(line)
1392            VizRank.insertItem(self, count, acc, other_results, lenTable, attrList, tryIndex, generalDict)
1393            count+=1
1394            if maxCount != -1 and count >= maxCount: break
1395            if self.abortCurrentOperation: break
1396            if count % 100 == 0 and hasattr(self, "setStatusBarText"):
1397                self.setStatusBarText("Loaded %s projections" % (orngVisFuncts.createStringFromNumber(count)))
1398                qApp.processEvents()        # allow processing of other events
1399        file.close()
1400
1401        self.abortCurrentOperation = 0
1402
1403        # update loaded results
1404        return selectedClasses, count
1405
1406    # remove results that have tryIndex > topProjectionIndex
1407    def reduceResults(self, topProjectionIndex):
1408        results = self.results
1409        self.clearResults()
1410        i=0
1411        for (accuracy, other_results, lenTable, attrList, tryIndex, generalDict) in results:
1412            if tryIndex <= topProjectionIndex:
1413                self.insertItem(i, accuracy, other_results, lenTable, attrList, tryIndex, generalDict)
1414                i += 1
1415
1416
1417# ###############################################################################################################################################
1418# ######           VIZRANK OUTLIERS            ##############################################################################################
1419# ###############################################################################################################################################
class VizRankOutliers:
    """Find possible outliers using class probabilities predicted in the top projections.

    For each of the best projectionCount projections the class
    probabilities of every example are stored in matrixOfPredictions, which
    has one row per (projection, class value) pair and one column per
    example. Cells of examples that could not be evaluated in a projection
    hold the NOT_EVALUATED marker.
    """

    NOT_EVALUATED = -100    # marker for "no prediction for this example in this projection"

    def __init__(self, vizrank, dialogType):
        """vizrank is the object that evaluated the projections, dialogType one of VIZRANK_POINT, VIZRANK_MOSAIC."""
        self.vizrank = vizrank
        self.dialogType = dialogType

        self.data = None
        self.results = None

        self.projectionIndices = []
        self.matrixOfPredictions = None
        self.graphMatrix = None
        self.evaluatedExamples = []
        self.projectionCount = 20

        if self.dialogType == VIZRANK_POINT:
            self.ATTR_LIST = ATTR_LIST
            self.ACCURACY = ACCURACY
        elif self.dialogType == VIZRANK_MOSAIC:
            import orngMosaic
            self.ATTR_LIST = orngMosaic.ATTR_LIST
            self.ACCURACY = orngMosaic.SCORE


    def setResults(self, data, results):
        """Set the data and the list of evaluated projections; cached predictions are discarded."""
        self.data = data
        self.results = results
        self.matrixOfPredictions = None


    def evaluateProjections(self, qApp = None):
        """Compute class predictions in the top projections and fill self.evaluatedExamples.

        Only projections that are not yet in matrixOfPredictions are
        evaluated. Afterwards self.evaluatedExamples is a sorted list of
        (average probability of the example's own class, example index,
        [(average probability, class value), ...]).

        qApp -- optional object whose processEvents() is called after each
                evaluated projection
        """
        if self.dialogType == VIZRANK_POINT:
            graph = self.vizrank.graph

        if not self.results or not self.data: return

        projCount = min(int(self.projectionCount), len(self.results))
        classCount = max(len(self.data.domain.classVar.values), 1)
        existing = 0
        if self.matrixOfPredictions is not None:
            rows = numpy.shape(self.matrixOfPredictions)[0]
            existing = rows // classCount
            if existing < projCount:
                # numpy.resize would fill the new rows with copies of the old predictions,
                # so create a matrix of markers and copy the known rows into it
                grown = self.NOT_EVALUATED * numpy.ones((projCount*classCount, len(self.data)), float)
                grown[:rows] = self.matrixOfPredictions
                self.matrixOfPredictions = grown
            elif existing > projCount:
                self.matrixOfPredictions = self.matrixOfPredictions[0:classCount*projCount,:]
        else:
            self.matrixOfPredictions = self.NOT_EVALUATED * numpy.ones((projCount*classCount, len(self.data)), float)

        # compute the matrix of predictions
        results = self.results[existing:min(len(self.results), projCount)]
        index = 0
        for result in results:
            if self.dialogType == VIZRANK_POINT:
                acc, other, tableLen, attrList, tryIndex, generalDict = result
                attrIndices = [graph.attribute_name_index[attr] for attr in attrList]
                validDataIndices = graph.get_valid_indices(attrIndices)
                table = graph.create_projection_as_example_table(attrIndices, settingsDict = generalDict)    # TO DO: this does not work with polyviz!!!
                acc, probabilities = self.vizrank.kNNClassifyData(table)

            elif self.dialogType == VIZRANK_MOSAIC:
                from orngCI import FeatureByCartesianProduct
                acc, attrList, tryIndex, other = result
                clsVals = len(self.data.domain.classVar.values)
                probabilities = numpy.zeros((len(self.data), clsVals), float)
                newFeature, quality = FeatureByCartesianProduct(self.data, attrList)
                dist = orange.ContingencyAttrClass(newFeature, self.data)
                data = self.data.select([newFeature, self.data.classVar])     # create a dataset that has only this new feature and class info
                validDataIndices = []
                for i, ex in enumerate(data):
                    try:
                        prob = dist[ex[0]]
                        total = max(1, float(sum(prob.values())))
                        for j in range(clsVals):
                            probabilities[i][j] = prob[j] / total
                    except Exception:
                        # NOTE(review): presumably raised for examples whose feature value is unknown - such examples are skipped
                        pass
                    else:
                        validDataIndices.append(i)
                # keep only the rows of valid examples so that values and indices given to numpy.put line up
                probabilities = numpy.take(probabilities, numpy.array(validDataIndices, int), axis = 0)

            probabilities = numpy.transpose(probabilities)
            for i in range(classCount):
                numpy.put(self.matrixOfPredictions[(existing + index)*classCount + i], validDataIndices, probabilities[i])

            index += 1
            if hasattr(self, "setStatusBarText"):
                self.setStatusBarText("Evaluated %s/%s projections..." % (orngVisFuncts.createStringFromNumber(existing + index), orngVisFuncts.createStringFromNumber(projCount)))
                self.widget.progressBarSet(100.0*(index)/max(1, float(projCount-existing)))
            if qApp:
                qApp.processEvents()

        # generate a sorted list of (probability, exampleIndex, classDistribution)
        projCount = min(int(self.projectionCount), len(self.results))
        self.evaluatedExamples = []
        for exIndex in range(len(self.data)):
            # rows: class values, columns: projections
            matrix = numpy.transpose(numpy.reshape(self.matrixOfPredictions[:, exIndex], (projCount, classCount)))
            ownClass = int(self.data[exIndex].getclass())
            valid = numpy.where(matrix[ownClass] != self.NOT_EVALUATED, 1, 0)    # projections in which this example was evaluated
            data = numpy.compress(valid, matrix[ownClass])
            if len(data): aveAcc = numpy.sum(data) / float(len(data))
            else:         aveAcc = 0
            classPredictions = []
            for ind, val in enumerate(self.data.domain.classVar.values):
                data = numpy.compress(valid, matrix[ind])
                if len(data): acc = numpy.sum(data) / float(len(data))
                else:         acc = 0
                classPredictions.append((acc, val))
            self.evaluatedExamples.append((aveAcc, exIndex, classPredictions))
        self.evaluatedExamples.sort()

    # take the self.evaluatedExamples list and find examples where probability of the "correct" class is lower than probability of some other class
    # change class value of such examples to class value that has the highest probability
    def changeClassToMostProbable(self):
        """Return a copy of the data in which every example has the class with the highest average probability.

        Returns None (after printing a message) when evaluateProjections()
        has not been run for the current data.
        """
        if not self.data or not self.evaluatedExamples or len(self.evaluatedExamples) != len(self.data):
            print("no data or outliers not found yet. Run evaluateProjections() first.")
            return

        correctedData = orange.ExampleTable(self.data)
        for (aveAcc, exInd, classPredictions) in self.evaluatedExamples:
            (acc, clsVal) = max(classPredictions)
            correctedData[exInd].setclass(clsVal)
        return correctedData
1537
1538
1539# ###############################################################################################################################################
1540# ######       VIZRANK LEARNERS, CLASSIFIERS       ##############################################################################################
1541# ###############################################################################################################################################
1542
1543# class that represents kNN classifier that classifies examples based on top evaluated projections
class VizRankClassifier(orange.Classifier):
    """Classifier that predicts the class of an example from the top evaluated projections.

    Creating the classifier already does all the work: the data is given to
    the VizRank object, projections are evaluated and, when an optimization
    limit is set, the best projections are optimized.
    """
    def __init__(self, vizrank, data):
        self.VizRank = vizrank

        if vizrank.__class__.__name__ != "OWVizRank":
            vizrank.setData(data)
        else:
            # in the widget the data is sent through the parent widget and the limits are taken from the dialog settings
            widget = vizrank.parentWidget
            widget.setData(data)
            widget.handleNewSignals()
            vizrank.timeLimit = vizrank.evaluationTime
            if vizrank.optimizeBestProjection:
                vizrank.optimizeTimeLimit = vizrank.optimizeBestProjectionTime
            else:
                vizrank.optimizeTimeLimit = 0

        vizrank.evaluateProjections()

        # optimize the best projections only if a time or a projection limit for optimization is set
        wantsOptimization = vizrank.optimizeTimeLimit > 0 or vizrank.optimizeProjectionLimit
        if wantsOptimization:
            vizrank.optimizeBestProjections()
            vizrank.removeTooSimilarProjections()


    # for a given example run argumentation and find out into which class it most often falls
    def __call__(self, example, returnType = orange.GetBoth):
        """Classify example; returns (class value, distribution) for orange.GetBoth, otherwise only the class value."""
        vizrank = self.VizRank
        if vizrank.__class__.__name__ != "OWVizRank":
            classVal, dist = vizrank.findArguments(example)
        else:
            # show the example in the widget as subset data before looking for arguments
            subset = orange.ExampleTable(example.domain)
            subset.append(example)
            vizrank.parentWidget.setSubsetData(subset)
            vizrank.parentWidget.handleNewSignals()
            classVal, dist = vizrank.findArguments(example, 0, 0)

        if returnType == orange.GetBoth:
            return classVal, dist
        return classVal
1582
1583
1584# #############################################################################
1585# learner that builds VizRankClassifier
class VizRankLearner(orange.Learner):
    """Learner whose call builds a VizRankClassifier around one VizRank object."""

    def __init__(self, visualizationMethod = SCATTERPLOT, vizrank = None, graph = None):
        """Remember the VizRank object to learn with.

        When no (truthy) `vizrank` is supplied, a new VizRank is created from
        `visualizationMethod` and `graph`. The learner's name is taken from
        the VizRank object's `learnerName`.
        """
        self.VizRank = vizrank or VizRank(visualizationMethod, graph)
        self.name = self.VizRank.learnerName

    def __call__(self, examples, weightID = 0):
        """Return a VizRankClassifier built on `examples`; `weightID` is not used."""
        classifier = VizRankClassifier(self.VizRank, examples)
        return classifier
1596
1597
1598
1599#test widget
if __name__ == "__main__":
    # Ad-hoc manual run of VizRank; the dataset locations below are
    # machine-specific absolute paths.
    wineFile = r"E:\Development\Python23\Lib\site-packages\Orange\Datasets\UCI\wine.tab"
    mergedFile = r"E:\Development\Python23\Lib\site-packages\Orange\datasets\Imatch\irski podatki\merged\merged-all.tab"

    data = orange.ExampleTable(wineFile)
    #data = orange.ExampleTable(r"E:\Development\Python23\Lib\site-packages\Orange\Datasets\microarray\cancer\leukemia.tab")

    # Disabled linear-projection experiment, kept as an inert string literal.
    """
    vizrank = VizRank(LINEAR_PROJECTION)
    vizrank.setData(data)
    vizrank.optimizationType = EXACT_NUMBER_OF_ATTRS    # MAXIMUM_NUMBER_OF_ATTRS,  EXACT_NUMBER_OF_ATTRS
    vizrank.attributeCount = 10
    vizrank.attrCont = CONT_MEAS_S2NMIX
    vizrank.projOptimizationMethod = 0
    vizrank.useExampleWeighting = 0
    vizrank.attrSubsetSelection = GAMMA_SINGLE
    vizrank.timeLimit = 1
    vizrank.evaluateProjections()
    """

    # Active experiment: RadViz projections with at most 6 attributes.
    data = orange.ExampleTable(mergedFile)
    vizrank = VizRank(RADVIZ)
    vizrank.setData(data)
    vizrank.attributeCount = 6
    # alternatives: MAXIMUM_NUMBER_OF_ATTRS, EXACT_NUMBER_OF_ATTRS
    vizrank.optimizationType = MAXIMUM_NUMBER_OF_ATTRS
    # alternative: GAMMA_SINGLE
    vizrank.attrSubsetSelection = DETERMINISTIC_ALL

    # alternative: CONT_MEAS_S2N
    vizrank.attrCont = CONT_MEAS_S2NMIX

    #vizrank.storeEachPermutation = 1
    #vizrank.load(r"E:\Development\Python23\Lib\site-packages\Orange\Datasets\microarray\cancer\leukemia - Radviz - test.proj")
    #vizrank.computeVizRanksAccuracy()
    vizrank.timeLimit = 10
    vizrank.evaluateProjections()
    #vizrank.findArguments(data[0])
1632
Note: See TracBrowser for help on using the repository browser.