source: orange/orange/orngVizRank.py @ 7058:2abc06c9e8e0

Revision 7058:2abc06c9e8e0, 80.8 KB checked in by miha <miha.stajdohar@…>, 3 years ago (diff)
Line 
1import orange, sys, random, statc
2import orngVisFuncts, orngTest, orngStat
3from math import sqrt
4import os, operator
5from math import sqrt
6import numpy, time
7from copy import copy, deepcopy
8from orngLinProj import FreeViz
9from orngScaleData import getVariableValuesSorted
10
# used for outlier detection
VIZRANK_POINT = 0
CLUSTER_POINT = 1
VIZRANK_MOSAIC = 2

# quality measure (values of VizRank.qualityMeasure)
CLASS_ACCURACY = 0
AVERAGE_CORRECT = 1
BRIER_SCORE = 2
AUC = 3
# human readable names of the quality measures, keyed by the constants above
measuresDict = {CLASS_ACCURACY: "Classification accuracy", AVERAGE_CORRECT: "Average probability of correct classification",
                BRIER_SCORE: "Brier score", AUC: "Area under curve (AUC)"}

# testing method (values of VizRank.testingMethod; also indices into testingMethods below)
LEAVE_ONE_OUT = 0
TEN_FOLD_CROSS_VALIDATION = 1
TEST_ON_LEARNING_SET = 2

# positions of the fields inside each tuple stored in VizRank.results
# (accuracy, other_results, lenTable, attrList, tryIndex, generalDict)
ACCURACY = 0
OTHER_RESULTS = 1
LEN_TABLE = 2
ATTR_LIST = 3
TRY_INDEX = 4
GENERAL_DICT = 5

# positions inside the other_results tuple
# (accuracy over all classes, per-class predictions, class distribution)
OTHER_ACCURACY = 0
OTHER_PREDICTIONS = 1
OTHER_DISTRIBUTION = 2

# evaluation algorithm (values of VizRank.evaluationAlgorithm)
ALGORITHM_KNN = 0
ALGORITHM_HEURISTIC = 1

NUMBER_OF_INTERVALS = 6  # number of intervals to use when discretizing. used when using the very fast heuristic

# attrCont - measure used to rank continuous attributes (index into contMeasuresDiscClass)
CONT_MEAS_NONE = 0
CONT_MEAS_RELIEFF = 1
CONT_MEAS_S2N = 2
CONT_MEAS_S2NMIX = 3

# attrDisc - measure used to rank discrete attributes (index into discMeasuresDiscClass)
DISC_MEAS_NONE = 0
DISC_MEAS_RELIEFF = 1
DISC_MEAS_GAIN = 2
DISC_MEAS_GINI = 3

# attrSubsetSelection - how attribute subsets to evaluate are chosen
DETERMINISTIC_ALL = 0
GAMMA_ALL = 1
GAMMA_SINGLE = 2

# projOptimizationMethod - none, supervised PCA or partial least squares
PROJOPT_NONE = 0
PROJOPT_SPCA = 1
PROJOPT_PLS = 2

# (name, measure) pairs for data with a discrete class; looked up with attrCont / attrDisc
contMeasuresDiscClass = [("None", None), ("ReliefF", orange.MeasureAttribute_relief(k=10, m=50)),
                ("Signal to Noise Ratio", orngVisFuncts.S2NMeasure()), ("Signal to Noise OVA", orngVisFuncts.S2NMeasureMix())]

discMeasuresDiscClass = [("None", None), ("ReliefF", orange.MeasureAttribute_relief(k=10, m=50)),
                ("Gain ratio", orange.MeasureAttribute_gainRatio()), ("Gini index", orange.MeasureAttribute_gini())]

# (name, measure) pairs for data without a class; looked up with attrContNoClass / attrDiscNoClass
contMeasuresNoClass = [("None", None)]
discMeasuresNoClass = [("None", None)]

# (name, measure) pairs for data with a continuous class; looked up with attrContContClass / attrDiscContClass
contMeasuresContClass = [("None", None)]
discMeasuresContClass = [("None", None)]


# array of testing methods. the entry at index self.testingMethod is called with ([learner], table)
testingMethods = [orngTest.leaveOneOut, orngTest.crossValidation, orngTest.learnAndTestOnLearnData]

# visualization methods
SCATTERPLOT = 1
RADVIZ = 2
LINEAR_PROJECTION = 3
POLYVIZ = 4
KNN_IN_ORIGINAL_SPACE = 10

# optimization type (values of VizRank.optimizationType)
EXACT_NUMBER_OF_ATTRS = 0
MAXIMUM_NUMBER_OF_ATTRS = 1
93
94class VizRank:
95    def __init__(self, visualizationMethod, graph = None):
96        if not graph:
97            if visualizationMethod == SCATTERPLOT:
98                import orngScaleScatterPlotData
99                graph = orngScaleScatterPlotData.orngScaleScatterPlotData()
100            elif visualizationMethod == RADVIZ:
101                import orngScaleLinProjData
102                graph = orngScaleLinProjData.orngScaleLinProjData()
103                graph.normalizeExamples = 1
104            elif visualizationMethod in [LINEAR_PROJECTION, KNN_IN_ORIGINAL_SPACE]:
105                import orngScaleLinProjData
106                graph = orngScaleLinProjData.orngScaleLinProjData()
107                graph.normalizeExamples = 0
108            elif visualizationMethod == POLYVIZ:
109                import orngScalePolyvizData
110                graph = orngScalePolyvizData.orngScalePolyvizData()
111                graph.normalizeExamples = 1
112            else:
113                print "an invalid visualization method was specified. VizRank can not run."
114                return
115
116        random.seed(0)      # always use the same seed to make results repeatable
117        self.graph = graph
118        self.freeviz = FreeViz(graph)
119        self.visualizationMethod = visualizationMethod
120
121        self.results = []
122        self.arguments = []                                 # a list of arguments
123
124        self.kValue = 10
125        self.percentDataUsed = 100
126        self.qualityMeasure = AVERAGE_CORRECT
127        self.qualityMeasureCluster = 0      ### TO DO: fix it
128        self.qualityMeasureContClass = 0    ### TO DO: fix it
129        self.testingMethod = TEN_FOLD_CROSS_VALIDATION
130        self.optimizationType = MAXIMUM_NUMBER_OF_ATTRS
131        self.attributeCount = 4
132        self.evaluationAlgorithm = ALGORITHM_KNN
133        self.attrCont = CONT_MEAS_RELIEFF
134        self.attrDisc = DISC_MEAS_RELIEFF
135        self.attrContNoClass = 0
136        self.attrDiscNoClass = 0
137        self.attrDiscContClass = 0
138        self.attrContContClass = 0
139       
140        self.attrSubsetSelection = GAMMA_ALL                # how do we find attribute subsets to evaluate - deterministic according to attribute ranking score or using gamma distribution - if using gamma, do we want to evaluate all possible permutations of attributes or only one
141        self.projOptimizationMethod = PROJOPT_NONE          # None, supervisedPCA, partial least square
142        self.useExampleWeighting = 0                        # weight examples, so that the class that has a low number of examples will have higher weights
143        self.evaluationData = {}
144        self.evaluationData["triedCombinations"] = {}
145
146        self.externalLearner = None                         # do we use knn or some external learner
147        self.selectedClasses = []                           # which classes are we trying to separate
148        self.learnerName = "VizRank Learner"
149        #self.onlyOnePerSubset = 1                           # save only the best placement of attributes in radviz
150        self.maxResultListLen = 100000                      # number of projections to store in a list
151        self.abortCurrentOperation = 0
152        self.minNumOfExamples = 0                           # if a dataset has less than this number of examples we don't consider that projection
153
154        # when to stop evaluation. when first criterion holds, evaluation stops
155        self.timeLimit = 0              # if greater than 0 then this is the number of minutes that VizRank will use to evaluate projections
156        self.projectionLimit = 0        # if greater than 0 then this is the number of projections that will be evaluated with VizRank
157        self.evaluatedProjectionsCount = 0
158
159        # when to stop local optimization?
160        self.optimizeTimeLimit = 0
161        self.optimizeProjectionLimit = 0
162        self.optimizedProjectionsCount = 0
163
164        if visualizationMethod == SCATTERPLOT: self.parentName = "Scatterplot"
165        elif visualizationMethod == RADVIZ:    self.parentName = "Radviz"
166        elif visualizationMethod == LINEAR_PROJECTION:  self.parentName = "Linear Projection"
167        elif visualizationMethod == POLYVIZ:            self.parentName = "Polyviz"
168
169        self.argumentCount = 1              # number of arguments used when classifying
170        #self.argumentValueFormula = 1       # how to compute argument value
171
172        self.locOptOptimizeProjectionByPermutingAttributes = 1      # try to improve projection by switching pairs of attributes in a projection
173        self.locOptAllowAddingAttributes = 0                        # do we allow increasing the number of visualized attributes
174        self.locOptMaxAttrsInProj = 20                              # if self.locOptAllowAddingAttributes == 1 then what is the maximum number of attributes in a projection
175        self.locOptAttrsToTry = 50                                 # number of best ranked attributes to try
176        self.locOptProjCount = 20                                   # try to locally optimize this number of best ranked projections
177
178        self.rankArgumentsByStrength = 0  # how do you want to compute arguments. if 0 then we go through the top ranked projection and classify. If 1 we rerank projections to projections with strong class prediction and use them for classification
179        self.storeEachPermutation = 0       # do we want to save information for each fold when evaluating projection - used to compute VizRank's accuracy
180
181        # 0 - set to sqrt(N)
182        # 1 - set to N / c
183        self.kValueFormula = 1
184        self.autoSetTheKValue = 1       # automatically set the value k
185       
186        self.saveEvaluationResults = 0
187        self.evaluationResults = {}
188
189
190    def clearResults(self):
191        self.results = []
192        self.evaluationResults = {}
193        self.evaluationData = {}    # clear all previous data about tested permutations and stuff
194        self.evaluationData["triedCombinations"] = {}
195
196    def clearArguments(self):
197        self.arguments = []
198
199    def removeTooSimilarProjections(self, allowedPercentOfEqualAttributes = 70):
200        i=0
201        while i < len(self.results):
202            if self.results[i][TRY_INDEX] != -1 and self.existsABetterSimilarProjection(i, allowedPercentOfEqualAttributes):
203                self.results.pop(i)
204            else:
205                i += 1
206
207    # test if one of the projections in self.results[0:index] are similar to the self.results[index] projection
208    def existsABetterSimilarProjection(self, index, allowedPercentOfEqualAttributes = 70):
209        testAttrs = self.results[index][ATTR_LIST]
210        for i in range(index):
211            attrs = self.results[i][ATTR_LIST]
212            equalAttrs = [attr in attrs for attr in testAttrs]
213            if 100*sum(equalAttrs) > allowedPercentOfEqualAttributes * float(len(testAttrs)):
214                return 1
215        return 0
216
217    def getkValue(self, kValueFormula = -1):
218        if not self.graph.haveData: return 1
219        if kValueFormula == -1:
220            kValueFormula = self.kValueFormula
221        if kValueFormula == 0 or not self.graph.dataHasDiscreteClass or self.graph.dataHasContinuousClass:
222            kValue = int(sqrt(len(self.graph.rawData)))
223        else:
224            kValue = int(len(self.graph.rawData) / max(1, len(self.graph.dataDomain.classVar.values)))    # k = N / c (c = # of class values)
225        return kValue
226
227    def createkNNLearner(self, k = -1, kValueFormula = -1):
228        if k == -1:
229            if kValueFormula == -1 or not self.graph.haveData or len(self.graph.rawData) == 0:
230                kValue = self.kValue
231            else:
232                kValue = self.getkValue(kValueFormula)
233
234            if self.percentDataUsed != 100:
235                kValue = int(kValue * self.percentDataUsed / 100.0)
236        else:
237            kValue = k
238
239        return orange.kNNLearner(k = kValue, rankWeight = 0, distanceConstructor = orange.ExamplesDistanceConstructor_Euclidean(normalize=0))
240
241
242    def setData(self, data):
243        self.clearResults()
244        self.selectedClasses = []
245        if self.__class__ == VizRank:
246            self.graph.setData(data, self.graph.rawSubsetData)
247
248        if not self.graph.dataHasDiscreteClass:
249            return
250
251        self.selectedClasses = range(len(self.graph.dataDomain.classVar.values))
252
253        if self.autoSetTheKValue:
254            self.kValue = self.getkValue(self.kValueFormula)
255
256        self.correctSettingsIfNecessary()
257
258    # save subsetdata. first example from this dataset can be used with argumentation - it can find arguments for classifying the example to the possible class values
259    def setSubsetData(self, subData):
260        if self.__class__ == VizRank:
261            self.graph.setData(self.graph.rawData, subData)
262        self.clearArguments()
263
264    def getEvaluatedAttributes(self):       
265        if self.graph.dataHasDiscreteClass:
266            return orngVisFuncts.evaluateAttributesDiscClass(self.graph.rawData, contMeasuresDiscClass[self.attrCont][1], discMeasuresDiscClass[self.attrDisc][1])
267        elif self.graph.dataHasContinuousClass:
268            return orngVisFuncts.evaluateAttributesContClass(self.graph.rawData, contMeasuresContClass[self.attrContContClass][1], discMeasuresContClass[self.attrDiscContClass][1])
269        else:
270            return orngVisFuncts.evaluateAttributesNoClass(self.graph.rawData, contMeasuresNoClass[self.attrContNoClass][1], discMeasuresNoClass[self.attrDiscNoClass][1])
271       
272
273    # return a function that is appropriate to find the best projection in a list in respect to the selected quality measure
274    def getMaxFunct(self):
275        if self.graph.dataHasDiscreteClass and self.qualityMeasure == BRIER_SCORE: return min
276        else: return max
277
278    def addResult(self, accuracy, other_results, lenTable, attrList, tryIndex, generalDict = {}, results=None):
279        self.insertItem(self.findTargetIndex(accuracy), accuracy, other_results, lenTable, attrList, tryIndex, generalDict)
280
281    # use bisection to find correct index
282    def findTargetIndex(self, accuracy):
283        funct = self.getMaxFunct()
284        top = 0; bottom = len(self.results)
285
286        while (bottom-top) > 1:
287            mid  = (bottom + top)/2
288            if funct(accuracy, self.results[mid][ACCURACY]) == accuracy: bottom = mid
289            else: top = mid
290
291        if len(self.results) == 0: return 0
292        if funct(accuracy, self.results[top][ACCURACY]) == accuracy:
293            return top
294        else:
295            return bottom
296
297    # insert new result - give parameters: accuracy of projection, number of examples in projection and list of attributes.
298    def insertItem(self, index, accuracy, other_results, lenTable, attrList, tryIndex, generalDict = {}, updateStatusBar = 0):
299        if index < self.maxResultListLen:
300            self.results.insert(index, (accuracy, other_results, lenTable, attrList, tryIndex, generalDict))
301
302
303    # kNNClassifyData - compute classification error for every example in table
304    def kNNClassifyData(self, table):
305        if len(table) == 0:
306            return [], []
307
308        # check if we have a discrete class
309        if not table.domain.classVar or not table.domain.classVar.varType == orange.VarTypes.Discrete:
310            return [], []
311
312        if self.externalLearner: learner = self.externalLearner
313        else:                    learner = self.createkNNLearner()
314        results = apply(testingMethods[self.testingMethod], [[learner], table])
315
316        returnTable = []
317
318        if table.domain.classVar.varType == orange.VarTypes.Discrete:
319            probabilities = numpy.zeros((len(table), len(table.domain.classVar.values)), numpy.float)
320            lenClassValues = len(list(table.domain.classVar.values))
321            if self.qualityMeasure in [AVERAGE_CORRECT, AUC]:       # for AUC we have no way of computing the prediction accuracy for each example
322                for i in range(len(results.results)):
323                    res = results.results[i]
324                    returnTable.append(res.probabilities[0][res.actualClass])
325                    probabilities[i] = res.probabilities[0]
326            elif self.qualityMeasure == BRIER_SCORE:
327                for i in range(len(results.results)):
328                    res = results.results[i]
329                    s = sum([val*val for val in res.probabilities[0]])
330                    returnTable.append((s + 1 - 2*res.probabilities[0][res.actualClass])/float(lenClassValues))
331                    probabilities[i] = res.probabilities[0]
332            elif self.qualityMeasure == CLASS_ACCURACY:
333                for i in range(len(results.results)):
334                    res = results.results[i]
335                    returnTable.append(res.probabilities[0][res.actualClass] == max(res.probabilities[0]))
336                    probabilities[i] = res.probabilities[0]
337            else:
338                print "unknown quality measure for kNNClassifyData"
339        else:
340            probabilities = None
341            # for continuous class we can't compute brier score and classification accuracy
342            for res in results.results:
343                if not res.probabilities[0]: returnTable.append(0)
344                else:                        returnTable.append(res.probabilities[0].density(res.actualClass))
345
346        return returnTable, probabilities
347
348    # kNNClassifyData - compute classification error for every example in table
349    def kNNClassifyData(self, table):
350        if len(table) == 0:
351            return [], []
352
353        # check if we have a discrete class
354        if not table.domain.classVar or not table.domain.classVar.varType == orange.VarTypes.Discrete:
355            return [], []
356
357        if self.externalLearner: learner = self.externalLearner
358        else:                    learner = self.createkNNLearner()
359        results = apply(testingMethods[self.testingMethod], [[learner], table])
360
361        returnTable = []
362
363        if table.domain.classVar.varType == orange.VarTypes.Discrete:
364            probabilities = numpy.zeros((len(table), len(table.domain.classVar.values)), numpy.float)
365            lenClassValues = max(1, len(list(table.domain.classVar.values)))
366            if self.qualityMeasure in [AVERAGE_CORRECT, AUC]:       # for AUC we have no way of computing the prediction accuracy for each example
367                for i in range(len(results.results)):
368                    res = results.results[i]
369                    returnTable.append(res.probabilities[0][res.actualClass])
370                    probabilities[i] = res.probabilities[0]
371            elif self.qualityMeasure == BRIER_SCORE:
372                for i in range(len(results.results)):
373                    res = results.results[i]
374                    s = sum([val*val for val in res.probabilities[0]])
375                    returnTable.append((s + 1 - 2*res.probabilities[0][res.actualClass])/float(lenClassValues))
376                    probabilities[i] = res.probabilities[0]
377            elif self.qualityMeasure == CLASS_ACCURACY:
378                for i in range(len(results.results)):
379                    res = results.results[i]
380                    returnTable.append(res.probabilities[0][res.actualClass] == max(res.probabilities[0]))
381                    probabilities[i] = res.probabilities[0]
382            else:
383                print "unknown quality measure for kNNClassifyData"
384        else:
385            probabilities = None
386            # for continuous class we can't compute brier score and classification accuracy
387            for res in results.results:
388                if not res.probabilities[0]: returnTable.append(0)
389                else:                        returnTable.append(res.probabilities[0].density(res.actualClass))
390
391        return returnTable, probabilities
392
393    # kNNEvaluate - evaluate class separation in the given projection using a heuristic or k-NN method
394    def kNNComputeAccuracy(self, table):
395        # select a subset of the data if necessary
396        if self.percentDataUsed != 100:
397            indices = orange.MakeRandomIndices2(table, 1.0-float(self.percentDataUsed)/100.0)
398            testTable = table.select(indices)
399        else:
400            testTable = table
401
402        if len(testTable) == 0: return 0, 0
403
404        if self.evaluationAlgorithm == ALGORITHM_KNN or self.externalLearner:
405            if self.externalLearner: learner = self.externalLearner
406            else:                    learner = self.createkNNLearner(); weight = 0
407
408            if self.useExampleWeighting and testTable.domain.classVar and testTable.domain.classVar.varType == orange.VarTypes.Discrete:
409                testTable, weightID = orange.Preprocessor_addClassWeight(testTable, equalize=1)
410                results = apply(testingMethods[self.testingMethod], [[learner], (testTable, weightID)])
411            else:
412                results = apply(testingMethods[self.testingMethod], [[learner], testTable])
413
414            # compute classification success using selected measure
415            if testTable.domain.classVar.varType == orange.VarTypes.Discrete:
416                return self.computeAccuracyFromResults(testTable, results)
417
418            # for continuous class we can't compute brier score and classification accuracy
419            else:
420                val = 0.0
421                if not results.results or not results.results[0].probabilities[0]: return 0, 0
422                for res in results.results:  val += res.probabilities[0].density(res.actualClass)
423                if len(results.results) > 0: val/= float(len(results.results))
424                return 100.0*val, (100.0*val)
425
426        # ###############################
427        # do we want to use very fast heuristic
428        # ###############################
429        elif self.evaluationAlgorithm == ALGORITHM_HEURISTIC:
430            # if input attributes are continuous (may be discrete for evaluating scatterplots, where we dicretize the whole domain...)
431            if testTable.domain[0].varType == orange.VarTypes.Continuous and testTable.domain[1].varType == orange.VarTypes.Continuous:
432                discX = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
433                discY = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
434                testTable = testTable.select([discX, discY, testTable.domain.classVar])
435
436            currentClassDistribution = [int(v) for v in orange.Distribution(testTable.domain.classVar, testTable)]
437            prediction = [0.0 for i in range(len(testTable.domain.classVar.values))]
438
439            # create a new attribute that is a cartesian product of the two visualized attributes
440            nattr = orange.EnumVariable(values=[str(i) for i in range(NUMBER_OF_INTERVALS*NUMBER_OF_INTERVALS)])
441            nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1])
442            for i in range(len(nattr.getValueFrom.lookupTable)): nattr.getValueFrom.lookupTable[i] = i
443
444            for dist in orange.ContingencyAttrClass(nattr, testTable):
445                dist = list(dist)
446                if sum(dist) == 0: continue
447                m = max(dist)
448                prediction[dist.index(m)] += m * m / float(sum(dist))
449
450            prediction = [val*100.0 for val in prediction]             # turn prediction array into percents
451            acc = sum(prediction) / float(max(1, len(testTable)))               # compute accuracy for all classes
452            val = 0.0; s = 0.0
453            for index in self.selectedClasses:                          # compute accuracy for selected classes
454                val += prediction[index]
455                s += currentClassDistribution[index]
456            for i in range(len(prediction)):
457                prediction[i] /= float(max(1, currentClassDistribution[i]))    # turn to probabilities
458            return val/float(max(1,s)), (acc, prediction, currentClassDistribution)
459        else:
460            return 0, 0     # in case of an invalid value
461
462
463    def computeAccuracyFromResults(self, table, results):
464        prediction = [0.0 for i in range(len(table.domain.classVar.values))]
465        countsByFold =  [0 for i in range(results.numberOfIterations)]
466
467        if self.qualityMeasure == AVERAGE_CORRECT:
468            for res in results.results:
469                if not res.probabilities[0]: continue
470                prediction[res.actualClass] += res.probabilities[0][res.actualClass]
471                countsByFold[res.iterationNumber] += 1
472            prediction = [val*100.0 for val in prediction]
473
474        elif self.qualityMeasure == BRIER_SCORE:
475            #return orngStat.BrierScore(results)[0], results
476            for res in results.results:
477                if not res.probabilities[0]: continue
478                prediction[res.actualClass] += sum([prob*prob for prob in res.probabilities[0]]) - 2*res.probabilities[0][res.actualClass] + 1
479                countsByFold[res.iterationNumber] += 1
480
481        elif self.qualityMeasure == CLASS_ACCURACY:
482            #return 100*orngStat.CA(results)[0], results
483            for res in results.results:
484                prediction[res.actualClass] += res.classes[0]==res.actualClass
485                countsByFold[res.iterationNumber] += 1
486            prediction = [val*100.0 for val in prediction]
487        elif self.qualityMeasure == AUC:
488            aucResult = orngStat.AUC(results)
489            if aucResult:
490                return aucResult[0], None
491            else:
492                return 0, None
493
494        # compute accuracy only for classes that are selected as interesting. other class values do not participate in projection evaluation
495        acc = sum(prediction) / float(max(1, len(results.results)))                 # accuracy over all class values
496        classes = self.selectedClasses or range(len(self.graph.dataDomain.classVar.values))
497        val = sum([prediction[index] for index in classes])    # accuracy over all selected classes
498
499        currentClassDistribution = [int(v) for v in orange.Distribution(table.domain.classVar, table)]
500        s = sum([currentClassDistribution[index] for index in classes])
501
502        prediction = [prediction[i] / float(max(1, currentClassDistribution[i])) for i in range(len(prediction))] # turn to probabilities
503       
504        if self.saveEvaluationResults:
505            self.evaluationResults = results
506       
507        return val/max(1, float(s)), (acc, prediction, list(currentClassDistribution))
508
509
510    # Argumentation functions
    def findArguments(self, example):
        """Classify `example` using the evaluated projections as arguments.

        Returns (classValue, dist). self.arguments is rebuilt as one list per
        class value, each holding tuples
        (class probability, distribution, attribute list, projection index).

        Without data, without a class or without evaluated projections the
        prediction of orange.MajorityLearner on the raw data is returned.
        """
        self.clearArguments()
        if not self.graph.haveData or not self.graph.dataHasClass or len(self.results) == 0:
            if len(self.results) == 0: print 'To classify an example using VizRank you first have to evaluate some projections.'
            return orange.MajorityLearner(self.graph.rawData)(example, orange.GetBoth)

        # one argument list and one accumulated vote per class value
        self.arguments = [[] for i in range(len(self.graph.dataDomain.classVar.values))]
        vals = [0.0 for i in range(len(self.arguments))]

        if self.rankArgumentsByStrength == 1:
            # classify with the first argumentCount + 50 projections (or fewer if
            # there are not that many), using all examples as neighbours ...
            for index in range(min(len(self.results), self.argumentCount + 50)):
                classValue, dist = self.computeClassificationForExample(index, example, kValue = len(self.graph.rawData))
                if classValue and dist:
                    for i in range(len(self.arguments)):
                        self.arguments[i].insert(self.getArgumentIndex(dist[i], i), (dist[i], dist, self.results[index][ATTR_LIST], index))

            # ... then keep only the argumentCount strongest arguments of each class
            for i in range(len(self.arguments)):
                arr = self.arguments[i]
                arr.sort()
                arr.reverse()
                arr = arr[:self.argumentCount]
                self.arguments[i] = arr
                vals[i] = sum([arg[0] for arg in arr])
        else:
            # walk the projections in ranked order until argumentCount of them
            # have produced a classification
            usedArguments = 0; index = 0
            while usedArguments < self.argumentCount and index < len(self.results):
                classValue, dist = self.computeClassificationForExample(index, example, kValue = self.getkValue(kValueFormula = 0))
                if classValue and dist:
                    for i in range(len(self.arguments)):
                        self.arguments[i].insert(self.getArgumentIndex(dist[i], i), (dist[i], dist, self.results[index][ATTR_LIST], index))
                        vals[i] += dist[i]
                    usedArguments += 1
                index += 1

        # no argument voted for any class: fall back to the class distribution of the data
        suma = sum(vals)
        if suma == 0:
            dist = orange.Distribution(self.graph.dataDomain.classVar.name, self.graph.rawData)
            vals = [dist[i] for i in range(len(dist))]; suma = sum(vals)

        # predict the class with the largest vote; the votes are normalized by their sum
        classValue = example.domain.classVar[vals.index(max(vals))]
        dist = orange.DiscDistribution([val/float(suma) for val in vals])
        dist.variable = self.graph.dataDomain.classVar
        return classValue, dist
554
555
556    def computeClassificationForExample(self, projectionIndex, example, kValue = -1):
557        (accuracy, other_results, lenTable, attrList, tryIndex, generalDict) = self.results[projectionIndex]
558
559        if 1 in [example[attr].isSpecial() for attr in attrList]: return None, None
560
561        attrIndices = [self.graph.attributeNameIndex[attr] for attr in attrList]
562        attrVals = [self.graph.scaleExampleValue(example, ind) for ind in attrIndices]
563
564        table = self.graph.createProjectionAsExampleTable(attrIndices, settingsDict = generalDict)
565        [xTest, yTest] = self.graph.getProjectedPointPosition(attrIndices, attrVals, settingsDict = generalDict)
566
567        learner = self.externalLearner or self.createkNNLearner(k = kValue)
568        if self.useExampleWeighting: table, weightID = orange.Preprocessor_addClassWeight(table, equalize=1)
569        else: weightID = 0
570
571        classifier = learner(table, weightID)
572        classVal, dist = classifier(orange.Example(table.domain, [xTest, yTest, "?"]), orange.GetBoth)
573        return classVal, dist
574
575
576    def getArgumentIndex(self, value, classValue):
577        top = 0; bottom = len(self.arguments[classValue])
578        while (bottom-top) > 1:
579            mid  = (bottom + top)/2
580            if max(value, self.arguments[classValue][mid][0]) == value: bottom = mid
581            else: top = mid
582
583        if len(self.arguments[classValue]) == 0: return 0
584        if max(value, self.arguments[classValue][top][0]) == value:  return top
585        else:                                                        return bottom
586
587    def correctSettingsIfNecessary(self):
588        if not self.graph.haveData: return
589        # check if we have discrete attributes. if yes, then make sure we are not using s2nMix measure and GAMMA_SINGLE
590        if orange.VarTypes.Discrete in [attr.varType for attr in self.graph.dataDomain.attributes]:
591            if self.attrCont == CONT_MEAS_S2NMIX:           self.attrCont = CONT_MEAS_S2N
592            if self.attrSubsetSelection == GAMMA_SINGLE:    self.attrSubsetSelection = GAMMA_ALL
593
594    def isEvaluationCanceled(self):
595        stop = 0
596        if self.timeLimit > 0: stop = (time.time() - self.startTime) / 60 >= self.timeLimit
597        if self.projectionLimit > 0: stop = stop or self.evaluatedProjectionsCount >= self.projectionLimit
598        return stop
599
600    def isOptimizationCanceled(self):
601        stop = 0
602        if self.optimizeTimeLimit > 0: stop = (time.time() - self.startTime) / 60 >= self.optimizeTimeLimit
603        if self.optimizeProjectionLimit > 0: stop = stop or self.optimizedProjectionsCount >= self.optimizeProjectionLimit
604        return stop
605
606
607    # get a new subset of attributes. if attributes are not evaluated yet then evaluate them and save info to evaluationData dict.
608    def selectNextAttributeSubset(self, minLength, maxLength):
609        z = self.evaluationData.get("z", minLength-1)
610        u = self.evaluationData.get("u", minLength-1)
611        self.evaluationData["combinations"] = []
612        self.evaluationData["index"] = 0
613
614        # if we use heuristic to find attribute orders
615        if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE:
616            if not self.evaluationData.has_key("attrs"):
617                attributes, attrsByClass = orngVisFuncts.findAttributeGroupsForRadviz(self.graph.rawData, orngVisFuncts.S2NMeasureMix())
618                attributes = [self.graph.attributeNameIndex[name] for name in attributes]
619                attrsByClass = [[self.graph.attributeNameIndex[name] for name in arr] for arr in attrsByClass]
620                self.evaluationData["attrs"] = (attributes, attrsByClass)
621            else:
622                attributes, attrsByClass = self.evaluationData["attrs"]
623
624            if z >= len(attributes): return None      # did we already try all the attributes
625            numClasses = len(self.graph.dataDomain.classVar.values)
626            if self.attrSubsetSelection in [GAMMA_ALL, GAMMA_SINGLE]:
627                combinations = self.getAttributeSubsetUsingGammaDistribution(u+1)
628            else:
629                combinations = orngVisFuncts.combinations(range(z), u)
630                for i in range(len(combinations))[::-1]:
631                    comb = combinations[i] + [z]
632                    counts = [0] * numClasses
633                    for ind in comb: counts[ind%numClasses] += 1
634                    if max(counts) - min(counts) > 1:
635                        combinations.pop(i)     # ignore combinations that don't have approximately the same number of attributes for each class value
636                        continue
637                    attrList = [[] for c in range(numClasses)]
638                    for ind in comb: attrList[ind % numClasses].append(attributes[ind])
639                    combinations[i] = attrList
640
641        # no heuristic. try all combinations of a group of attributes
642        else:
643            if not self.evaluationData.has_key("attrs"):
644                # evaluate attributes
645                evaluatedAttributes = self.getEvaluatedAttributes()
646                attributes = [self.graph.attributeNameIndex[name] for name in evaluatedAttributes]
647                self.evaluationData["attrs"] = attributes
648                self.totalPossibilities = 0
649
650                # build list of indices for permutations of different number of attributes
651                permutationIndices = {}
652                for i in range(minLength, maxLength+1):
653                    if i > len(attributes): continue        # if we don't have enough attributes
654                    if self.projOptimizationMethod != 0 or self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
655                        permutationIndices[i] = [range(i)]
656                    else:
657                        permutationIndices[i] = orngVisFuncts.generateDifferentPermutations(range(i))
658                    self.totalPossibilities += orngVisFuncts.combinationsCount(i, len(attributes)) * len(permutationIndices[i])
659##                sys.stderr.write("selectNextAttributeSubset " + str(permutationIndices.keys()) + "\n")
660                self.evaluationData["permutationIndices"] = permutationIndices
661            else:
662                attributes = self.evaluationData["attrs"]
663
664            # do we have enough attributes at all?
665            if len(attributes) < u+1:
666                combinations = []
667            else:
668                # if we don't want to use any heuristic
669                if self.attrCont == CONT_MEAS_NONE and self.attrDisc == DISC_MEAS_NONE:
670                    combination = []
671                    while len(combination) < u+1:
672                        v = random.randint(0, len(self.graph.dataDomain.attributes)-1)
673                        if v not in combination: combination.append(v)
674                    combinations = [combination]
675                elif self.attrSubsetSelection == DETERMINISTIC_ALL:
676                    if z >= len(attributes): return None      # did we already try all the attributes
677                    combinations = orngVisFuncts.combinations(attributes[:z], u)
678                    map(list.append, combinations, [attributes[z]] * len(combinations))     # append the z-th attribute to all combinations in the list
679                elif self.attrSubsetSelection in [GAMMA_ALL, GAMMA_SINGLE]:
680                    combinations = self.getAttributeSubsetUsingGammaDistribution(u+1)
681
682        # update values for the number of attributes
683        u += 1
684        self.evaluationData["u"] = (u >= maxLength and minLength-1) or u
685        if self.attrSubsetSelection == DETERMINISTIC_ALL:
686            self.evaluationData["z"] = (u >= maxLength and z+1) or z
687
688        self.evaluationData["combinations"] = combinations
689        return combinations
690
691    # use gamma distribution to select a subset of attrCount attributes. if we want to use heuristic to find attribute order then
692    # apply gamma distribution on attribute lists for each class value.
    # before returning a subset of attributes also test if this subset was already tested. if yes, then try to generate a new subset (repeat this at most maxTries times)
694    def getAttributeSubsetUsingGammaDistribution(self, attrCount):
695        maxTries = 100
696        triedDict = self.evaluationData.get("triedCombinations", {})
697        projCountWidth = len(triedDict.keys()) / 1000
698
699        if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE:
700            numClasses = len(self.graph.dataDomain.classVar.values)
701            attributes, attrsByClass = self.evaluationData["attrs"]
702
703            for i in range(maxTries):
704                attrList = [[] for c in range(numClasses)]; attrs = []
705                tried = 0
706                while len(attrs) < min(attrCount, len(self.graph.dataDomain.attributes)):
707                    ind = tried%numClasses
708                    #ind = random.randint(0, numClasses-1)       # warning: this can generate uneven groups for each class value!!!
709                    attr = attrsByClass[ind][int(random.gammavariate(1, 5 + i/10 + projCountWidth))%len(attrsByClass[ind])]
710                    if attr not in attrList[ind]:
711                        attrList[ind].append(attr)
712                        attrs.append(attr)
713                    tried += 1
714                attrs.sort()
715                if not triedDict.has_key(tuple(attrs)) and len(attrs) == attrCount:
716                    self.evaluationData["triedCombinations"][tuple(attrs)] = 1     # this is not the best, since we don't want to save used combinations since we only test one permutation
717                    #return [filter(None, attrList)]        # problem: using filter removes value 0 from the array, which means that the attribute ranked as best wont be in the projections
718                    return [attrList]
719        else:
720            attributes = self.evaluationData["attrs"]
721            for i in range(maxTries):
722                attrList = []
723                while len(attrList) < min(attrCount, len(attributes)):
724                    attr = attributes[int(random.gammavariate(1,5 + (len(attributes)/1000) + projCountWidth))%len(attributes)]
725                    if attr not in attrList:
726                        attrList.append(attr)
727                attrList.sort()
728                if not triedDict.has_key(tuple(attrList)):
729                    triedDict[tuple(attrList)] = 1
730                    #return [filter(None, attrList)]        # problem: using filter removes value 0 from the array, which means that the attribute ranked as best wont be in the projections
731                    return [attrList]
732        return None
733
734    # generate possible permutations of the current attribute subset. use evaluationData dict to find which attribute subset to use.
735    def getNextPermutations(self):
736        combinations = self.evaluationData["combinations"]
737        index  = self.evaluationData["index"]
738        if not combinations or index >= len(combinations):
739            return None     # did we test all the projections
740
741        combination = combinations[index]
742        permutations = []
743
744        if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE:
745            # if we don't want to test all placements then we only create a permutation of groups and attributes in each group
746            if self.attrSubsetSelection == GAMMA_SINGLE:
747                permutations = [reduce(operator.add, combination)]
748                usedPerms = {tuple(permutations[0]):1}
749                for c in range(10):
750                    combination = [[group.pop(random.randint(0, len(group)-1)) for num in range(len(group))] for group in [combination.pop(random.randint(0, len(combination)-1)) for i in range(len(combination))]]
751                    comb = reduce(operator.add, combination)
752                    if not usedPerms.has_key(tuple(comb)):
753                        usedPerms[tuple(comb)] = 1
754                        permutations.append(comb)
755
756            # create only one permutation, because its all we need
757            elif self.projOptimizationMethod != 0 or self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
758                permutations.append(reduce(operator.add, combination))
759            else:
760                for proj in orngVisFuncts.createProjections(len(self.graph.dataDomain.classVar.values), sum([len(group) for group in combination])):
761                    try: permutations.append([combination[i][j] for (i,j) in proj])
762                    except: pass
763        else:
764            permutationIndices = self.evaluationData["permutationIndices"]
765##            sys.stderr.write("getNextPermutations " + str(permutationIndices.keys()) + "\n")
766            permutations = [[combination[val] for val in ind] for ind in permutationIndices[len(combination)]]
767
768        self.evaluationData["index"] = index + 1
769        return permutations
770
771    def computeTotalHeight(self, node):
772        if node.branches: 
773            return node.height * (node.last - node.first) + sum([self.computeTotalHeight(n) for n in node.branches])
774        else:
775            return node.height
776
777    def evaluateProjection(self, data):
778        if self.graph.dataHasDiscreteClass:
779            return self.kNNComputeAccuracy(data)
780        elif self.graph.dataHasContinuousClass:
781            return 0
782        else:
783            matrix = orange.SymMatrix(len(data))
784            matrix.setattr('items', data)
785            dist = orange.ExamplesDistanceConstructor_Euclidean(data)
786            for i in range(len(data)):
787                for j in range(i+1):
788                    matrix[i, j] = dist(data[i], data[j])
789            root = orange.HierarchicalClustering(matrix, linkage = orange.HierarchicalClustering.Ward, overwriteMatrix = 0)
790            val = self.computeTotalHeight(root)
791            return val, (val)
792           
793
794    # ##########################################################################
795    # MAIN FUNCTION FOR EVALUATING PROJECTIONS
796    # ##########################################################################
797    def evaluateProjections(self, clearPreviousProjections = 1):
798        random.seed(0)      # always use the same seed to make results repeatable
799        if not self.graph.haveData: return 0
800       
801        # TO DO: remove the following line when we add support for cont class
802        if not self.graph.dataHasDiscreteClass: return 0
803        self.correctSettingsIfNecessary()
804        if self.timeLimit == self.projectionLimit == 0 and self.__class__.__name__ == "VizRank":
805            print "Evaluation of projections was started without any time or projection restrictions. To prevent an indefinite projection evaluation a time limit of 2 hours was set."
806            self.timeLimit = 2 * 60
807
808        self.startTime = time.time()
809
810        if clearPreviousProjections:
811            self.evaluatedProjectionsCount = 0
812            self.optimizedProjectionsCount = 0
813            self.evaluationData = {}            # clear all previous data about tested permutations and stuff
814            self.evaluationData["triedCombinations"] = {}
815            self.clearResults()
816
817        self.clearArguments()
818        maxFunct = self.getMaxFunct()
819       
820        if self.__class__ != VizRank:
821            from PyQt4.QtGui import qApp
822
823#        if not self.graph.dataHasDiscreteClass:
824#            print "Projections can be evaluated only for data with a discrete class."
825#            return 0
826
827        if self.visualizationMethod == SCATTERPLOT:
828            evaluatedAttributes = self.getEvaluatedAttributes()
829            contVars = [orange.FloatVariable(attr.name) for attr in self.graph.dataDomain.attributes]
830            attrCount = len(self.graph.dataDomain.attributes)
831
832            count = len(evaluatedAttributes)*(len(evaluatedAttributes)-1)/2
833            strCount = orngVisFuncts.createStringFromNumber(count)
834           
835            for i in range(len(evaluatedAttributes)):
836                attr1 = self.graph.attributeNameIndex[evaluatedAttributes[i]]
837                for j in range(i):
838                    attr2 = self.graph.attributeNameIndex[evaluatedAttributes[j]]
839                    self.evaluatedProjectionsCount += 1
840                    if self.isEvaluationCanceled():
841                        return self.evaluatedProjectionsCount
842
843                    table = self.graph.createProjectionAsExampleTable([attr1, attr2])
844                    if len(table) < self.minNumOfExamples: continue
845                    accuracy, other_results = self.evaluateProjection(table)
846                    generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
847                    self.addResult(accuracy, other_results, len(table), [self.graph.dataDomain[attr1].name, self.graph.dataDomain[attr2].name], self.evaluatedProjectionsCount, generalDict=generalDict)
848
849                    if self.__class__ != VizRank:
850                        self.setStatusBarText("Evaluated %s/%s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount), strCount))
851                        self.parentWidget.progressBarSet(100.0*self.evaluatedProjectionsCount/max(1,float(count)))
852
853        # #################### RADVIZ, LINEAR_PROJECTION  ################################
854        elif self.visualizationMethod in (RADVIZ, LINEAR_PROJECTION, POLYVIZ, KNN_IN_ORIGINAL_SPACE):
855            if self.projOptimizationMethod != 0:
856                self.freeviz.useGeneralizedEigenvectors = 1
857                self.graph.normalizeExamples = 0
858
859            # variables and domain for the table
860            domain = orange.Domain([orange.FloatVariable("xVar"), orange.FloatVariable("yVar"), orange.EnumVariable(self.graph.dataDomain.classVar.name, values = getVariableValuesSorted(self.graph.dataDomain.classVar))])
861            minLength = (self.optimizationType == EXACT_NUMBER_OF_ATTRS and self.attributeCount) or 3
862            maxLength = self.attributeCount
863            classListFull = self.graph.originalData[self.graph.dataClassIndex]
864
865            # each call to selectNextAttributeSubset gets a new combination of attributes in a range from minLength to maxLength. if we return None for a given number of attributes this
866            # doesn't mean yet that there are no more possible combinations. it may be just that we wanted a combination of 6 attributes in a domain with 4 attributes. therefore we have
867            # to try maxLength-minLength+1 times and if we fail every time then there are no more valid projections
868
869            newProjectionsExist = 1
870            while newProjectionsExist:
871                for experiment in range(maxLength-minLength+1):
872                    if self.selectNextAttributeSubset(minLength, maxLength): break
873                    newProjectionsExist = 0
874                permutations = self.getNextPermutations()
875                while permutations:
876                    attrIndices = permutations[0]
877
878                    # if we use SPCA, PLS or KNN_IN_ORIGINAL_SPACE
879                    if self.projOptimizationMethod != 0 or self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
880                        if self.visualizationMethod == KNN_IN_ORIGINAL_SPACE:
881                            table = self.graph.rawData.select([self.graph.dataDomain[attr] for attr in attrIndices] + [self.graph.dataDomain.classVar] )
882                            xanchors, yanchors = self.graph.createXAnchors(len(attrIndices)), self.graph.createYAnchors(len(attrIndices))
883                            attrNames = [self.graph.dataDomain[attr].name for attr in attrIndices]
884                        else:
885                            projections = self.freeviz.findProjection(self.projOptimizationMethod, attrIndices, setAnchors = 0, percentDataUsed = self.percentDataUsed)
886                            if projections != None:
887                                xanchors, yanchors, (attrNames, newIndices) = projections
888                                table = self.graph.createProjectionAsExampleTable(newIndices, domain = domain, XAnchors = xanchors, YAnchors = yanchors)
889                        if len(table) < self.minNumOfExamples: continue
890                        self.evaluatedProjectionsCount += 1
891                        accuracy, other_results = self.evaluateProjection(table)
892                        generalDict = {"XAnchors": list(xanchors), "YAnchors": list(yanchors), "Results": self.evaluationResults} if self.saveEvaluationResults else {"XAnchors": list(xanchors), "YAnchors": list(yanchors)}
893                        self.addResult(accuracy, other_results, len(table), attrNames, self.evaluatedProjectionsCount, generalDict = generalDict)
894                        if self.isEvaluationCanceled(): return self.evaluatedProjectionsCount
895                        if self.__class__ != VizRank:
896                            self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount)))
897                    else:
898                        XAnchors = self.graph.createXAnchors(len(attrIndices))
899                        YAnchors = self.graph.createYAnchors(len(attrIndices))
900                        validData = self.graph.getValidList(attrIndices)
901                        if numpy.sum(validData) >= self.minNumOfExamples:
902                            classList = numpy.compress(validData, classListFull)
903                            selectedData = numpy.compress(validData, numpy.take(self.graph.noJitteringScaledData, attrIndices, axis = 0), axis = 1)
904                            sum_i = self.graph._getSum_i(selectedData)
905
906                            tempList = []
907
908                            # for every permutation compute how good it separates different classes
909                            for permutation in permutations:
910                                if self.evaluatedProjectionsCount % 10 == 0 and self.isEvaluationCanceled():
911                                    continue
912
913                                table = self.graph.createProjectionAsExampleTable(permutation, validData = validData, classList = classList, sum_i = sum_i, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain)
914                                accuracy, other_results = self.evaluateProjection(table)
915
916                                # save the permutation
917                                if self.storeEachPermutation:
918                                    generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
919                                    self.addResult(accuracy, other_results, len(table), [self.graph.attributeNames[i] for i in permutation], self.evaluatedProjectionsCount, generalDict)
920                                else:
921                                    tempList.append((accuracy, other_results, len(table), [self.graph.attributeNames[i] for i in permutation]))
922
923                                self.evaluatedProjectionsCount += 1
924                                if self.__class__ != VizRank:
925                                    self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount)))
926                                    qApp.processEvents()        # allow processing of other events
927
928                            if not self.storeEachPermutation and len(tempList) > 0:   # return only the best attribute placements
929                                (acc, other_results, lenTable, attrList) = maxFunct(tempList)
930                                generalDict = {"Results": self.evaluationResults} if self.saveEvaluationResults else {}
931                                self.addResult(acc, other_results, lenTable, attrList, self.evaluatedProjectionsCount, generalDict=generalDict)
932
933                        if self.isEvaluationCanceled():
934                            return self.evaluatedProjectionsCount
935
936                    permutations = self.getNextPermutations()
937        else:
938            print "unknown visualization method"
939
940        return self.evaluatedProjectionsCount
941
942    def getProjectionQuality(self, attrList, useAnchorData = 0):
943        if not self.graph.haveData: return 0.0, None
944        table = self.graph.createProjectionAsExampleTable([self.graph.attributeNameIndex[attr] for attr in attrList], useAnchorData = useAnchorData)
945        return self.evaluateProjection(table)
946
947
948    def insertTempProjection(self, projections, acc, attrList):
949        if len(projections) == 0: return [(acc, attrList)]
950
951        top = 0; bottom = len(projections)
952        while (bottom-top) > 1:
953            mid  = (bottom + top)/2
954            if max(acc, projections[mid][0]) == acc: bottom = mid
955            else: top = mid
956
957        if max(acc, projections[top][0]) == acc: projections.insert(top, (acc, attrList))
958        else:                                    projections.insert(bottom, (acc, attrList))
959
960    # ##########################################################################
961    # FUNCTION FOR OPTIMIZING BEST PROJECTIONS
962    # ##########################################################################
963    def optimizeBestProjections(self, restartWhenImproved = 1):
964        random.seed(0)      # always use the same seed to make results repeatable
965        count = min(len(self.results), self.locOptProjCount)
966        if not count: return
967        self.correctSettingsIfNecessary()
968        self.optimizedProjectionsCount = 0
969        """
970        if self.optimizeTimeLimit == self.optimizeProjectionLimit == 0:
971            print "Optimization of projections was started without any time or projection restrictions. To prevent an indefinite projection optimization a time limit of 2 hours was set."
972            self.optimizeProjectionLimit = 2 * 60
973        """
974
975        if self.__class__ != VizRank:
976            from PyQt4.QtGui import qApp
977
978        attrs = [self.results[i][ATTR_LIST] for i in range(count)]                                   # create a list of attributes that are in the top projections
979        attrs = [[self.graph.attributeNameIndex[name] for name in projection] for projection in attrs]    # find indices from the attribute names
980        accuracys = [self.getProjectionQuality(self.results[i][ATTR_LIST])[0] for i in range(count)]
981        projections = [(accuracys[i], attrs[i]) for i in range(len(accuracys))]
982
983        domain = orange.Domain([orange.FloatVariable("xVar"), orange.FloatVariable("yVar"), orange.EnumVariable(self.graph.dataDomain.classVar.name, values = getVariableValuesSorted(self.graph.dataDomain.classVar))])
984        attributes = [self.graph.attributeNameIndex[name] for name in self.getEvaluatedAttributes()[:self.locOptAttrsToTry]]
985        self.startTime = time.time()
986        lenOfAttributes = len(attributes)
987        maxFunct = self.getMaxFunct()
988
989        if self.visualizationMethod == SCATTERPLOT:
990            classListFull = self.graph.originalData[self.graph.dataClassIndex]
991
992            tempDict = {}
993            projIndex = 0
994            while len(projections) > 0:
995                (accuracy, projection) = projections.pop(0)
996                projIndex -= 1
997
998                significantImprovement = 0
999                strTotalAtts = orngVisFuncts.createStringFromNumber(lenOfAttributes)
1000                for (attrIndex, attr) in enumerate(attributes):
1001                    if attr in projection: continue
1002                    testProjections = []
1003                    if not tempDict.has_key((projection[0], attr)) and not tempDict.has_key((attr, projection[0])): testProjections.append([projection[0], attr])
1004                    if not tempDict.has_key((projection[1], attr)) and not tempDict.has_key((attr, projection[1])): testProjections.append([attr, projection[1]])
1005
1006                    for testProj in testProjections:
1007                        table = self.graph.createProjectionAsExampleTable(testProj, domain = domain)
1008                        if len(table) < self.minNumOfExamples: continue
1009                        acc, other_results = self.evaluateProjection(table)
1010                        if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0:
1011                            self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1012                        if acc > accuracy:
1013                            self.addResult(acc, other_results, len(table), [self.graph.attributeNames[i] for i in testProj], projIndex)
1014                            self.insertTempProjection(projections, acc, testProj)
1015                            tempDict[tuple(testProj)] = 1
1016                            if min(acc, accuracy) != 0 and max(acc, accuracy) > 1.005 *min(acc, accuracy):  significantImprovement = 1
1017
1018                        self.optimizedProjectionsCount += 1
1019                        if self.__class__ != VizRank:
1020                            qApp.processEvents()        # allow processing of other events
1021                        if self.optimizedProjectionsCount % 10 == 0 and self.isOptimizationCanceled():
1022                            return self.optimizedProjectionsCount
1023                    if significantImprovement: break
1024
1025        # #################### RADVIZ, LINEAR_PROJECTION  ################################
1026        elif self.visualizationMethod in (RADVIZ, LINEAR_PROJECTION, POLYVIZ):
1027            numClasses = len(self.graph.dataDomain.classVar.values)
1028
1029            classListFull = self.graph.originalData[self.graph.dataClassIndex]
1030            newProjDict = {}
1031            projIndex = 0
1032
1033            while len(projections) > 0:
1034                (accuracy, projection) = projections.pop(0)
1035                projIndex -= 1
1036
1037                # first try to use the attributes in the projection and evaluate only different permutations of these attributes
1038                if self.locOptOptimizeProjectionByPermutingAttributes == 1 and self.projOptimizationMethod == 0:
1039                    bestProjection = projection; tempProjection = projection
1040                    bestAccuracy = accuracy; tempAccuracy = accuracy
1041                    triedPermutationsDict = {}
1042                    failedConsecutiveTries = 0
1043                    tries = 0
1044                    XAnchors = self.graph.createXAnchors(len(projection))
1045                    YAnchors = self.graph.createYAnchors(len(projection))
1046                    validData = self.graph.getValidList(projection)
1047                    classList = numpy.compress(validData, classListFull)
1048                    while failedConsecutiveTries < 5 and tries < 50:
1049                        #newProj = orngVisFuncts.switchTwoElements(tempProjection, nrOfTimes = 3)
1050                        newProj = orngVisFuncts.switchTwoElementsInGroups(tempProjection, numClasses, 3)
1051                        tries += 1
1052                        if triedPermutationsDict.has_key(str(newProj)):
1053                            failedConsecutiveTries += 1
1054                        else:
1055                            failedConsecutiveTries = 0
1056                            triedPermutationsDict[str(newProj)] = 1
1057
1058                            table = self.graph.createProjectionAsExampleTable(newProj, validData = validData, classList = classList, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain)
1059                            if len(table) < self.minNumOfExamples: continue
1060                            acc, other_results = self.evaluateProjection(table)
1061                            self.optimizedProjectionsCount += 1
1062                            if self.__class__ != VizRank:
1063                                qApp.processEvents()        # allow processing of other events
1064                            if self.isOptimizationCanceled(): return self.optimizedProjectionsCount
1065                            if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0:
1066                                self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1067                            if acc > bestAccuracy:
1068                                bestAccuracy = acc
1069                                bestProjection = newProj
1070                                #self.addResult(acc, other_results, len(table), [self.graph.attributeNames[i] for i in newProj], -1, {})
1071                            if acc > tempAccuracy or acc > 0.99 * tempAccuracy:
1072                                tempProjection = newProj
1073                                tempAccuracy = acc
1074                    projection = bestProjection
1075                    accuracy = bestAccuracy
1076
1077                # take best projection and try to replace one of the attributes with a new attribute
1078                # when you can't further improve projections this way try adding a new attribute to the projection
1079                # in the first step try to find a better projection by substituting an existent attribute with a new one
1080                # in the second step try to find a better projection by adding a new attribute to the circle
1081                significantImprovement = 0
1082                for iteration in range(2):
1083                    if iteration == 1 and not self.locOptAllowAddingAttributes: continue    # if we are not allowed to increase the number of visualized attributes
1084                    if (len(projection) + iteration > self.locOptMaxAttrsInProj): continue
1085                    strTotalAtts = orngVisFuncts.createStringFromNumber(lenOfAttributes)
1086                    for (attrIndex, attr) in enumerate(attributes):
1087                        if attr in projection: continue
1088                        if significantImprovement and restartWhenImproved: break        # if we found a projection that is significantly better than the currently best projection then restart the search with this projection
1089                        tempList = []
1090
1091                        # SPCA, PLS
1092                        if self.projOptimizationMethod != 0:
1093                            if iteration == 0:  # replace one attribute in each projection with attribute attr
1094                                testProjections = [copy(projection) for i in range(len(projection))]
1095                                for i in range(len(testProjections)): testProjections[i][len(projection)-1-i] = attr
1096                            elif iteration == 1: testProjections = [projection + [attr]]
1097
1098                            for proj in testProjections:
1099                                proj.sort()
1100                                if newProjDict.has_key(str(proj)): continue
1101                                newProjDict[str(proj)] = 1
1102                                xanchors, yanchors, (attrNames, newIndices) = self.freeviz.findProjection(self.projOptimizationMethod, proj, setAnchors = 0, percentDataUsed = self.percentDataUsed)
1103                                table = self.graph.createProjectionAsExampleTable(newIndices, domain = domain, XAnchors = xanchors, YAnchors = yanchors)
1104                                if len(table) < self.minNumOfExamples: continue
1105                                self.optimizedProjectionsCount += 1
1106                                acc, other_results = self.evaluateProjection(table)
1107
1108                                tempList.append((acc, other_results, len(table), newIndices, {"XAnchors": xanchors, "YAnchors": yanchors}))
1109                                if self.storeEachPermutation:
1110                                    self.addResult(acc, other_results, len(table), attrNames, projIndex, generalDict = {"XAnchors": xanchors, "YAnchors": yanchors})
1111
1112                                if self.__class__ != VizRank:
1113                                    qApp.processEvents()        # allow processing of other events
1114                                if self.isOptimizationCanceled(): return self.optimizedProjectionsCount
1115
1116                        # ordinary radviz projections
1117                        else:
1118                            testProjections = [copy(projection) for i in range(len(projection))]
1119                            if iteration == 0:  # replace one attribute in each projection with attribute attr
1120                                count = len(projection)
1121                                for i in range(count): testProjections[i][i] = attr
1122                            elif iteration == 1:
1123                                count = len(projection) + 1
1124                                for i in range(count-1): testProjections[i].insert(i, attr)
1125
1126                            XAnchors = self.graph.createXAnchors(count)
1127                            YAnchors = self.graph.createYAnchors(count)
1128                            validData = self.graph.getValidList(testProjections[0])
1129                            classList = numpy.compress(validData, classListFull)
1130
1131                            for testProj in testProjections:
1132                                if newProjDict.has_key(str(testProj)): continue
1133                                newProjDict[str(testProj)] = 1
1134
1135                                table = self.graph.createProjectionAsExampleTable(testProj, validData = validData, classList = classList, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain)
1136                                if len(table) < self.minNumOfExamples: continue
1137                                acc, other_results = self.evaluateProjection(table)
1138                                if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0: self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc))
1139                                if acc > accuracy:
1140                                    tempList.append((acc, other_results, len(table), testProj, {}))
1141                                if self.storeEachPermutation:
1142                                    self.addResult(acc, other_results, len(table), [self.graph.attributeNames[i] for i in testProj], projIndex, {})
1143
1144                                self.optimizedProjectionsCount += 1
1145                                if self.__class__ != VizRank:
1146                                    qApp.processEvents()        # allow processing of other events
1147                                if self.isOptimizationCanceled(): return self.optimizedProjectionsCount
1148
1149                        # return only the best attribute placements
1150                        if len(tempList) == 0: continue     # can happen if the newProjDict already had all the projections that we tried
1151                        (acc, other_results, lenTable, attrList, generalDict) = maxFunct(tempList)
1152                        if acc > 1.005*accuracy:
1153                            self.insertTempProjection(projections, acc, attrList)
1154                            self.addResult(acc, other_results, lenTable, [self.graph.attributeNames[i] for i in attrList], projIndex , generalDict)
1155                            if hasattr(self, "setStatusBarText"): self.setStatusBarText("Found a better projection with accuracy: %2.2f%%" % (acc))
1156                        if accuracy != 0 and acc > 1.01 * accuracy:  significantImprovement = 1
1157
1158        else:
1159            print "unknown visualization method"
1160
1161        return self.optimizedProjectionsCount
1162
1163    # ##############################################################
1164    # Loading and saving projection files
1165    # ##############################################################
1166
1167    # save the list into a file - filename can be set if you want to call this function without showing the dialog
1168    def save(self, name, results = None, count = 1000):
1169        # take care of extension
1170        if os.path.splitext(name)[1].lower() != ".proj": name = name + ".proj"
1171
1172        if not results: results = self.results
1173        self.abortCurrentOperation = 0
1174
1175        dirName, shortFileName = os.path.split(name)
1176        self.lastSaveDirName = dirName
1177
1178        # open, write and save file
1179        file = open(name, "wt")
1180
1181        attrs = ["kValue", "percentDataUsed", "qualityMeasure", "testingMethod", "parentName", "evaluationAlgorithm", "useExampleWeighting", "projOptimizationMethod", "attrSubsetSelection", "optimizationType", "attributeCount", "attrDisc", "attrCont", "timeLimit", "projectionLimit"]
1182        dict = {}
1183        for attr in attrs: dict[attr] = self.__dict__.get(attr)
1184        dict["dataCheckSum"] = self.graph.rawData.checksum()
1185        dict["totalProjectionsEvaluated"] = self.evaluatedProjectionsCount + self.optimizedProjectionsCount  # let's also save the total number of projections that we evaluated in order to get this list
1186
1187        file.write("%s\n%s\n" % (str(dict), str(self.selectedClasses)))
1188
1189        i=0
1190        for i in range(len(results)):
1191            if i >= count: break
1192
1193            (acc, other_results, lenTable, attrList, tryIndex, generalDict) = results[i]
1194
1195            s = "(%.3f, (" % (acc)
1196            for val in other_results:
1197                if type(val) == float: s += "%.3f ," % val
1198                elif type(val) == list:
1199                    s += "["
1200                    for el in val:
1201                        if type(el) == float: s += "%.3f, " % (el)
1202                        elif type(el) == int: s += "%d, " % (el)
1203                        else: s += "%s, " % str(el)
1204                    if s[-2] == ",": s = s[:-2]
1205                    s += "], "
1206            if s[-2] == ",": s = s[:-2]
1207            s += "), %d, %s, %d, %s)" % (lenTable, str(attrList), tryIndex, str(generalDict).replace("\n     ", "")) # be sure to remove \n in XAnchors and YAnchors otherwise load doesn't work
1208            file.write(s + "\n")
1209
1210            if self.abortCurrentOperation: break
1211            if hasattr(self, "setStatusBarText"):
1212                self.setStatusBarText("Saved %s projections" % (orngVisFuncts.createStringFromNumber(i)))
1213
1214        file.flush()
1215        file.close()
1216        self.abortCurrentOperation = 0
1217        return i
1218
1219    # load projections from a file
1220    def load(self, name, ignoreCheckSum = 1, maxCount = -1):
1221        self.clearResults()
1222        self.clearArguments()
1223        self.abortCurrentOperation = 0
1224
1225        file = open(name, "rt")
1226        settings = eval(file.readline()[:-1])
1227        if settings.get("parentName", "").lower() != self.parentName.lower():
1228            if self.__class__ != VizRank:
1229                QMessageBox.critical( self, "Optimization Dialog", 'Unable to load projection file. It was saved for %s method'%(settings["parentName"]), QMessageBox.Ok)
1230            else:
1231                print 'Unable to load projection file. It was saved for %s method' % (settings["parentName"])
1232            file.close()
1233            return [], 0
1234
1235        if settings.has_key("dataCheckSum") and settings["dataCheckSum"] != self.graph.rawData.checksum():
1236            if not ignoreCheckSum and self.__class__.__name__ == "OWVizRank":
1237                if QMessageBox.information(self, 'VizRank', 'The current data set has a different checksum than the data set that was used to evaluate projections in this file.\nDo you want to continue loading anyway, or cancel?','Continue','Cancel', '', 0,1):
1238                    file.close()
1239                    return [], 0
1240            else:
1241                print "The data set has a different checksum than the data set that was used in projection evaluation. Projection might be invalid but the file will be loaded anyway..."
1242
1243        for key in settings.keys():
1244            setattr(self, key, settings[key])
1245
1246        # find if it was computed for specific class values
1247        selectedClasses = eval(file.readline()[:-1])
1248
1249        if self.__class__ != VizRank:
1250            from PyQt4.QtGui import qApp
1251
1252        count = 0
1253        for line in file.xreadlines():
1254            (acc, other_results, lenTable, attrList, tryIndex, generalDict) = eval(line)
1255            VizRank.insertItem(self, count, acc, other_results, lenTable, attrList, tryIndex, generalDict)
1256            count+=1
1257            if maxCount != -1 and count >= maxCount: break
1258            if self.abortCurrentOperation: break
1259            if count % 100 == 0 and hasattr(self, "setStatusBarText"):
1260                self.setStatusBarText("Loaded %s projections" % (orngVisFuncts.createStringFromNumber(count)))
1261                qApp.processEvents()        # allow processing of other events
1262        file.close()
1263
1264        self.abortCurrentOperation = 0
1265
1266        # update loaded results
1267        return selectedClasses, count
1268
1269    # remove results that have tryIndex > topProjectionIndex
1270    def reduceResults(self, topProjectionIndex):
1271        results = self.results
1272        self.clearResults()
1273        i=0
1274        for (accuracy, other_results, lenTable, attrList, tryIndex, generalDict) in results:
1275            if tryIndex <= topProjectionIndex:
1276                self.insertItem(i, accuracy, other_results, lenTable, attrList, tryIndex, generalDict)
1277                i += 1
1278
1279
1280# ###############################################################################################################################################
1281# ######           VIZRANK OUTLIERS            ##############################################################################################
1282# ###############################################################################################################################################
class VizRankOutliers:
    """Detect possible outliers using the class probabilities predicted in the top projections.

    For each of the first `projectionCount` results the class probabilities of
    every example are stored in `matrixOfPredictions`, a matrix with
    projections*classes rows and one column per example. The value -100 marks
    "example not evaluated in this projection". `evaluatedExamples` then holds,
    for every example, the average predicted probability of its own class.
    """
    def __init__(self, vizrank, dialogType):
        """vizrank is the object used to evaluate projections; dialogType is
        VIZRANK_POINT or VIZRANK_MOSAIC and selects the layout of result tuples."""
        self.vizrank = vizrank
        self.dialogType = dialogType

        self.data = None
        self.results = None

        self.projectionIndices = []
        self.matrixOfPredictions = None
        self.graphMatrix = None
        self.evaluatedExamples = []
        self.projectionCount = 20

        if self.dialogType == VIZRANK_POINT:
            self.ATTR_LIST = ATTR_LIST
            self.ACCURACY = ACCURACY
        elif self.dialogType == VIZRANK_MOSAIC:
            import orngMosaic
            self.ATTR_LIST = orngMosaic.ATTR_LIST
            self.ACCURACY = orngMosaic.SCORE


    def setResults(self, data, results):
        """Set the data and the list of evaluated projections; cached predictions are dropped."""
        self.data = data
        self.results = results
        self.matrixOfPredictions = None


    def evaluateProjections(self, qApp = None):
        """Fill matrixOfPredictions for the top projections and rebuild evaluatedExamples.

        Predictions that were already computed are reused; only projections
        beyond the cached ones are evaluated. If qApp is given, its
        processEvents() is called after every projection.

        evaluatedExamples becomes a sorted list of
        (average probability of the example's class, example index,
        [(average probability, class value), ...]).
        """
        if self.dialogType == VIZRANK_POINT:
            graph = self.vizrank.graph

        if not self.results or not self.data: return

        projCount = min(int(self.projectionCount), len(self.results))
        classCount = max(len(self.data.domain.classVar.values), 1)
        matrixShape = (projCount*classCount, len(self.data))

        # "is not None": comparing an ndarray with != None works element-wise
        cached = self.matrixOfPredictions
        if cached is not None and numpy.shape(cached)[1] != len(self.data):
            cached = None       # cache belongs to a data set of a different size

        existing = 0
        if cached is None:
            self.matrixOfPredictions = -100 * numpy.ones(matrixShape, float)
        else:
            existing = numpy.shape(cached)[0] // classCount
            if existing < projCount:
                # numpy.resize would fill the new rows with repeated copies of the
                # old values; the new rows must start with the -100 sentinel instead
                grown = -100 * numpy.ones(matrixShape, float)
                grown[:existing*classCount, :] = cached[:existing*classCount, :]
                self.matrixOfPredictions = grown
            elif existing > projCount:
                self.matrixOfPredictions = cached[0:classCount*projCount,:]
            else:
                self.matrixOfPredictions = cached

        # compute the matrix of predictions
        results = self.results[existing:min(len(self.results), projCount)]
        index = 0
        for result in results:
            if self.dialogType == VIZRANK_POINT:
                acc, other, tableLen, attrList, tryIndex, generalDict = result
                attrIndices = [graph.attributeNameIndex[attr] for attr in attrList]
                validDataIndices = graph.getValidIndices(attrIndices)
                table = graph.createProjectionAsExampleTable(attrIndices, settingsDict = generalDict)    # TO DO: this does not work with polyviz!!!
                acc, probabilities = self.vizrank.kNNClassifyData(table)

            elif self.dialogType == VIZRANK_MOSAIC:
                from orngCI import FeatureByCartesianProduct
                acc, attrList, tryIndex, other = result
                clsVals = len(self.data.domain.classVar.values)
                probabilities = numpy.zeros((len(self.data), clsVals), float)
                newFeature, quality = FeatureByCartesianProduct(self.data, attrList)
                dist = orange.ContingencyAttrClass(newFeature, self.data)
                data = self.data.select([newFeature, self.data.domain.classVar])     # create a dataset that has only this new feature and class info
                validDataIndices = []
                for i, ex in enumerate(data):
                    try:
                        prob = dist[ex[0]]
                        total = max(1, float(sum(prob.values())))
                        for j in range(clsVals):
                            probabilities[i][j] = prob[j] / total
                    except Exception:
                        continue        # no distribution for this example - leave it unevaluated
                    validDataIndices.append(i)
                # keep only the rows of valid examples so that the values line up
                # with validDataIndices in numpy.put below
                validDataIndices = numpy.array(validDataIndices, int)
                probabilities = numpy.take(probabilities, validDataIndices, axis = 0)

            probabilities = numpy.transpose(probabilities)
            for i in range(classCount):
                numpy.put(self.matrixOfPredictions[(existing + index)*classCount + i], validDataIndices, probabilities[i])

            index += 1
            if hasattr(self, "setStatusBarText"):
                self.setStatusBarText("Evaluated %s/%s projections..." % (orngVisFuncts.createStringFromNumber(existing + index), orngVisFuncts.createStringFromNumber(projCount)))
                self.widget.progressBarSet(100.0*(index)/max(1, float(projCount-existing)))
            if qApp:
                qApp.processEvents()

        # generate a sorted list of (probability, exampleIndex, classDistribution)
        self.evaluatedExamples = []
        for exIndex in range(len(self.data)):
            # rows = classes, columns = projections for this example
            matrix = numpy.transpose(numpy.reshape(self.matrixOfPredictions[:, exIndex], (projCount, classCount)))
            ownClass = int(self.data[exIndex].getclass())
            valid = numpy.where(matrix[ownClass] != -100, 1, 0)     # projections in which the example was evaluated
            ownProbs = numpy.compress(valid, matrix[ownClass])
            if len(ownProbs): aveAcc = numpy.sum(ownProbs) / float(len(ownProbs))
            else:             aveAcc = 0
            classPredictions = []
            for ind, val in enumerate(self.data.domain.classVar.values):
                classProbs = numpy.compress(valid, matrix[ind])
                if len(classProbs): acc = numpy.sum(classProbs) / float(len(classProbs))
                else:               acc = 0
                classPredictions.append((acc, val))
            self.evaluatedExamples.append((aveAcc, exIndex, classPredictions))
        self.evaluatedExamples.sort()

    def changeClassToMostProbable(self):
        """Return a copy of the data in which every example gets the class value with
        the highest average predicted probability in self.evaluatedExamples.

        Returns None (after printing a message) when evaluateProjections() has
        not been run for the current data.
        """
        if not self.data or not self.evaluatedExamples or len(self.evaluatedExamples) != len(self.data):
            print("no data or outliers not found yet. Run evaluateProjections() first.")
            return

        correctedData = orange.ExampleTable(self.data)
        for (aveAcc, exInd, classPredictions) in self.evaluatedExamples:
            (acc, clsVal) = max(classPredictions)
            correctedData[exInd].setclass(clsVal)
        return correctedData
1400
1401
1402# ###############################################################################################################################################
1403# ######       VIZRANK LEARNERS, CLASSIFIERS       ##############################################################################################
1404# ###############################################################################################################################################
1405
1406# class that represents kNN classifier that classifies examples based on top evaluated projections
class VizRankClassifier(orange.Classifier):
    """Classifier that classifies examples based on the top evaluated projections.

    Constructing the classifier evaluates projections on the given data (and
    optionally optimizes the best ones); calling it delegates to the
    findArguments() method of the wrapped VizRank object.
    """
    def __init__(self, vizrank, data):
        """Pass the data to vizrank (through its parent widget when it is an
        OWVizRank) and evaluate projections."""
        self.VizRank = vizrank

        if self.VizRank.__class__.__name__ == "OWVizRank":
            self.VizRank.parentWidget.setData(data)
            self.VizRank.parentWidget.handleNewSignals()
            self.VizRank.timeLimit = self.VizRank.evaluationTime
            if self.VizRank.optimizeBestProjection:
                self.VizRank.optimizeTimeLimit = self.VizRank.optimizeBestProjectionTime
            else:
                self.VizRank.optimizeTimeLimit = 0
        else:
            self.VizRank.setData(data)

        self.VizRank.evaluateProjections()

        # do we want to optimize current projection. if yes then spend the same amount of time to optimize it
        if self.VizRank.optimizeTimeLimit > 0 or self.VizRank.optimizeProjectionLimit:
            self.VizRank.optimizeBestProjections()
            self.VizRank.removeTooSimilarProjections()


    def __call__(self, example, returnType = orange.GetBoth):
        """Run argumentation for the example and return the prediction.

        Returns (class value, distribution) for orange.GetBoth, only the
        distribution for orange.GetProbabilities and only the class value for
        any other returnType.
        """
        if self.VizRank.__class__.__name__ == "OWVizRank":
            table = orange.ExampleTable(example.domain)
            table.append(example)
            self.VizRank.parentWidget.setSubsetData(table)       # show the example if we use the widget
            self.VizRank.parentWidget.handleNewSignals()
            classVal, dist = self.VizRank.findArguments(example, 0, 0)
        else:
            classVal, dist = self.VizRank.findArguments(example)

        if returnType == orange.GetBoth:            return classVal, dist
        elif returnType == orange.GetProbabilities: return dist
        else:                                       return classVal
1445
1446
1447# #############################################################################
1448# learner that builds VizRankClassifier
class VizRankLearner(orange.Learner):
    """Learner that wraps a VizRank object and builds VizRankClassifier instances."""
    def __init__(self, visualizationMethod = SCATTERPLOT, vizrank = None, graph = None):
        """Use the given vizrank, or create a new VizRank for visualizationMethod
        and graph when none is given. The learner takes its name from
        vizrank.learnerName."""
        self.VizRank = vizrank or VizRank(visualizationMethod, graph)
        self.name = self.VizRank.learnerName


    def __call__(self, examples, weightID = 0):
        """Return a VizRankClassifier built on examples; weightID is accepted but not used."""
        classifier = VizRankClassifier(self.VizRank, examples)
        return classifier
1459
1460
1461
1462#test widget
if __name__=="__main__":
    # Manual smoke test. The data set paths are specific to the original
    # developer's machine and have to be adapted before running.

    # Disabled alternative experiment: linear projections on the UCI wine data.
    # (kept as comments so that the wine data set is not loaded for nothing)
    #data = orange.ExampleTable(r"E:\Development\Python23\Lib\site-packages\Orange\Datasets\UCI\wine.tab")
    #data = orange.ExampleTable(r"E:\Development\Python23\Lib\site-packages\Orange\Datasets\microarray\cancer\leukemia.tab")
    #vizrank = VizRank(LINEAR_PROJECTION)
    #vizrank.setData(data)
    #vizrank.optimizationType = EXACT_NUMBER_OF_ATTRS    # MAXIMUM_NUMBER_OF_ATTRS,  EXACT_NUMBER_OF_ATTRS
    #vizrank.attributeCount = 10
    #vizrank.attrCont = CONT_MEAS_S2NMIX
    #vizrank.projOptimizationMethod = 0
    #vizrank.useExampleWeighting = 0
    #vizrank.attrSubsetSelection = GAMMA_SINGLE
    #vizrank.timeLimit = 1
    #vizrank.evaluateProjections()

    # Active experiment: evaluate Radviz projections for 10 time units (vizrank.timeLimit)
    data = orange.ExampleTable(r"E:\Development\Python23\Lib\site-packages\Orange\datasets\Imatch\irski podatki\merged\merged-all.tab")
    vizrank = VizRank(RADVIZ)
    vizrank.setData(data)
    vizrank.attributeCount = 6
    vizrank.optimizationType = MAXIMUM_NUMBER_OF_ATTRS    # MAXIMUM_NUMBER_OF_ATTRS,  EXACT_NUMBER_OF_ATTRS
    #vizrank.attrSubsetSelection = GAMMA_SINGLE
    vizrank.attrSubsetSelection = DETERMINISTIC_ALL

    #vizrank.attrCont = CONT_MEAS_S2N
    vizrank.attrCont = CONT_MEAS_S2NMIX

    #vizrank.storeEachPermutation = 1
    #vizrank.load(r"E:\Development\Python23\Lib\site-packages\Orange\Datasets\microarray\cancer\leukemia - Radviz - test.proj")
    #vizrank.computeVizRanksAccuracy()
    vizrank.timeLimit = 10
    vizrank.evaluateProjections()
    #vizrank.findArguments(data[0])
1495
Note: See TracBrowser for help on using the repository browser.